xref: /openbmc/linux/fs/btrfs/ctree.c (revision 5963ffca)
1c1d7c514SDavid Sterba // SPDX-License-Identifier: GPL-2.0
26cbd5570SChris Mason /*
3d352ac68SChris Mason  * Copyright (C) 2007,2008 Oracle.  All rights reserved.
46cbd5570SChris Mason  */
56cbd5570SChris Mason 
6a6b6e75eSChris Mason #include <linux/sched.h>
75a0e3ad6STejun Heo #include <linux/slab.h>
8bd989ba3SJan Schmidt #include <linux/rbtree.h>
9adf02123SDavid Sterba #include <linux/mm.h>
10eb60ceacSChris Mason #include "ctree.h"
11eb60ceacSChris Mason #include "disk-io.h"
127f5c1516SChris Mason #include "transaction.h"
135f39d397SChris Mason #include "print-tree.h"
14925baeddSChris Mason #include "locking.h"
15de37aa51SNikolay Borisov #include "volumes.h"
16f616f5cdSQu Wenruo #include "qgroup.h"
17f3a84ccdSFilipe Manana #include "tree-mod-log.h"
189a8dd150SChris Mason 
/*
 * Prototypes for file-local (static) helpers, so that they can be referenced
 * before the point where they are defined.
 */
static int split_node(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_path *path,
		      int level);

static int split_leaf(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      const struct btrfs_key *ins_key,
		      struct btrfs_path *path,
		      int data_size,
		      int extend);

static int push_node_left(struct btrfs_trans_handle *trans,
			  struct extent_buffer *dst,
			  struct extent_buffer *src,
			  int empty);

static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);

static void del_ptr(struct btrfs_root *root,
		    struct btrfs_path *path,
		    int level,
		    int slot);
32d97e63b6SChris Mason 
33af024ed2SJohannes Thumshirn static const struct btrfs_csums {
34af024ed2SJohannes Thumshirn 	u16		size;
3559a0fcdbSDavid Sterba 	const char	name[10];
3659a0fcdbSDavid Sterba 	const char	driver[12];
37af024ed2SJohannes Thumshirn } btrfs_csums[] = {
38af024ed2SJohannes Thumshirn 	[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
393951e7f0SJohannes Thumshirn 	[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
403831bf00SJohannes Thumshirn 	[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
41352ae07bSDavid Sterba 	[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
42352ae07bSDavid Sterba 				     .driver = "blake2b-256" },
43af024ed2SJohannes Thumshirn };
44af024ed2SJohannes Thumshirn 
45af024ed2SJohannes Thumshirn int btrfs_super_csum_size(const struct btrfs_super_block *s)
46af024ed2SJohannes Thumshirn {
47af024ed2SJohannes Thumshirn 	u16 t = btrfs_super_csum_type(s);
48af024ed2SJohannes Thumshirn 	/*
49af024ed2SJohannes Thumshirn 	 * csum type is validated at mount time
50af024ed2SJohannes Thumshirn 	 */
51af024ed2SJohannes Thumshirn 	return btrfs_csums[t].size;
52af024ed2SJohannes Thumshirn }
53af024ed2SJohannes Thumshirn 
54af024ed2SJohannes Thumshirn const char *btrfs_super_csum_name(u16 csum_type)
55af024ed2SJohannes Thumshirn {
56af024ed2SJohannes Thumshirn 	/* csum type is validated at mount time */
57af024ed2SJohannes Thumshirn 	return btrfs_csums[csum_type].name;
58af024ed2SJohannes Thumshirn }
59af024ed2SJohannes Thumshirn 
60b4e967beSDavid Sterba /*
61b4e967beSDavid Sterba  * Return driver name if defined, otherwise the name that's also a valid driver
62b4e967beSDavid Sterba  * name
63b4e967beSDavid Sterba  */
64b4e967beSDavid Sterba const char *btrfs_super_csum_driver(u16 csum_type)
65b4e967beSDavid Sterba {
66b4e967beSDavid Sterba 	/* csum type is validated at mount time */
6759a0fcdbSDavid Sterba 	return btrfs_csums[csum_type].driver[0] ?
6859a0fcdbSDavid Sterba 		btrfs_csums[csum_type].driver :
69b4e967beSDavid Sterba 		btrfs_csums[csum_type].name;
70b4e967beSDavid Sterba }
71b4e967beSDavid Sterba 
72604997b4SDavid Sterba size_t __attribute_const__ btrfs_get_num_csums(void)
73f7cea56cSDavid Sterba {
74f7cea56cSDavid Sterba 	return ARRAY_SIZE(btrfs_csums);
75f7cea56cSDavid Sterba }
76f7cea56cSDavid Sterba 
772c90e5d6SChris Mason struct btrfs_path *btrfs_alloc_path(void)
782c90e5d6SChris Mason {
79e2c89907SMasahiro Yamada 	return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
802c90e5d6SChris Mason }
812c90e5d6SChris Mason 
82d352ac68SChris Mason /* this also releases the path */
832c90e5d6SChris Mason void btrfs_free_path(struct btrfs_path *p)
842c90e5d6SChris Mason {
85ff175d57SJesper Juhl 	if (!p)
86ff175d57SJesper Juhl 		return;
87b3b4aa74SDavid Sterba 	btrfs_release_path(p);
882c90e5d6SChris Mason 	kmem_cache_free(btrfs_path_cachep, p);
892c90e5d6SChris Mason }
902c90e5d6SChris Mason 
91d352ac68SChris Mason /*
92d352ac68SChris Mason  * path release drops references on the extent buffers in the path
93d352ac68SChris Mason  * and it drops any locks held by this path
94d352ac68SChris Mason  *
95d352ac68SChris Mason  * It is safe to call this on paths that no locks or extent buffers held.
96d352ac68SChris Mason  */
97b3b4aa74SDavid Sterba noinline void btrfs_release_path(struct btrfs_path *p)
98eb60ceacSChris Mason {
99eb60ceacSChris Mason 	int i;
100a2135011SChris Mason 
101234b63a0SChris Mason 	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
1023f157a2fSChris Mason 		p->slots[i] = 0;
103eb60ceacSChris Mason 		if (!p->nodes[i])
104925baeddSChris Mason 			continue;
105925baeddSChris Mason 		if (p->locks[i]) {
106bd681513SChris Mason 			btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
107925baeddSChris Mason 			p->locks[i] = 0;
108925baeddSChris Mason 		}
1095f39d397SChris Mason 		free_extent_buffer(p->nodes[i]);
1103f157a2fSChris Mason 		p->nodes[i] = NULL;
111eb60ceacSChris Mason 	}
112eb60ceacSChris Mason }
113eb60ceacSChris Mason 
114d352ac68SChris Mason /*
115d352ac68SChris Mason  * safely gets a reference on the root node of a tree.  A lock
116d352ac68SChris Mason  * is not taken, so a concurrent writer may put a different node
117d352ac68SChris Mason  * at the root of the tree.  See btrfs_lock_root_node for the
118d352ac68SChris Mason  * looping required.
119d352ac68SChris Mason  *
120d352ac68SChris Mason  * The extent buffer returned by this has a reference taken, so
121d352ac68SChris Mason  * it won't disappear.  It may stop being the root of the tree
122d352ac68SChris Mason  * at any time because there are no locks held.
123d352ac68SChris Mason  */
124925baeddSChris Mason struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
125925baeddSChris Mason {
126925baeddSChris Mason 	struct extent_buffer *eb;
127240f62c8SChris Mason 
1283083ee2eSJosef Bacik 	while (1) {
129240f62c8SChris Mason 		rcu_read_lock();
130240f62c8SChris Mason 		eb = rcu_dereference(root->node);
1313083ee2eSJosef Bacik 
1323083ee2eSJosef Bacik 		/*
1333083ee2eSJosef Bacik 		 * RCU really hurts here, we could free up the root node because
13401327610SNicholas D Steeves 		 * it was COWed but we may not get the new root node yet so do
1353083ee2eSJosef Bacik 		 * the inc_not_zero dance and if it doesn't work then
1363083ee2eSJosef Bacik 		 * synchronize_rcu and try again.
1373083ee2eSJosef Bacik 		 */
1383083ee2eSJosef Bacik 		if (atomic_inc_not_zero(&eb->refs)) {
139240f62c8SChris Mason 			rcu_read_unlock();
1403083ee2eSJosef Bacik 			break;
1413083ee2eSJosef Bacik 		}
1423083ee2eSJosef Bacik 		rcu_read_unlock();
1433083ee2eSJosef Bacik 		synchronize_rcu();
1443083ee2eSJosef Bacik 	}
145925baeddSChris Mason 	return eb;
146925baeddSChris Mason }
147925baeddSChris Mason 
14892a7cc42SQu Wenruo /*
14992a7cc42SQu Wenruo  * Cowonly root (not-shareable trees, everything not subvolume or reloc roots),
15092a7cc42SQu Wenruo  * just get put onto a simple dirty list.  Transaction walks this list to make
15192a7cc42SQu Wenruo  * sure they get properly updated on disk.
152d352ac68SChris Mason  */
1530b86a832SChris Mason static void add_root_to_dirty_list(struct btrfs_root *root)
1540b86a832SChris Mason {
1550b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
1560b246afaSJeff Mahoney 
157e7070be1SJosef Bacik 	if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
158e7070be1SJosef Bacik 	    !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
159e7070be1SJosef Bacik 		return;
160e7070be1SJosef Bacik 
1610b246afaSJeff Mahoney 	spin_lock(&fs_info->trans_lock);
162e7070be1SJosef Bacik 	if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
163e7070be1SJosef Bacik 		/* Want the extent tree to be the last on the list */
1644fd786e6SMisono Tomohiro 		if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
165e7070be1SJosef Bacik 			list_move_tail(&root->dirty_list,
1660b246afaSJeff Mahoney 				       &fs_info->dirty_cowonly_roots);
167e7070be1SJosef Bacik 		else
168e7070be1SJosef Bacik 			list_move(&root->dirty_list,
1690b246afaSJeff Mahoney 				  &fs_info->dirty_cowonly_roots);
1700b86a832SChris Mason 	}
1710b246afaSJeff Mahoney 	spin_unlock(&fs_info->trans_lock);
1720b86a832SChris Mason }
1730b86a832SChris Mason 
174d352ac68SChris Mason /*
175d352ac68SChris Mason  * used by snapshot creation to make a copy of a root for a tree with
176d352ac68SChris Mason  * a given objectid.  The buffer with the new root node is returned in
177d352ac68SChris Mason  * cow_ret, and this func returns zero on success or a negative error code.
178d352ac68SChris Mason  */
179be20aa9dSChris Mason int btrfs_copy_root(struct btrfs_trans_handle *trans,
180be20aa9dSChris Mason 		      struct btrfs_root *root,
181be20aa9dSChris Mason 		      struct extent_buffer *buf,
182be20aa9dSChris Mason 		      struct extent_buffer **cow_ret, u64 new_root_objectid)
183be20aa9dSChris Mason {
1840b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
185be20aa9dSChris Mason 	struct extent_buffer *cow;
186be20aa9dSChris Mason 	int ret = 0;
187be20aa9dSChris Mason 	int level;
1885d4f98a2SYan Zheng 	struct btrfs_disk_key disk_key;
189be20aa9dSChris Mason 
19092a7cc42SQu Wenruo 	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
1910b246afaSJeff Mahoney 		trans->transid != fs_info->running_transaction->transid);
19292a7cc42SQu Wenruo 	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
19327cdeb70SMiao Xie 		trans->transid != root->last_trans);
194be20aa9dSChris Mason 
195be20aa9dSChris Mason 	level = btrfs_header_level(buf);
1965d4f98a2SYan Zheng 	if (level == 0)
1975d4f98a2SYan Zheng 		btrfs_item_key(buf, &disk_key, 0);
1985d4f98a2SYan Zheng 	else
1995d4f98a2SYan Zheng 		btrfs_node_key(buf, &disk_key, 0);
20031840ae1SZheng Yan 
2014d75f8a9SDavid Sterba 	cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
202cf6f34aaSJosef Bacik 				     &disk_key, level, buf->start, 0,
203cf6f34aaSJosef Bacik 				     BTRFS_NESTING_NEW_ROOT);
2045d4f98a2SYan Zheng 	if (IS_ERR(cow))
205be20aa9dSChris Mason 		return PTR_ERR(cow);
206be20aa9dSChris Mason 
20758e8012cSDavid Sterba 	copy_extent_buffer_full(cow, buf);
208be20aa9dSChris Mason 	btrfs_set_header_bytenr(cow, cow->start);
209be20aa9dSChris Mason 	btrfs_set_header_generation(cow, trans->transid);
2105d4f98a2SYan Zheng 	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
2115d4f98a2SYan Zheng 	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
2125d4f98a2SYan Zheng 				     BTRFS_HEADER_FLAG_RELOC);
2135d4f98a2SYan Zheng 	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
2145d4f98a2SYan Zheng 		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
2155d4f98a2SYan Zheng 	else
216be20aa9dSChris Mason 		btrfs_set_header_owner(cow, new_root_objectid);
217be20aa9dSChris Mason 
218de37aa51SNikolay Borisov 	write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
2192b82032cSYan Zheng 
220be20aa9dSChris Mason 	WARN_ON(btrfs_header_generation(buf) > trans->transid);
2215d4f98a2SYan Zheng 	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
222e339a6b0SJosef Bacik 		ret = btrfs_inc_ref(trans, root, cow, 1);
2235d4f98a2SYan Zheng 	else
224e339a6b0SJosef Bacik 		ret = btrfs_inc_ref(trans, root, cow, 0);
225867ed321SJosef Bacik 	if (ret) {
22672c9925fSFilipe Manana 		btrfs_tree_unlock(cow);
22772c9925fSFilipe Manana 		free_extent_buffer(cow);
228867ed321SJosef Bacik 		btrfs_abort_transaction(trans, ret);
229be20aa9dSChris Mason 		return ret;
230867ed321SJosef Bacik 	}
231be20aa9dSChris Mason 
232be20aa9dSChris Mason 	btrfs_mark_buffer_dirty(cow);
233be20aa9dSChris Mason 	*cow_ret = cow;
234be20aa9dSChris Mason 	return 0;
235be20aa9dSChris Mason }
236be20aa9dSChris Mason 
237d352ac68SChris Mason /*
2385d4f98a2SYan Zheng  * check if the tree block can be shared by multiple trees
2395d4f98a2SYan Zheng  */
2405d4f98a2SYan Zheng int btrfs_block_can_be_shared(struct btrfs_root *root,
2415d4f98a2SYan Zheng 			      struct extent_buffer *buf)
2425d4f98a2SYan Zheng {
2435d4f98a2SYan Zheng 	/*
24492a7cc42SQu Wenruo 	 * Tree blocks not in shareable trees and tree roots are never shared.
24592a7cc42SQu Wenruo 	 * If a block was allocated after the last snapshot and the block was
24692a7cc42SQu Wenruo 	 * not allocated by tree relocation, we know the block is not shared.
2475d4f98a2SYan Zheng 	 */
24892a7cc42SQu Wenruo 	if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
2495d4f98a2SYan Zheng 	    buf != root->node && buf != root->commit_root &&
2505d4f98a2SYan Zheng 	    (btrfs_header_generation(buf) <=
2515d4f98a2SYan Zheng 	     btrfs_root_last_snapshot(&root->root_item) ||
2525d4f98a2SYan Zheng 	     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
2535d4f98a2SYan Zheng 		return 1;
254a79865c6SNikolay Borisov 
2555d4f98a2SYan Zheng 	return 0;
2565d4f98a2SYan Zheng }
2575d4f98a2SYan Zheng 
2585d4f98a2SYan Zheng static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
2595d4f98a2SYan Zheng 				       struct btrfs_root *root,
2605d4f98a2SYan Zheng 				       struct extent_buffer *buf,
261f0486c68SYan, Zheng 				       struct extent_buffer *cow,
262f0486c68SYan, Zheng 				       int *last_ref)
2635d4f98a2SYan Zheng {
2640b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
2655d4f98a2SYan Zheng 	u64 refs;
2665d4f98a2SYan Zheng 	u64 owner;
2675d4f98a2SYan Zheng 	u64 flags;
2685d4f98a2SYan Zheng 	u64 new_flags = 0;
2695d4f98a2SYan Zheng 	int ret;
2705d4f98a2SYan Zheng 
2715d4f98a2SYan Zheng 	/*
2725d4f98a2SYan Zheng 	 * Backrefs update rules:
2735d4f98a2SYan Zheng 	 *
2745d4f98a2SYan Zheng 	 * Always use full backrefs for extent pointers in tree block
2755d4f98a2SYan Zheng 	 * allocated by tree relocation.
2765d4f98a2SYan Zheng 	 *
2775d4f98a2SYan Zheng 	 * If a shared tree block is no longer referenced by its owner
2785d4f98a2SYan Zheng 	 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
2795d4f98a2SYan Zheng 	 * use full backrefs for extent pointers in tree block.
2805d4f98a2SYan Zheng 	 *
2815d4f98a2SYan Zheng 	 * If a tree block is been relocating
2825d4f98a2SYan Zheng 	 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
2835d4f98a2SYan Zheng 	 * use full backrefs for extent pointers in tree block.
2845d4f98a2SYan Zheng 	 * The reason for this is some operations (such as drop tree)
2855d4f98a2SYan Zheng 	 * are only allowed for blocks use full backrefs.
2865d4f98a2SYan Zheng 	 */
2875d4f98a2SYan Zheng 
2885d4f98a2SYan Zheng 	if (btrfs_block_can_be_shared(root, buf)) {
2892ff7e61eSJeff Mahoney 		ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
2903173a18fSJosef Bacik 					       btrfs_header_level(buf), 1,
2913173a18fSJosef Bacik 					       &refs, &flags);
292be1a5564SMark Fasheh 		if (ret)
293be1a5564SMark Fasheh 			return ret;
294e5df9573SMark Fasheh 		if (refs == 0) {
295e5df9573SMark Fasheh 			ret = -EROFS;
2960b246afaSJeff Mahoney 			btrfs_handle_fs_error(fs_info, ret, NULL);
297e5df9573SMark Fasheh 			return ret;
298e5df9573SMark Fasheh 		}
2995d4f98a2SYan Zheng 	} else {
3005d4f98a2SYan Zheng 		refs = 1;
3015d4f98a2SYan Zheng 		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
3025d4f98a2SYan Zheng 		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
3035d4f98a2SYan Zheng 			flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
3045d4f98a2SYan Zheng 		else
3055d4f98a2SYan Zheng 			flags = 0;
3065d4f98a2SYan Zheng 	}
3075d4f98a2SYan Zheng 
3085d4f98a2SYan Zheng 	owner = btrfs_header_owner(buf);
3095d4f98a2SYan Zheng 	BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
3105d4f98a2SYan Zheng 	       !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
3115d4f98a2SYan Zheng 
3125d4f98a2SYan Zheng 	if (refs > 1) {
3135d4f98a2SYan Zheng 		if ((owner == root->root_key.objectid ||
3145d4f98a2SYan Zheng 		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
3155d4f98a2SYan Zheng 		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
316e339a6b0SJosef Bacik 			ret = btrfs_inc_ref(trans, root, buf, 1);
317692826b2SJeff Mahoney 			if (ret)
318692826b2SJeff Mahoney 				return ret;
3195d4f98a2SYan Zheng 
3205d4f98a2SYan Zheng 			if (root->root_key.objectid ==
3215d4f98a2SYan Zheng 			    BTRFS_TREE_RELOC_OBJECTID) {
322e339a6b0SJosef Bacik 				ret = btrfs_dec_ref(trans, root, buf, 0);
323692826b2SJeff Mahoney 				if (ret)
324692826b2SJeff Mahoney 					return ret;
325e339a6b0SJosef Bacik 				ret = btrfs_inc_ref(trans, root, cow, 1);
326692826b2SJeff Mahoney 				if (ret)
327692826b2SJeff Mahoney 					return ret;
3285d4f98a2SYan Zheng 			}
3295d4f98a2SYan Zheng 			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
3305d4f98a2SYan Zheng 		} else {
3315d4f98a2SYan Zheng 
3325d4f98a2SYan Zheng 			if (root->root_key.objectid ==
3335d4f98a2SYan Zheng 			    BTRFS_TREE_RELOC_OBJECTID)
334e339a6b0SJosef Bacik 				ret = btrfs_inc_ref(trans, root, cow, 1);
3355d4f98a2SYan Zheng 			else
336e339a6b0SJosef Bacik 				ret = btrfs_inc_ref(trans, root, cow, 0);
337692826b2SJeff Mahoney 			if (ret)
338692826b2SJeff Mahoney 				return ret;
3395d4f98a2SYan Zheng 		}
3405d4f98a2SYan Zheng 		if (new_flags != 0) {
341b1c79e09SJosef Bacik 			int level = btrfs_header_level(buf);
342b1c79e09SJosef Bacik 
34342c9d0b5SDavid Sterba 			ret = btrfs_set_disk_extent_flags(trans, buf,
344b1c79e09SJosef Bacik 							  new_flags, level, 0);
345be1a5564SMark Fasheh 			if (ret)
346be1a5564SMark Fasheh 				return ret;
3475d4f98a2SYan Zheng 		}
3485d4f98a2SYan Zheng 	} else {
3495d4f98a2SYan Zheng 		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
3505d4f98a2SYan Zheng 			if (root->root_key.objectid ==
3515d4f98a2SYan Zheng 			    BTRFS_TREE_RELOC_OBJECTID)
352e339a6b0SJosef Bacik 				ret = btrfs_inc_ref(trans, root, cow, 1);
3535d4f98a2SYan Zheng 			else
354e339a6b0SJosef Bacik 				ret = btrfs_inc_ref(trans, root, cow, 0);
355692826b2SJeff Mahoney 			if (ret)
356692826b2SJeff Mahoney 				return ret;
357e339a6b0SJosef Bacik 			ret = btrfs_dec_ref(trans, root, buf, 1);
358692826b2SJeff Mahoney 			if (ret)
359692826b2SJeff Mahoney 				return ret;
3605d4f98a2SYan Zheng 		}
3616a884d7dSDavid Sterba 		btrfs_clean_tree_block(buf);
362f0486c68SYan, Zheng 		*last_ref = 1;
3635d4f98a2SYan Zheng 	}
3645d4f98a2SYan Zheng 	return 0;
3655d4f98a2SYan Zheng }
3665d4f98a2SYan Zheng 
367a6279470SFilipe Manana static struct extent_buffer *alloc_tree_block_no_bg_flush(
368a6279470SFilipe Manana 					  struct btrfs_trans_handle *trans,
369a6279470SFilipe Manana 					  struct btrfs_root *root,
370a6279470SFilipe Manana 					  u64 parent_start,
371a6279470SFilipe Manana 					  const struct btrfs_disk_key *disk_key,
372a6279470SFilipe Manana 					  int level,
373a6279470SFilipe Manana 					  u64 hint,
3749631e4ccSJosef Bacik 					  u64 empty_size,
3759631e4ccSJosef Bacik 					  enum btrfs_lock_nesting nest)
376a6279470SFilipe Manana {
377a6279470SFilipe Manana 	struct btrfs_fs_info *fs_info = root->fs_info;
378a6279470SFilipe Manana 	struct extent_buffer *ret;
379a6279470SFilipe Manana 
380a6279470SFilipe Manana 	/*
381a6279470SFilipe Manana 	 * If we are COWing a node/leaf from the extent, chunk, device or free
382a6279470SFilipe Manana 	 * space trees, make sure that we do not finish block group creation of
383a6279470SFilipe Manana 	 * pending block groups. We do this to avoid a deadlock.
384a6279470SFilipe Manana 	 * COWing can result in allocation of a new chunk, and flushing pending
385a6279470SFilipe Manana 	 * block groups (btrfs_create_pending_block_groups()) can be triggered
386a6279470SFilipe Manana 	 * when finishing allocation of a new chunk. Creation of a pending block
387a6279470SFilipe Manana 	 * group modifies the extent, chunk, device and free space trees,
388a6279470SFilipe Manana 	 * therefore we could deadlock with ourselves since we are holding a
389a6279470SFilipe Manana 	 * lock on an extent buffer that btrfs_create_pending_block_groups() may
390a6279470SFilipe Manana 	 * try to COW later.
391a6279470SFilipe Manana 	 * For similar reasons, we also need to delay flushing pending block
392a6279470SFilipe Manana 	 * groups when splitting a leaf or node, from one of those trees, since
393a6279470SFilipe Manana 	 * we are holding a write lock on it and its parent or when inserting a
394a6279470SFilipe Manana 	 * new root node for one of those trees.
395a6279470SFilipe Manana 	 */
396a6279470SFilipe Manana 	if (root == fs_info->extent_root ||
397a6279470SFilipe Manana 	    root == fs_info->chunk_root ||
398a6279470SFilipe Manana 	    root == fs_info->dev_root ||
399a6279470SFilipe Manana 	    root == fs_info->free_space_root)
400a6279470SFilipe Manana 		trans->can_flush_pending_bgs = false;
401a6279470SFilipe Manana 
402a6279470SFilipe Manana 	ret = btrfs_alloc_tree_block(trans, root, parent_start,
403a6279470SFilipe Manana 				     root->root_key.objectid, disk_key, level,
4049631e4ccSJosef Bacik 				     hint, empty_size, nest);
405a6279470SFilipe Manana 	trans->can_flush_pending_bgs = true;
406a6279470SFilipe Manana 
407a6279470SFilipe Manana 	return ret;
408a6279470SFilipe Manana }
409a6279470SFilipe Manana 
4105d4f98a2SYan Zheng /*
411d397712bSChris Mason  * does the dirty work in cow of a single block.  The parent block (if
412d397712bSChris Mason  * supplied) is updated to point to the new cow copy.  The new buffer is marked
413d397712bSChris Mason  * dirty and returned locked.  If you modify the block it needs to be marked
414d397712bSChris Mason  * dirty again.
415d352ac68SChris Mason  *
416d352ac68SChris Mason  * search_start -- an allocation hint for the new block
417d352ac68SChris Mason  *
418d397712bSChris Mason  * empty_size -- a hint that you plan on doing more cow.  This is the size in
419d397712bSChris Mason  * bytes the allocator should try to find free next to the block it returns.
420d397712bSChris Mason  * This is just a hint and may be ignored by the allocator.
421d352ac68SChris Mason  */
422d397712bSChris Mason static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
4235f39d397SChris Mason 			     struct btrfs_root *root,
4245f39d397SChris Mason 			     struct extent_buffer *buf,
4255f39d397SChris Mason 			     struct extent_buffer *parent, int parent_slot,
4265f39d397SChris Mason 			     struct extent_buffer **cow_ret,
4279631e4ccSJosef Bacik 			     u64 search_start, u64 empty_size,
4289631e4ccSJosef Bacik 			     enum btrfs_lock_nesting nest)
4296702ed49SChris Mason {
4300b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
4315d4f98a2SYan Zheng 	struct btrfs_disk_key disk_key;
4325f39d397SChris Mason 	struct extent_buffer *cow;
433be1a5564SMark Fasheh 	int level, ret;
434f0486c68SYan, Zheng 	int last_ref = 0;
435925baeddSChris Mason 	int unlock_orig = 0;
4360f5053ebSGoldwyn Rodrigues 	u64 parent_start = 0;
4376702ed49SChris Mason 
438925baeddSChris Mason 	if (*cow_ret == buf)
439925baeddSChris Mason 		unlock_orig = 1;
440925baeddSChris Mason 
441b9447ef8SChris Mason 	btrfs_assert_tree_locked(buf);
442925baeddSChris Mason 
44392a7cc42SQu Wenruo 	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
4440b246afaSJeff Mahoney 		trans->transid != fs_info->running_transaction->transid);
44592a7cc42SQu Wenruo 	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
44627cdeb70SMiao Xie 		trans->transid != root->last_trans);
4475f39d397SChris Mason 
4487bb86316SChris Mason 	level = btrfs_header_level(buf);
44931840ae1SZheng Yan 
4505d4f98a2SYan Zheng 	if (level == 0)
4515d4f98a2SYan Zheng 		btrfs_item_key(buf, &disk_key, 0);
4525d4f98a2SYan Zheng 	else
4535d4f98a2SYan Zheng 		btrfs_node_key(buf, &disk_key, 0);
4545d4f98a2SYan Zheng 
4550f5053ebSGoldwyn Rodrigues 	if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
4565d4f98a2SYan Zheng 		parent_start = parent->start;
4575d4f98a2SYan Zheng 
458a6279470SFilipe Manana 	cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
4599631e4ccSJosef Bacik 					   level, search_start, empty_size, nest);
4606702ed49SChris Mason 	if (IS_ERR(cow))
4616702ed49SChris Mason 		return PTR_ERR(cow);
4626702ed49SChris Mason 
463b4ce94deSChris Mason 	/* cow is set to blocking by btrfs_init_new_buffer */
464b4ce94deSChris Mason 
46558e8012cSDavid Sterba 	copy_extent_buffer_full(cow, buf);
466db94535dSChris Mason 	btrfs_set_header_bytenr(cow, cow->start);
4675f39d397SChris Mason 	btrfs_set_header_generation(cow, trans->transid);
4685d4f98a2SYan Zheng 	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
4695d4f98a2SYan Zheng 	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
4705d4f98a2SYan Zheng 				     BTRFS_HEADER_FLAG_RELOC);
4715d4f98a2SYan Zheng 	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
4725d4f98a2SYan Zheng 		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
4735d4f98a2SYan Zheng 	else
4745f39d397SChris Mason 		btrfs_set_header_owner(cow, root->root_key.objectid);
4756702ed49SChris Mason 
476de37aa51SNikolay Borisov 	write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
4772b82032cSYan Zheng 
478be1a5564SMark Fasheh 	ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
479b68dc2a9SMark Fasheh 	if (ret) {
480572c83acSJosef Bacik 		btrfs_tree_unlock(cow);
481572c83acSJosef Bacik 		free_extent_buffer(cow);
48266642832SJeff Mahoney 		btrfs_abort_transaction(trans, ret);
483b68dc2a9SMark Fasheh 		return ret;
484b68dc2a9SMark Fasheh 	}
4851a40e23bSZheng Yan 
48692a7cc42SQu Wenruo 	if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
48783d4cfd4SJosef Bacik 		ret = btrfs_reloc_cow_block(trans, root, buf, cow);
48893314e3bSZhaolei 		if (ret) {
489572c83acSJosef Bacik 			btrfs_tree_unlock(cow);
490572c83acSJosef Bacik 			free_extent_buffer(cow);
49166642832SJeff Mahoney 			btrfs_abort_transaction(trans, ret);
49283d4cfd4SJosef Bacik 			return ret;
49383d4cfd4SJosef Bacik 		}
49493314e3bSZhaolei 	}
4953fd0a558SYan, Zheng 
4966702ed49SChris Mason 	if (buf == root->node) {
497925baeddSChris Mason 		WARN_ON(parent && parent != buf);
4985d4f98a2SYan Zheng 		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
4995d4f98a2SYan Zheng 		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
5005d4f98a2SYan Zheng 			parent_start = buf->start;
501925baeddSChris Mason 
50267439dadSDavid Sterba 		atomic_inc(&cow->refs);
503406808abSFilipe Manana 		ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
504d9d19a01SDavid Sterba 		BUG_ON(ret < 0);
505240f62c8SChris Mason 		rcu_assign_pointer(root->node, cow);
506925baeddSChris Mason 
507f0486c68SYan, Zheng 		btrfs_free_tree_block(trans, root, buf, parent_start,
5085581a51aSJan Schmidt 				      last_ref);
5095f39d397SChris Mason 		free_extent_buffer(buf);
5100b86a832SChris Mason 		add_root_to_dirty_list(root);
5116702ed49SChris Mason 	} else {
5125d4f98a2SYan Zheng 		WARN_ON(trans->transid != btrfs_header_generation(parent));
513f3a84ccdSFilipe Manana 		btrfs_tree_mod_log_insert_key(parent, parent_slot,
514f3a84ccdSFilipe Manana 					      BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
5155f39d397SChris Mason 		btrfs_set_node_blockptr(parent, parent_slot,
516db94535dSChris Mason 					cow->start);
51774493f7aSChris Mason 		btrfs_set_node_ptr_generation(parent, parent_slot,
51874493f7aSChris Mason 					      trans->transid);
5196702ed49SChris Mason 		btrfs_mark_buffer_dirty(parent);
5205de865eeSFilipe David Borba Manana 		if (last_ref) {
521f3a84ccdSFilipe Manana 			ret = btrfs_tree_mod_log_free_eb(buf);
5225de865eeSFilipe David Borba Manana 			if (ret) {
523572c83acSJosef Bacik 				btrfs_tree_unlock(cow);
524572c83acSJosef Bacik 				free_extent_buffer(cow);
52566642832SJeff Mahoney 				btrfs_abort_transaction(trans, ret);
5265de865eeSFilipe David Borba Manana 				return ret;
5275de865eeSFilipe David Borba Manana 			}
5285de865eeSFilipe David Borba Manana 		}
529f0486c68SYan, Zheng 		btrfs_free_tree_block(trans, root, buf, parent_start,
5305581a51aSJan Schmidt 				      last_ref);
5316702ed49SChris Mason 	}
532925baeddSChris Mason 	if (unlock_orig)
533925baeddSChris Mason 		btrfs_tree_unlock(buf);
5343083ee2eSJosef Bacik 	free_extent_buffer_stale(buf);
5356702ed49SChris Mason 	btrfs_mark_buffer_dirty(cow);
5366702ed49SChris Mason 	*cow_ret = cow;
5376702ed49SChris Mason 	return 0;
5386702ed49SChris Mason }
5396702ed49SChris Mason 
5405d4f98a2SYan Zheng static inline int should_cow_block(struct btrfs_trans_handle *trans,
5415d4f98a2SYan Zheng 				   struct btrfs_root *root,
5425d4f98a2SYan Zheng 				   struct extent_buffer *buf)
5435d4f98a2SYan Zheng {
544f5ee5c9aSJeff Mahoney 	if (btrfs_is_testing(root->fs_info))
545faa2dbf0SJosef Bacik 		return 0;
546fccb84c9SDavid Sterba 
547d1980131SDavid Sterba 	/* Ensure we can see the FORCE_COW bit */
548d1980131SDavid Sterba 	smp_mb__before_atomic();
549f1ebcc74SLiu Bo 
550f1ebcc74SLiu Bo 	/*
551f1ebcc74SLiu Bo 	 * We do not need to cow a block if
552f1ebcc74SLiu Bo 	 * 1) this block is not created or changed in this transaction;
553f1ebcc74SLiu Bo 	 * 2) this block does not belong to TREE_RELOC tree;
554f1ebcc74SLiu Bo 	 * 3) the root is not forced COW.
555f1ebcc74SLiu Bo 	 *
556f1ebcc74SLiu Bo 	 * What is forced COW:
55701327610SNicholas D Steeves 	 *    when we create snapshot during committing the transaction,
55852042d8eSAndrea Gelmini 	 *    after we've finished copying src root, we must COW the shared
559f1ebcc74SLiu Bo 	 *    block to ensure the metadata consistency.
560f1ebcc74SLiu Bo 	 */
5615d4f98a2SYan Zheng 	if (btrfs_header_generation(buf) == trans->transid &&
5625d4f98a2SYan Zheng 	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
5635d4f98a2SYan Zheng 	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
564f1ebcc74SLiu Bo 	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
56527cdeb70SMiao Xie 	    !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
5665d4f98a2SYan Zheng 		return 0;
5675d4f98a2SYan Zheng 	return 1;
5685d4f98a2SYan Zheng }
5695d4f98a2SYan Zheng 
570d352ac68SChris Mason /*
571d352ac68SChris Mason  * cows a single block, see __btrfs_cow_block for the real work.
57201327610SNicholas D Steeves  * This version of it has extra checks so that a block isn't COWed more than
573d352ac68SChris Mason  * once per transaction, as long as it hasn't been written yet
574d352ac68SChris Mason  */
575d397712bSChris Mason noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
5765f39d397SChris Mason 		    struct btrfs_root *root, struct extent_buffer *buf,
5775f39d397SChris Mason 		    struct extent_buffer *parent, int parent_slot,
5789631e4ccSJosef Bacik 		    struct extent_buffer **cow_ret,
5799631e4ccSJosef Bacik 		    enum btrfs_lock_nesting nest)
58002217ed2SChris Mason {
5810b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
5826702ed49SChris Mason 	u64 search_start;
583f510cfecSChris Mason 	int ret;
584dc17ff8fSChris Mason 
58583354f07SJosef Bacik 	if (test_bit(BTRFS_ROOT_DELETING, &root->state))
58683354f07SJosef Bacik 		btrfs_err(fs_info,
58783354f07SJosef Bacik 			"COW'ing blocks on a fs root that's being dropped");
58883354f07SJosef Bacik 
5890b246afaSJeff Mahoney 	if (trans->transaction != fs_info->running_transaction)
59031b1a2bdSJulia Lawall 		WARN(1, KERN_CRIT "trans %llu running %llu\n",
591c1c9ff7cSGeert Uytterhoeven 		       trans->transid,
5920b246afaSJeff Mahoney 		       fs_info->running_transaction->transid);
59331b1a2bdSJulia Lawall 
5940b246afaSJeff Mahoney 	if (trans->transid != fs_info->generation)
59531b1a2bdSJulia Lawall 		WARN(1, KERN_CRIT "trans %llu running %llu\n",
5960b246afaSJeff Mahoney 		       trans->transid, fs_info->generation);
597dc17ff8fSChris Mason 
5985d4f98a2SYan Zheng 	if (!should_cow_block(trans, root, buf)) {
59902217ed2SChris Mason 		*cow_ret = buf;
60002217ed2SChris Mason 		return 0;
60102217ed2SChris Mason 	}
602c487685dSChris Mason 
603ee22184bSByongho Lee 	search_start = buf->start & ~((u64)SZ_1G - 1);
604b4ce94deSChris Mason 
605f616f5cdSQu Wenruo 	/*
606f616f5cdSQu Wenruo 	 * Before CoWing this block for later modification, check if it's
607f616f5cdSQu Wenruo 	 * the subtree root and do the delayed subtree trace if needed.
608f616f5cdSQu Wenruo 	 *
609f616f5cdSQu Wenruo 	 * Also We don't care about the error, as it's handled internally.
610f616f5cdSQu Wenruo 	 */
611f616f5cdSQu Wenruo 	btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
612f510cfecSChris Mason 	ret = __btrfs_cow_block(trans, root, buf, parent,
6139631e4ccSJosef Bacik 				 parent_slot, cow_ret, search_start, 0, nest);
6141abe9b8aSliubo 
6151abe9b8aSliubo 	trace_btrfs_cow_block(root, buf, *cow_ret);
6161abe9b8aSliubo 
617f510cfecSChris Mason 	return ret;
6182c90e5d6SChris Mason }
619f75e2b79SJosef Bacik ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO);
6206702ed49SChris Mason 
/*
 * Defrag helper: tell whether two blocks referenced by a node sit near each
 * other.
 *
 * Returns 1 when the gap between the end of the lower block and the start of
 * the higher one is below 32768 bytes, 0 otherwise (including when both
 * addresses are equal).  The gap is computed in unsigned 64-bit arithmetic.
 */
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
{
	u64 gap;

	if (blocknr == other)
		return 0;

	if (blocknr < other)
		gap = other - (blocknr + blocksize);
	else
		gap = blocknr - (other + blocksize);

	return gap < 32768;
}
63302217ed2SChris Mason 
634ce6ef5abSDavid Sterba #ifdef __LITTLE_ENDIAN
635ce6ef5abSDavid Sterba 
636ce6ef5abSDavid Sterba /*
637ce6ef5abSDavid Sterba  * Compare two keys, on little-endian the disk order is same as CPU order and
638ce6ef5abSDavid Sterba  * we can avoid the conversion.
639ce6ef5abSDavid Sterba  */
640ce6ef5abSDavid Sterba static int comp_keys(const struct btrfs_disk_key *disk_key,
641ce6ef5abSDavid Sterba 		     const struct btrfs_key *k2)
642ce6ef5abSDavid Sterba {
643ce6ef5abSDavid Sterba 	const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;
644ce6ef5abSDavid Sterba 
645ce6ef5abSDavid Sterba 	return btrfs_comp_cpu_keys(k1, k2);
646ce6ef5abSDavid Sterba }
647ce6ef5abSDavid Sterba 
648ce6ef5abSDavid Sterba #else
649ce6ef5abSDavid Sterba 
650081e9573SChris Mason /*
651081e9573SChris Mason  * compare two keys in a memcmp fashion
652081e9573SChris Mason  */
653310712b2SOmar Sandoval static int comp_keys(const struct btrfs_disk_key *disk,
654310712b2SOmar Sandoval 		     const struct btrfs_key *k2)
655081e9573SChris Mason {
656081e9573SChris Mason 	struct btrfs_key k1;
657081e9573SChris Mason 
658081e9573SChris Mason 	btrfs_disk_key_to_cpu(&k1, disk);
659081e9573SChris Mason 
66020736abaSDiego Calleja 	return btrfs_comp_cpu_keys(&k1, k2);
661081e9573SChris Mason }
662ce6ef5abSDavid Sterba #endif
663081e9573SChris Mason 
664f3465ca4SJosef Bacik /*
665f3465ca4SJosef Bacik  * same as comp_keys only with two btrfs_key's
666f3465ca4SJosef Bacik  */
667e1f60a65SDavid Sterba int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
668f3465ca4SJosef Bacik {
669f3465ca4SJosef Bacik 	if (k1->objectid > k2->objectid)
670f3465ca4SJosef Bacik 		return 1;
671f3465ca4SJosef Bacik 	if (k1->objectid < k2->objectid)
672f3465ca4SJosef Bacik 		return -1;
673f3465ca4SJosef Bacik 	if (k1->type > k2->type)
674f3465ca4SJosef Bacik 		return 1;
675f3465ca4SJosef Bacik 	if (k1->type < k2->type)
676f3465ca4SJosef Bacik 		return -1;
677f3465ca4SJosef Bacik 	if (k1->offset > k2->offset)
678f3465ca4SJosef Bacik 		return 1;
679f3465ca4SJosef Bacik 	if (k1->offset < k2->offset)
680f3465ca4SJosef Bacik 		return -1;
681f3465ca4SJosef Bacik 	return 0;
682f3465ca4SJosef Bacik }
683081e9573SChris Mason 
684d352ac68SChris Mason /*
685d352ac68SChris Mason  * this is used by the defrag code to go through all the
686d352ac68SChris Mason  * leaves pointed to by a node and reallocate them so that
687d352ac68SChris Mason  * disk order is close to key order
688d352ac68SChris Mason  */
6896702ed49SChris Mason int btrfs_realloc_node(struct btrfs_trans_handle *trans,
6905f39d397SChris Mason 		       struct btrfs_root *root, struct extent_buffer *parent,
691de78b51aSEric Sandeen 		       int start_slot, u64 *last_ret,
692a6b6e75eSChris Mason 		       struct btrfs_key *progress)
6936702ed49SChris Mason {
6940b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
6956b80053dSChris Mason 	struct extent_buffer *cur;
6966702ed49SChris Mason 	u64 blocknr;
697e9d0b13bSChris Mason 	u64 search_start = *last_ret;
698e9d0b13bSChris Mason 	u64 last_block = 0;
6996702ed49SChris Mason 	u64 other;
7006702ed49SChris Mason 	u32 parent_nritems;
7016702ed49SChris Mason 	int end_slot;
7026702ed49SChris Mason 	int i;
7036702ed49SChris Mason 	int err = 0;
7046b80053dSChris Mason 	u32 blocksize;
705081e9573SChris Mason 	int progress_passed = 0;
706081e9573SChris Mason 	struct btrfs_disk_key disk_key;
7076702ed49SChris Mason 
7080b246afaSJeff Mahoney 	WARN_ON(trans->transaction != fs_info->running_transaction);
7090b246afaSJeff Mahoney 	WARN_ON(trans->transid != fs_info->generation);
71086479a04SChris Mason 
7116b80053dSChris Mason 	parent_nritems = btrfs_header_nritems(parent);
7120b246afaSJeff Mahoney 	blocksize = fs_info->nodesize;
7135dfe2be7SFilipe Manana 	end_slot = parent_nritems - 1;
7146702ed49SChris Mason 
7155dfe2be7SFilipe Manana 	if (parent_nritems <= 1)
7166702ed49SChris Mason 		return 0;
7176702ed49SChris Mason 
7185dfe2be7SFilipe Manana 	for (i = start_slot; i <= end_slot; i++) {
7196702ed49SChris Mason 		int close = 1;
720a6b6e75eSChris Mason 
721081e9573SChris Mason 		btrfs_node_key(parent, &disk_key, i);
722081e9573SChris Mason 		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
723081e9573SChris Mason 			continue;
724081e9573SChris Mason 
725081e9573SChris Mason 		progress_passed = 1;
7266b80053dSChris Mason 		blocknr = btrfs_node_blockptr(parent, i);
727e9d0b13bSChris Mason 		if (last_block == 0)
728e9d0b13bSChris Mason 			last_block = blocknr;
7295708b959SChris Mason 
7306702ed49SChris Mason 		if (i > 0) {
7316b80053dSChris Mason 			other = btrfs_node_blockptr(parent, i - 1);
7326b80053dSChris Mason 			close = close_blocks(blocknr, other, blocksize);
7336702ed49SChris Mason 		}
7345dfe2be7SFilipe Manana 		if (!close && i < end_slot) {
7356b80053dSChris Mason 			other = btrfs_node_blockptr(parent, i + 1);
7366b80053dSChris Mason 			close = close_blocks(blocknr, other, blocksize);
7376702ed49SChris Mason 		}
738e9d0b13bSChris Mason 		if (close) {
739e9d0b13bSChris Mason 			last_block = blocknr;
7406702ed49SChris Mason 			continue;
741e9d0b13bSChris Mason 		}
7426702ed49SChris Mason 
743206983b7SJosef Bacik 		cur = btrfs_read_node_slot(parent, i);
744206983b7SJosef Bacik 		if (IS_ERR(cur))
74564c043deSLiu Bo 			return PTR_ERR(cur);
746e9d0b13bSChris Mason 		if (search_start == 0)
7476b80053dSChris Mason 			search_start = last_block;
748e9d0b13bSChris Mason 
749e7a84565SChris Mason 		btrfs_tree_lock(cur);
7506b80053dSChris Mason 		err = __btrfs_cow_block(trans, root, cur, parent, i,
751e7a84565SChris Mason 					&cur, search_start,
7526b80053dSChris Mason 					min(16 * blocksize,
7539631e4ccSJosef Bacik 					    (end_slot - i) * blocksize),
7549631e4ccSJosef Bacik 					BTRFS_NESTING_COW);
755252c38f0SYan 		if (err) {
756e7a84565SChris Mason 			btrfs_tree_unlock(cur);
7576b80053dSChris Mason 			free_extent_buffer(cur);
7586702ed49SChris Mason 			break;
759252c38f0SYan 		}
760e7a84565SChris Mason 		search_start = cur->start;
761e7a84565SChris Mason 		last_block = cur->start;
762f2183bdeSChris Mason 		*last_ret = search_start;
763e7a84565SChris Mason 		btrfs_tree_unlock(cur);
764e7a84565SChris Mason 		free_extent_buffer(cur);
7656702ed49SChris Mason 	}
7666702ed49SChris Mason 	return err;
7676702ed49SChris Mason }
7686702ed49SChris Mason 
76974123bd7SChris Mason /*
7705f39d397SChris Mason  * search for key in the extent_buffer.  The items start at offset p,
7715f39d397SChris Mason  * and they are item_size apart.  There are 'max' items in p.
7725f39d397SChris Mason  *
77374123bd7SChris Mason  * the slot in the array is returned via slot, and it points to
77474123bd7SChris Mason  * the place where you would insert key if it is not found in
77574123bd7SChris Mason  * the array.
77674123bd7SChris Mason  *
77774123bd7SChris Mason  * slot may point to max if the key is bigger than all of the keys
77874123bd7SChris Mason  */
779e02119d5SChris Mason static noinline int generic_bin_search(struct extent_buffer *eb,
780310712b2SOmar Sandoval 				       unsigned long p, int item_size,
781310712b2SOmar Sandoval 				       const struct btrfs_key *key,
782be0e5c09SChris Mason 				       int max, int *slot)
783be0e5c09SChris Mason {
784be0e5c09SChris Mason 	int low = 0;
785be0e5c09SChris Mason 	int high = max;
786be0e5c09SChris Mason 	int ret;
7875cd17f34SDavid Sterba 	const int key_size = sizeof(struct btrfs_disk_key);
788be0e5c09SChris Mason 
7895e24e9afSLiu Bo 	if (low > high) {
7905e24e9afSLiu Bo 		btrfs_err(eb->fs_info,
7915e24e9afSLiu Bo 		 "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
7925e24e9afSLiu Bo 			  __func__, low, high, eb->start,
7935e24e9afSLiu Bo 			  btrfs_header_owner(eb), btrfs_header_level(eb));
7945e24e9afSLiu Bo 		return -EINVAL;
7955e24e9afSLiu Bo 	}
7965e24e9afSLiu Bo 
797be0e5c09SChris Mason 	while (low < high) {
7985cd17f34SDavid Sterba 		unsigned long oip;
7995cd17f34SDavid Sterba 		unsigned long offset;
8005cd17f34SDavid Sterba 		struct btrfs_disk_key *tmp;
8015cd17f34SDavid Sterba 		struct btrfs_disk_key unaligned;
8025cd17f34SDavid Sterba 		int mid;
8035cd17f34SDavid Sterba 
804be0e5c09SChris Mason 		mid = (low + high) / 2;
8055f39d397SChris Mason 		offset = p + mid * item_size;
8065cd17f34SDavid Sterba 		oip = offset_in_page(offset);
8075f39d397SChris Mason 
8085cd17f34SDavid Sterba 		if (oip + key_size <= PAGE_SIZE) {
809884b07d0SQu Wenruo 			const unsigned long idx = get_eb_page_index(offset);
8105cd17f34SDavid Sterba 			char *kaddr = page_address(eb->pages[idx]);
811934d375bSChris Mason 
812884b07d0SQu Wenruo 			oip = get_eb_offset_in_page(eb, offset);
8135cd17f34SDavid Sterba 			tmp = (struct btrfs_disk_key *)(kaddr + oip);
8145cd17f34SDavid Sterba 		} else {
8155cd17f34SDavid Sterba 			read_extent_buffer(eb, &unaligned, offset, key_size);
8165f39d397SChris Mason 			tmp = &unaligned;
817479965d6SChris Mason 		}
818479965d6SChris Mason 
819be0e5c09SChris Mason 		ret = comp_keys(tmp, key);
820be0e5c09SChris Mason 
821be0e5c09SChris Mason 		if (ret < 0)
822be0e5c09SChris Mason 			low = mid + 1;
823be0e5c09SChris Mason 		else if (ret > 0)
824be0e5c09SChris Mason 			high = mid;
825be0e5c09SChris Mason 		else {
826be0e5c09SChris Mason 			*slot = mid;
827be0e5c09SChris Mason 			return 0;
828be0e5c09SChris Mason 		}
829be0e5c09SChris Mason 	}
830be0e5c09SChris Mason 	*slot = low;
831be0e5c09SChris Mason 	return 1;
832be0e5c09SChris Mason }
833be0e5c09SChris Mason 
83497571fd0SChris Mason /*
83597571fd0SChris Mason  * simple bin_search frontend that does the right thing for
83697571fd0SChris Mason  * leaves vs nodes
83797571fd0SChris Mason  */
838a74b35ecSNikolay Borisov int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
839e3b83361SQu Wenruo 		     int *slot)
840be0e5c09SChris Mason {
841e3b83361SQu Wenruo 	if (btrfs_header_level(eb) == 0)
8425f39d397SChris Mason 		return generic_bin_search(eb,
8435f39d397SChris Mason 					  offsetof(struct btrfs_leaf, items),
8440783fcfcSChris Mason 					  sizeof(struct btrfs_item),
8455f39d397SChris Mason 					  key, btrfs_header_nritems(eb),
8467518a238SChris Mason 					  slot);
847f775738fSWang Sheng-Hui 	else
8485f39d397SChris Mason 		return generic_bin_search(eb,
8495f39d397SChris Mason 					  offsetof(struct btrfs_node, ptrs),
850123abc88SChris Mason 					  sizeof(struct btrfs_key_ptr),
8515f39d397SChris Mason 					  key, btrfs_header_nritems(eb),
8527518a238SChris Mason 					  slot);
853be0e5c09SChris Mason }
854be0e5c09SChris Mason 
855f0486c68SYan, Zheng static void root_add_used(struct btrfs_root *root, u32 size)
856f0486c68SYan, Zheng {
857f0486c68SYan, Zheng 	spin_lock(&root->accounting_lock);
858f0486c68SYan, Zheng 	btrfs_set_root_used(&root->root_item,
859f0486c68SYan, Zheng 			    btrfs_root_used(&root->root_item) + size);
860f0486c68SYan, Zheng 	spin_unlock(&root->accounting_lock);
861f0486c68SYan, Zheng }
862f0486c68SYan, Zheng 
863f0486c68SYan, Zheng static void root_sub_used(struct btrfs_root *root, u32 size)
864f0486c68SYan, Zheng {
865f0486c68SYan, Zheng 	spin_lock(&root->accounting_lock);
866f0486c68SYan, Zheng 	btrfs_set_root_used(&root->root_item,
867f0486c68SYan, Zheng 			    btrfs_root_used(&root->root_item) - size);
868f0486c68SYan, Zheng 	spin_unlock(&root->accounting_lock);
869f0486c68SYan, Zheng }
870f0486c68SYan, Zheng 
871d352ac68SChris Mason /* given a node and slot number, this reads the blocks it points to.  The
872d352ac68SChris Mason  * extent buffer is returned with a reference taken (but unlocked).
873d352ac68SChris Mason  */
8744b231ae4SDavid Sterba struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
8754b231ae4SDavid Sterba 					   int slot)
876bb803951SChris Mason {
877ca7a79adSChris Mason 	int level = btrfs_header_level(parent);
878416bc658SJosef Bacik 	struct extent_buffer *eb;
879581c1760SQu Wenruo 	struct btrfs_key first_key;
880416bc658SJosef Bacik 
881fb770ae4SLiu Bo 	if (slot < 0 || slot >= btrfs_header_nritems(parent))
882fb770ae4SLiu Bo 		return ERR_PTR(-ENOENT);
883ca7a79adSChris Mason 
884ca7a79adSChris Mason 	BUG_ON(level == 0);
885ca7a79adSChris Mason 
886581c1760SQu Wenruo 	btrfs_node_key_to_cpu(parent, &first_key, slot);
887d0d20b0fSDavid Sterba 	eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
8881b7ec85eSJosef Bacik 			     btrfs_header_owner(parent),
889581c1760SQu Wenruo 			     btrfs_node_ptr_generation(parent, slot),
890581c1760SQu Wenruo 			     level - 1, &first_key);
891fb770ae4SLiu Bo 	if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
892416bc658SJosef Bacik 		free_extent_buffer(eb);
893fb770ae4SLiu Bo 		eb = ERR_PTR(-EIO);
894416bc658SJosef Bacik 	}
895416bc658SJosef Bacik 
896416bc658SJosef Bacik 	return eb;
897bb803951SChris Mason }
898bb803951SChris Mason 
899d352ac68SChris Mason /*
900d352ac68SChris Mason  * node level balancing, used to make sure nodes are in proper order for
901d352ac68SChris Mason  * item deletion.  We balance from the top down, so we have to make sure
902d352ac68SChris Mason  * that a deletion won't leave a node completely empty later on.
 *
 * @trans: transaction handle handed to the COW, push and free helpers
 * @root:  tree being balanced
 * @path:  path whose node at @level ("mid" below) is the one being balanced
 * @level: level of that node; asserted to be > 0
 *
 * Returns 0 right away when there is no parent in the path and mid does not
 * hold exactly one pointer, or when mid holds more than a quarter of
 * BTRFS_NODEPTRS_PER_BLOCK() pointers.  Otherwise returns whatever is left in
 * 'ret' when the function finishes (0 or a negative errno).  A sibling that
 * cannot be read is treated as absent rather than as an error.
903d352ac68SChris Mason  */
904e02119d5SChris Mason static noinline int balance_level(struct btrfs_trans_handle *trans,
90598ed5174SChris Mason 			 struct btrfs_root *root,
90698ed5174SChris Mason 			 struct btrfs_path *path, int level)
907bb803951SChris Mason {
9080b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
9095f39d397SChris Mason 	struct extent_buffer *right = NULL;
9105f39d397SChris Mason 	struct extent_buffer *mid;
9115f39d397SChris Mason 	struct extent_buffer *left = NULL;
9125f39d397SChris Mason 	struct extent_buffer *parent = NULL;
913bb803951SChris Mason 	int ret = 0;
914bb803951SChris Mason 	int wret;
915bb803951SChris Mason 	int pslot;
916bb803951SChris Mason 	int orig_slot = path->slots[level];
91779f95c82SChris Mason 	u64 orig_ptr;
918bb803951SChris Mason 
91998e6b1ebSLiu Bo 	ASSERT(level > 0);
920bb803951SChris Mason 
9215f39d397SChris Mason 	mid = path->nodes[level];
922b4ce94deSChris Mason 
923ac5887c8SJosef Bacik 	WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK);
9247bb86316SChris Mason 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
9257bb86316SChris Mason 
	/* remembered so the final sanity check can verify the path still points here */
9261d4f8a0cSChris Mason 	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
92779f95c82SChris Mason 
	/* pslot is only assigned (and only used) when a parent can exist */
928a05a9bb1SLi Zefan 	if (level < BTRFS_MAX_LEVEL - 1) {
9295f39d397SChris Mason 		parent = path->nodes[level + 1];
930bb803951SChris Mason 		pslot = path->slots[level + 1];
931a05a9bb1SLi Zefan 	}
932bb803951SChris Mason 
93340689478SChris Mason 	/*
93440689478SChris Mason 	 * deal with the case where there is only one pointer in the root
93540689478SChris Mason 	 * by promoting the node below to a root
93640689478SChris Mason 	 */
9375f39d397SChris Mason 	if (!parent) {
9385f39d397SChris Mason 		struct extent_buffer *child;
939bb803951SChris Mason 
9405f39d397SChris Mason 		if (btrfs_header_nritems(mid) != 1)
941bb803951SChris Mason 			return 0;
942bb803951SChris Mason 
943bb803951SChris Mason 		/* promote the child to a root */
9444b231ae4SDavid Sterba 		child = btrfs_read_node_slot(mid, 0);
945fb770ae4SLiu Bo 		if (IS_ERR(child)) {
946fb770ae4SLiu Bo 			ret = PTR_ERR(child);
9470b246afaSJeff Mahoney 			btrfs_handle_fs_error(fs_info, ret, NULL);
948305a26afSMark Fasheh 			goto enospc;
949305a26afSMark Fasheh 		}
950305a26afSMark Fasheh 
951925baeddSChris Mason 		btrfs_tree_lock(child);
9529631e4ccSJosef Bacik 		ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
9539631e4ccSJosef Bacik 				      BTRFS_NESTING_COW);
954f0486c68SYan, Zheng 		if (ret) {
955f0486c68SYan, Zheng 			btrfs_tree_unlock(child);
956f0486c68SYan, Zheng 			free_extent_buffer(child);
957f0486c68SYan, Zheng 			goto enospc;
958f0486c68SYan, Zheng 		}
9592f375ab9SYan 
960406808abSFilipe Manana 		ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
961d9d19a01SDavid Sterba 		BUG_ON(ret < 0);
962240f62c8SChris Mason 		rcu_assign_pointer(root->node, child);
963925baeddSChris Mason 
9640b86a832SChris Mason 		add_root_to_dirty_list(root);
965925baeddSChris Mason 		btrfs_tree_unlock(child);
966b4ce94deSChris Mason 
		/* detach the old root (mid) from the path before releasing it */
967925baeddSChris Mason 		path->locks[level] = 0;
968bb803951SChris Mason 		path->nodes[level] = NULL;
9696a884d7dSDavid Sterba 		btrfs_clean_tree_block(mid);
970925baeddSChris Mason 		btrfs_tree_unlock(mid);
971bb803951SChris Mason 		/* once for the path */
9725f39d397SChris Mason 		free_extent_buffer(mid);
973f0486c68SYan, Zheng 
974f0486c68SYan, Zheng 		root_sub_used(root, mid->len);
9755581a51aSJan Schmidt 		btrfs_free_tree_block(trans, root, mid, 0, 1);
976bb803951SChris Mason 		/* once for the root ptr */
9773083ee2eSJosef Bacik 		free_extent_buffer_stale(mid);
978f0486c68SYan, Zheng 		return 0;
979bb803951SChris Mason 	}
	/* more than a quarter full: nothing to balance */
9805f39d397SChris Mason 	if (btrfs_header_nritems(mid) >
9810b246afaSJeff Mahoney 	    BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
982bb803951SChris Mason 		return 0;
983bb803951SChris Mason 
	/*
	 * A failed read (btrfs_read_node_slot() also fails for an out-of-range
	 * slot) simply means we carry on without a left sibling.
	 */
9844b231ae4SDavid Sterba 	left = btrfs_read_node_slot(parent, pslot - 1);
985fb770ae4SLiu Bo 	if (IS_ERR(left))
986fb770ae4SLiu Bo 		left = NULL;
987fb770ae4SLiu Bo 
9885f39d397SChris Mason 	if (left) {
989bf77467aSJosef Bacik 		__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
9905f39d397SChris Mason 		wret = btrfs_cow_block(trans, root, left,
9919631e4ccSJosef Bacik 				       parent, pslot - 1, &left,
992bf59a5a2SJosef Bacik 				       BTRFS_NESTING_LEFT_COW);
99354aa1f4dSChris Mason 		if (wret) {
99454aa1f4dSChris Mason 			ret = wret;
99554aa1f4dSChris Mason 			goto enospc;
99654aa1f4dSChris Mason 		}
9972cc58cf2SChris Mason 	}
998fb770ae4SLiu Bo 
	/* same treatment for the right sibling: a read failure means "none" */
9994b231ae4SDavid Sterba 	right = btrfs_read_node_slot(parent, pslot + 1);
1000fb770ae4SLiu Bo 	if (IS_ERR(right))
1001fb770ae4SLiu Bo 		right = NULL;
1002fb770ae4SLiu Bo 
10035f39d397SChris Mason 	if (right) {
1004bf77467aSJosef Bacik 		__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
10055f39d397SChris Mason 		wret = btrfs_cow_block(trans, root, right,
10069631e4ccSJosef Bacik 				       parent, pslot + 1, &right,
1007bf59a5a2SJosef Bacik 				       BTRFS_NESTING_RIGHT_COW);
10082cc58cf2SChris Mason 		if (wret) {
10092cc58cf2SChris Mason 			ret = wret;
10102cc58cf2SChris Mason 			goto enospc;
10112cc58cf2SChris Mason 		}
10122cc58cf2SChris Mason 	}
10132cc58cf2SChris Mason 
10142cc58cf2SChris Mason 	/* first, try to make some room in the middle buffer */
10155f39d397SChris Mason 	if (left) {
		/* from here on orig_slot counts from the first slot of left */
10165f39d397SChris Mason 		orig_slot += btrfs_header_nritems(left);
1017d30a668fSDavid Sterba 		wret = push_node_left(trans, left, mid, 1);
101879f95c82SChris Mason 		if (wret < 0)
101979f95c82SChris Mason 			ret = wret;
1020bb803951SChris Mason 	}
102179f95c82SChris Mason 
102279f95c82SChris Mason 	/*
102379f95c82SChris Mason 	 * then try to empty the right most buffer into the middle
102479f95c82SChris Mason 	 */
10255f39d397SChris Mason 	if (right) {
1026d30a668fSDavid Sterba 		wret = push_node_left(trans, mid, right, 1);
		/* -ENOSPC from this push is not recorded as an error */
102754aa1f4dSChris Mason 		if (wret < 0 && wret != -ENOSPC)
102879f95c82SChris Mason 			ret = wret;
10295f39d397SChris Mason 		if (btrfs_header_nritems(right) == 0) {
			/* right is empty now: drop its pointer from the parent and free it */
10306a884d7dSDavid Sterba 			btrfs_clean_tree_block(right);
1031925baeddSChris Mason 			btrfs_tree_unlock(right);
1032afe5fea7STsutomu Itoh 			del_ptr(root, path, level + 1, pslot + 1);
1033f0486c68SYan, Zheng 			root_sub_used(root, right->len);
10345581a51aSJan Schmidt 			btrfs_free_tree_block(trans, root, right, 0, 1);
10353083ee2eSJosef Bacik 			free_extent_buffer_stale(right);
1036f0486c68SYan, Zheng 			right = NULL;
1037bb803951SChris Mason 		} else {
			/* right kept some items: refresh its key in the parent */
10385f39d397SChris Mason 			struct btrfs_disk_key right_key;
10395f39d397SChris Mason 			btrfs_node_key(right, &right_key, 0);
1040f3a84ccdSFilipe Manana 			ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
1041f3a84ccdSFilipe Manana 					BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
10420e82bcfeSDavid Sterba 			BUG_ON(ret < 0);
10435f39d397SChris Mason 			btrfs_set_node_key(parent, &right_key, pslot + 1);
10445f39d397SChris Mason 			btrfs_mark_buffer_dirty(parent);
1045bb803951SChris Mason 		}
1046bb803951SChris Mason 	}
10475f39d397SChris Mason 	if (btrfs_header_nritems(mid) == 1) {
104879f95c82SChris Mason 		/*
104979f95c82SChris Mason 		 * we're not allowed to leave a node with one item in the
105079f95c82SChris Mason 		 * tree during a delete.  A deletion from lower in the tree
105179f95c82SChris Mason 		 * could try to delete the only pointer in this node.
105279f95c82SChris Mason 		 * So, pull some keys from the left.
105379f95c82SChris Mason 		 * There has to be a left pointer at this point because
105479f95c82SChris Mason 		 * otherwise we would have pulled some pointers from the
105579f95c82SChris Mason 		 * right
105679f95c82SChris Mason 		 */
1057305a26afSMark Fasheh 		if (!left) {
1058305a26afSMark Fasheh 			ret = -EROFS;
10590b246afaSJeff Mahoney 			btrfs_handle_fs_error(fs_info, ret, NULL);
1060305a26afSMark Fasheh 			goto enospc;
1061305a26afSMark Fasheh 		}
106255d32ed8SDavid Sterba 		wret = balance_node_right(trans, mid, left);
106354aa1f4dSChris Mason 		if (wret < 0) {
106479f95c82SChris Mason 			ret = wret;
106554aa1f4dSChris Mason 			goto enospc;
106654aa1f4dSChris Mason 		}
		/* balance_node_right() returned 1: fall back to pushing mid into left */
1067bce4eae9SChris Mason 		if (wret == 1) {
1068d30a668fSDavid Sterba 			wret = push_node_left(trans, left, mid, 1);
1069bce4eae9SChris Mason 			if (wret < 0)
1070bce4eae9SChris Mason 				ret = wret;
1071bce4eae9SChris Mason 		}
107279f95c82SChris Mason 		BUG_ON(wret == 1);
107379f95c82SChris Mason 	}
10745f39d397SChris Mason 	if (btrfs_header_nritems(mid) == 0) {
		/* mid was emptied: drop its pointer from the parent and free it */
10756a884d7dSDavid Sterba 		btrfs_clean_tree_block(mid);
1076925baeddSChris Mason 		btrfs_tree_unlock(mid);
1077afe5fea7STsutomu Itoh 		del_ptr(root, path, level + 1, pslot);
1078f0486c68SYan, Zheng 		root_sub_used(root, mid->len);
10795581a51aSJan Schmidt 		btrfs_free_tree_block(trans, root, mid, 0, 1);
10803083ee2eSJosef Bacik 		free_extent_buffer_stale(mid);
1081f0486c68SYan, Zheng 		mid = NULL;
108279f95c82SChris Mason 	} else {
108379f95c82SChris Mason 		/* update the parent key to reflect our changes */
10845f39d397SChris Mason 		struct btrfs_disk_key mid_key;
10855f39d397SChris Mason 		btrfs_node_key(mid, &mid_key, 0);
1086f3a84ccdSFilipe Manana 		ret = btrfs_tree_mod_log_insert_key(parent, pslot,
1087f3a84ccdSFilipe Manana 				BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
10880e82bcfeSDavid Sterba 		BUG_ON(ret < 0);
10895f39d397SChris Mason 		btrfs_set_node_key(parent, &mid_key, pslot);
10905f39d397SChris Mason 		btrfs_mark_buffer_dirty(parent);
109179f95c82SChris Mason 	}
1092bb803951SChris Mason 
109379f95c82SChris Mason 	/* update the path */
10945f39d397SChris Mason 	if (left) {
10955f39d397SChris Mason 		if (btrfs_header_nritems(left) > orig_slot) {
			/* the original slot ended up in left: point the path at left */
109667439dadSDavid Sterba 			atomic_inc(&left->refs);
1097925baeddSChris Mason 			/* left was locked after cow */
10985f39d397SChris Mason 			path->nodes[level] = left;
1099bb803951SChris Mason 			path->slots[level + 1] -= 1;
1100bb803951SChris Mason 			path->slots[level] = orig_slot;
1101925baeddSChris Mason 			if (mid) {
1102925baeddSChris Mason 				btrfs_tree_unlock(mid);
11035f39d397SChris Mason 				free_extent_buffer(mid);
1104925baeddSChris Mason 			}
1105bb803951SChris Mason 		} else {
			/* the original slot is still in mid: make orig_slot relative to mid again */
11065f39d397SChris Mason 			orig_slot -= btrfs_header_nritems(left);
1107bb803951SChris Mason 			path->slots[level] = orig_slot;
1108bb803951SChris Mason 		}
1109bb803951SChris Mason 	}
111079f95c82SChris Mason 	/* double check we haven't messed things up */
1111e20d96d6SChris Mason 	if (orig_ptr !=
11125f39d397SChris Mason 	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
111379f95c82SChris Mason 		BUG();
	/* common exit: release whichever siblings are still held */
111454aa1f4dSChris Mason enospc:
1115925baeddSChris Mason 	if (right) {
1116925baeddSChris Mason 		btrfs_tree_unlock(right);
11175f39d397SChris Mason 		free_extent_buffer(right);
1118925baeddSChris Mason 	}
1119925baeddSChris Mason 	if (left) {
		/* keep left locked when the path now points at it */
1120925baeddSChris Mason 		if (path->nodes[level] != left)
1121925baeddSChris Mason 			btrfs_tree_unlock(left);
11225f39d397SChris Mason 		free_extent_buffer(left);
1123925baeddSChris Mason 	}
1124bb803951SChris Mason 	return ret;
1125bb803951SChris Mason }
1126bb803951SChris Mason 
1127d352ac68SChris Mason /* Node balancing for insertion.  Here we only split or push nodes around
1128d352ac68SChris Mason  * when they are completely full.  This is also done top down, so we
1129d352ac68SChris Mason  * have to be pessimistic.
 *
 * @trans: transaction handle handed to the COW and push helpers
 * @root:  tree being modified
 * @path:  path whose node at @level ("mid" below) needs room
 * @level: level of that node
 *
 * Returns 0 when push_node_left() into the left sibling or
 * balance_node_right() into the right sibling reported success (0); the path
 * is then updated to follow the original slot.  Returns 1 when @level is 0,
 * when there is no parent in the path, or when neither sibling could be used.
 * Negative results from the helpers are stored in 'ret' but are not returned:
 * the function still falls through and ends up returning 1.
1130d352ac68SChris Mason  */
1131d397712bSChris Mason static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1132e66f709bSChris Mason 					  struct btrfs_root *root,
1133e66f709bSChris Mason 					  struct btrfs_path *path, int level)
1134e66f709bSChris Mason {
11350b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
11365f39d397SChris Mason 	struct extent_buffer *right = NULL;
11375f39d397SChris Mason 	struct extent_buffer *mid;
11385f39d397SChris Mason 	struct extent_buffer *left = NULL;
11395f39d397SChris Mason 	struct extent_buffer *parent = NULL;
1140e66f709bSChris Mason 	int ret = 0;
1141e66f709bSChris Mason 	int wret;
1142e66f709bSChris Mason 	int pslot;
1143e66f709bSChris Mason 	int orig_slot = path->slots[level];
1144e66f709bSChris Mason 
1145e66f709bSChris Mason 	if (level == 0)
1146e66f709bSChris Mason 		return 1;
1147e66f709bSChris Mason 
11485f39d397SChris Mason 	mid = path->nodes[level];
11497bb86316SChris Mason 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
1150e66f709bSChris Mason 
	/* pslot is only assigned (and only used) when a parent can exist */
1151a05a9bb1SLi Zefan 	if (level < BTRFS_MAX_LEVEL - 1) {
11525f39d397SChris Mason 		parent = path->nodes[level + 1];
1153e66f709bSChris Mason 		pslot = path->slots[level + 1];
1154a05a9bb1SLi Zefan 	}
1155e66f709bSChris Mason 
11565f39d397SChris Mason 	if (!parent)
1157e66f709bSChris Mason 		return 1;
1158e66f709bSChris Mason 
	/* a failed read (or out-of-range slot) just means no left sibling */
11594b231ae4SDavid Sterba 	left = btrfs_read_node_slot(parent, pslot - 1);
1160fb770ae4SLiu Bo 	if (IS_ERR(left))
1161fb770ae4SLiu Bo 		left = NULL;
1162e66f709bSChris Mason 
1163e66f709bSChris Mason 	/* first, try to make some room in the middle buffer */
11645f39d397SChris Mason 	if (left) {
1165e66f709bSChris Mason 		u32 left_nr;
1166925baeddSChris Mason 
1167bf77467aSJosef Bacik 		__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
1168b4ce94deSChris Mason 
11695f39d397SChris Mason 		left_nr = btrfs_header_nritems(left);
		/* left has at most one free slot: skip it without COWing */
11700b246afaSJeff Mahoney 		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
117133ade1f8SChris Mason 			wret = 1;
117233ade1f8SChris Mason 		} else {
11735f39d397SChris Mason 			ret = btrfs_cow_block(trans, root, left, parent,
11749631e4ccSJosef Bacik 					      pslot - 1, &left,
1175bf59a5a2SJosef Bacik 					      BTRFS_NESTING_LEFT_COW);
			/* a COW failure is handled like "could not push" */
117654aa1f4dSChris Mason 			if (ret)
117754aa1f4dSChris Mason 				wret = 1;
117854aa1f4dSChris Mason 			else {
1179d30a668fSDavid Sterba 				wret = push_node_left(trans, left, mid, 0);
118054aa1f4dSChris Mason 			}
118133ade1f8SChris Mason 		}
1182e66f709bSChris Mason 		if (wret < 0)
1183e66f709bSChris Mason 			ret = wret;
1184e66f709bSChris Mason 		if (wret == 0) {
11855f39d397SChris Mason 			struct btrfs_disk_key disk_key;
			/* count orig_slot from the first slot of left (pre-push size) */
1186e66f709bSChris Mason 			orig_slot += left_nr;
			/* mid's first key changed: refresh it in the parent */
11875f39d397SChris Mason 			btrfs_node_key(mid, &disk_key, 0);
1188f3a84ccdSFilipe Manana 			ret = btrfs_tree_mod_log_insert_key(parent, pslot,
1189f3a84ccdSFilipe Manana 					BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
11900e82bcfeSDavid Sterba 			BUG_ON(ret < 0);
11915f39d397SChris Mason 			btrfs_set_node_key(parent, &disk_key, pslot);
11925f39d397SChris Mason 			btrfs_mark_buffer_dirty(parent);
11935f39d397SChris Mason 			if (btrfs_header_nritems(left) > orig_slot) {
				/* the original slot now lives in left: switch the path over */
11945f39d397SChris Mason 				path->nodes[level] = left;
1195e66f709bSChris Mason 				path->slots[level + 1] -= 1;
1196e66f709bSChris Mason 				path->slots[level] = orig_slot;
1197925baeddSChris Mason 				btrfs_tree_unlock(mid);
11985f39d397SChris Mason 				free_extent_buffer(mid);
1199e66f709bSChris Mason 			} else {
				/* still in mid: make the slot relative to mid again */
1200e66f709bSChris Mason 				orig_slot -=
12015f39d397SChris Mason 					btrfs_header_nritems(left);
1202e66f709bSChris Mason 				path->slots[level] = orig_slot;
1203925baeddSChris Mason 				btrfs_tree_unlock(left);
12045f39d397SChris Mason 				free_extent_buffer(left);
1205e66f709bSChris Mason 			}
1206e66f709bSChris Mason 			return 0;
1207e66f709bSChris Mason 		}
1208925baeddSChris Mason 		btrfs_tree_unlock(left);
12095f39d397SChris Mason 		free_extent_buffer(left);
1210e66f709bSChris Mason 	}
	/* left did not help; a failed read here just means no right sibling */
12114b231ae4SDavid Sterba 	right = btrfs_read_node_slot(parent, pslot + 1);
1212fb770ae4SLiu Bo 	if (IS_ERR(right))
1213fb770ae4SLiu Bo 		right = NULL;
1214e66f709bSChris Mason 
1215e66f709bSChris Mason 	/*
1216e66f709bSChris Mason 	 * then try to empty the right most buffer into the middle
1217e66f709bSChris Mason 	 */
12185f39d397SChris Mason 	if (right) {
121933ade1f8SChris Mason 		u32 right_nr;
1220b4ce94deSChris Mason 
1221bf77467aSJosef Bacik 		__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
1222b4ce94deSChris Mason 
12235f39d397SChris Mason 		right_nr = btrfs_header_nritems(right);
		/* right has at most one free slot: skip it without COWing */
12240b246afaSJeff Mahoney 		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
122533ade1f8SChris Mason 			wret = 1;
122633ade1f8SChris Mason 		} else {
12275f39d397SChris Mason 			ret = btrfs_cow_block(trans, root, right,
12285f39d397SChris Mason 					      parent, pslot + 1,
1229bf59a5a2SJosef Bacik 					      &right, BTRFS_NESTING_RIGHT_COW);
			/* a COW failure is handled like "could not push" */
123054aa1f4dSChris Mason 			if (ret)
123154aa1f4dSChris Mason 				wret = 1;
123254aa1f4dSChris Mason 			else {
123355d32ed8SDavid Sterba 				wret = balance_node_right(trans, right, mid);
123433ade1f8SChris Mason 			}
123554aa1f4dSChris Mason 		}
1236e66f709bSChris Mason 		if (wret < 0)
1237e66f709bSChris Mason 			ret = wret;
1238e66f709bSChris Mason 		if (wret == 0) {
12395f39d397SChris Mason 			struct btrfs_disk_key disk_key;
12405f39d397SChris Mason 
			/* right's first key changed: refresh it in the parent */
12415f39d397SChris Mason 			btrfs_node_key(right, &disk_key, 0);
1242f3a84ccdSFilipe Manana 			ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
1243f3a84ccdSFilipe Manana 					BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
12440e82bcfeSDavid Sterba 			BUG_ON(ret < 0);
12455f39d397SChris Mason 			btrfs_set_node_key(parent, &disk_key, pslot + 1);
12465f39d397SChris Mason 			btrfs_mark_buffer_dirty(parent);
12475f39d397SChris Mason 
12485f39d397SChris Mason 			if (btrfs_header_nritems(mid) <= orig_slot) {
				/* the original slot is past the end of mid: it is in right now */
12495f39d397SChris Mason 				path->nodes[level] = right;
1250e66f709bSChris Mason 				path->slots[level + 1] += 1;
1251e66f709bSChris Mason 				path->slots[level] = orig_slot -
12525f39d397SChris Mason 					btrfs_header_nritems(mid);
1253925baeddSChris Mason 				btrfs_tree_unlock(mid);
12545f39d397SChris Mason 				free_extent_buffer(mid);
1255e66f709bSChris Mason 			} else {
				/* the original slot stayed in mid: path is unchanged */
1256925baeddSChris Mason 				btrfs_tree_unlock(right);
12575f39d397SChris Mason 				free_extent_buffer(right);
1258e66f709bSChris Mason 			}
1259e66f709bSChris Mason 			return 0;
1260e66f709bSChris Mason 		}
1261925baeddSChris Mason 		btrfs_tree_unlock(right);
12625f39d397SChris Mason 		free_extent_buffer(right);
1263e66f709bSChris Mason 	}
1264e66f709bSChris Mason 	return 1;
1265e66f709bSChris Mason }
1266e66f709bSChris Mason 
126774123bd7SChris Mason /*
1268d352ac68SChris Mason  * readahead one full node of leaves, finding things that are close
1269d352ac68SChris Mason  * to the block in 'slot', and triggering ra on them.
12703c69faecSChris Mason  */
12712ff7e61eSJeff Mahoney static void reada_for_search(struct btrfs_fs_info *fs_info,
1272e02119d5SChris Mason 			     struct btrfs_path *path,
127301f46658SChris Mason 			     int level, int slot, u64 objectid)
12743c69faecSChris Mason {
12755f39d397SChris Mason 	struct extent_buffer *node;
127601f46658SChris Mason 	struct btrfs_disk_key disk_key;
12773c69faecSChris Mason 	u32 nritems;
12783c69faecSChris Mason 	u64 search;
1279a7175319SChris Mason 	u64 target;
12806b80053dSChris Mason 	u64 nread = 0;
1281ace75066SFilipe Manana 	u64 nread_max;
12825f39d397SChris Mason 	struct extent_buffer *eb;
12836b80053dSChris Mason 	u32 nr;
12846b80053dSChris Mason 	u32 blocksize;
12856b80053dSChris Mason 	u32 nscan = 0;
1286db94535dSChris Mason 
1287ace75066SFilipe Manana 	if (level != 1 && path->reada != READA_FORWARD_ALWAYS)
12883c69faecSChris Mason 		return;
12893c69faecSChris Mason 
12906702ed49SChris Mason 	if (!path->nodes[level])
12916702ed49SChris Mason 		return;
12926702ed49SChris Mason 
12935f39d397SChris Mason 	node = path->nodes[level];
1294925baeddSChris Mason 
1295ace75066SFilipe Manana 	/*
1296ace75066SFilipe Manana 	 * Since the time between visiting leaves is much shorter than the time
1297ace75066SFilipe Manana 	 * between visiting nodes, limit read ahead of nodes to 1, to avoid too
1298ace75066SFilipe Manana 	 * much IO at once (possibly random).
1299ace75066SFilipe Manana 	 */
1300ace75066SFilipe Manana 	if (path->reada == READA_FORWARD_ALWAYS) {
1301ace75066SFilipe Manana 		if (level > 1)
1302ace75066SFilipe Manana 			nread_max = node->fs_info->nodesize;
1303ace75066SFilipe Manana 		else
1304ace75066SFilipe Manana 			nread_max = SZ_128K;
1305ace75066SFilipe Manana 	} else {
1306ace75066SFilipe Manana 		nread_max = SZ_64K;
1307ace75066SFilipe Manana 	}
1308ace75066SFilipe Manana 
13093c69faecSChris Mason 	search = btrfs_node_blockptr(node, slot);
13100b246afaSJeff Mahoney 	blocksize = fs_info->nodesize;
13110b246afaSJeff Mahoney 	eb = find_extent_buffer(fs_info, search);
13125f39d397SChris Mason 	if (eb) {
13135f39d397SChris Mason 		free_extent_buffer(eb);
13143c69faecSChris Mason 		return;
13153c69faecSChris Mason 	}
13163c69faecSChris Mason 
1317a7175319SChris Mason 	target = search;
13186b80053dSChris Mason 
13195f39d397SChris Mason 	nritems = btrfs_header_nritems(node);
13206b80053dSChris Mason 	nr = slot;
132125b8b936SJosef Bacik 
13223c69faecSChris Mason 	while (1) {
1323e4058b54SDavid Sterba 		if (path->reada == READA_BACK) {
13246b80053dSChris Mason 			if (nr == 0)
13253c69faecSChris Mason 				break;
13266b80053dSChris Mason 			nr--;
1327ace75066SFilipe Manana 		} else if (path->reada == READA_FORWARD ||
1328ace75066SFilipe Manana 			   path->reada == READA_FORWARD_ALWAYS) {
13296b80053dSChris Mason 			nr++;
13306b80053dSChris Mason 			if (nr >= nritems)
13316b80053dSChris Mason 				break;
13323c69faecSChris Mason 		}
1333e4058b54SDavid Sterba 		if (path->reada == READA_BACK && objectid) {
133401f46658SChris Mason 			btrfs_node_key(node, &disk_key, nr);
133501f46658SChris Mason 			if (btrfs_disk_key_objectid(&disk_key) != objectid)
133601f46658SChris Mason 				break;
133701f46658SChris Mason 		}
13386b80053dSChris Mason 		search = btrfs_node_blockptr(node, nr);
1339ace75066SFilipe Manana 		if (path->reada == READA_FORWARD_ALWAYS ||
1340ace75066SFilipe Manana 		    (search <= target && target - search <= 65536) ||
1341a7175319SChris Mason 		    (search > target && search - target <= 65536)) {
1342bfb484d9SJosef Bacik 			btrfs_readahead_node_child(node, nr);
13436b80053dSChris Mason 			nread += blocksize;
13443c69faecSChris Mason 		}
13456b80053dSChris Mason 		nscan++;
1346ace75066SFilipe Manana 		if (nread > nread_max || nscan > 32)
13476b80053dSChris Mason 			break;
13483c69faecSChris Mason 	}
13493c69faecSChris Mason }
1350925baeddSChris Mason 
1351bfb484d9SJosef Bacik static noinline void reada_for_balance(struct btrfs_path *path, int level)
1352b4ce94deSChris Mason {
1353bfb484d9SJosef Bacik 	struct extent_buffer *parent;
1354b4ce94deSChris Mason 	int slot;
1355b4ce94deSChris Mason 	int nritems;
1356b4ce94deSChris Mason 
13578c594ea8SChris Mason 	parent = path->nodes[level + 1];
1358b4ce94deSChris Mason 	if (!parent)
13590b08851fSJosef Bacik 		return;
1360b4ce94deSChris Mason 
1361b4ce94deSChris Mason 	nritems = btrfs_header_nritems(parent);
13628c594ea8SChris Mason 	slot = path->slots[level + 1];
1363b4ce94deSChris Mason 
1364bfb484d9SJosef Bacik 	if (slot > 0)
1365bfb484d9SJosef Bacik 		btrfs_readahead_node_child(parent, slot - 1);
1366bfb484d9SJosef Bacik 	if (slot + 1 < nritems)
1367bfb484d9SJosef Bacik 		btrfs_readahead_node_child(parent, slot + 1);
1368b4ce94deSChris Mason }
1369b4ce94deSChris Mason 
1370b4ce94deSChris Mason 
1371b4ce94deSChris Mason /*
1372d397712bSChris Mason  * when we walk down the tree, it is usually safe to unlock the higher layers
1373d397712bSChris Mason  * in the tree.  The exceptions are when our path goes through slot 0, because
1374d397712bSChris Mason  * operations on the tree might require changing key pointers higher up in the
1375d397712bSChris Mason  * tree.
1376d352ac68SChris Mason  *
1377d397712bSChris Mason  * callers might also have set path->keep_locks, which tells this code to keep
1378d397712bSChris Mason  * the lock if the path points to the last slot in the block.  This is part of
1379d397712bSChris Mason  * walking through the tree, and selecting the next slot in the higher block.
1380d352ac68SChris Mason  *
1381d397712bSChris Mason  * lowest_unlock sets the lowest level in the tree we're allowed to unlock.  so
1382d397712bSChris Mason  * if lowest_unlock is 1, level 0 won't be unlocked
1383d352ac68SChris Mason  */
1384e02119d5SChris Mason static noinline void unlock_up(struct btrfs_path *path, int level,
1385f7c79f30SChris Mason 			       int lowest_unlock, int min_write_lock_level,
1386f7c79f30SChris Mason 			       int *write_lock_level)
1387925baeddSChris Mason {
1388925baeddSChris Mason 	int i;
1389925baeddSChris Mason 	int skip_level = level;
1390051e1b9fSChris Mason 	int no_skips = 0;
1391925baeddSChris Mason 	struct extent_buffer *t;
1392925baeddSChris Mason 
1393925baeddSChris Mason 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1394925baeddSChris Mason 		if (!path->nodes[i])
1395925baeddSChris Mason 			break;
1396925baeddSChris Mason 		if (!path->locks[i])
1397925baeddSChris Mason 			break;
1398051e1b9fSChris Mason 		if (!no_skips && path->slots[i] == 0) {
1399925baeddSChris Mason 			skip_level = i + 1;
1400925baeddSChris Mason 			continue;
1401925baeddSChris Mason 		}
1402051e1b9fSChris Mason 		if (!no_skips && path->keep_locks) {
1403925baeddSChris Mason 			u32 nritems;
1404925baeddSChris Mason 			t = path->nodes[i];
1405925baeddSChris Mason 			nritems = btrfs_header_nritems(t);
1406051e1b9fSChris Mason 			if (nritems < 1 || path->slots[i] >= nritems - 1) {
1407925baeddSChris Mason 				skip_level = i + 1;
1408925baeddSChris Mason 				continue;
1409925baeddSChris Mason 			}
1410925baeddSChris Mason 		}
1411051e1b9fSChris Mason 		if (skip_level < i && i >= lowest_unlock)
1412051e1b9fSChris Mason 			no_skips = 1;
1413051e1b9fSChris Mason 
1414925baeddSChris Mason 		t = path->nodes[i];
1415d80bb3f9SLiu Bo 		if (i >= lowest_unlock && i > skip_level) {
1416bd681513SChris Mason 			btrfs_tree_unlock_rw(t, path->locks[i]);
1417925baeddSChris Mason 			path->locks[i] = 0;
1418f7c79f30SChris Mason 			if (write_lock_level &&
1419f7c79f30SChris Mason 			    i > min_write_lock_level &&
1420f7c79f30SChris Mason 			    i <= *write_lock_level) {
1421f7c79f30SChris Mason 				*write_lock_level = i - 1;
1422f7c79f30SChris Mason 			}
1423925baeddSChris Mason 		}
1424925baeddSChris Mason 	}
1425925baeddSChris Mason }
1426925baeddSChris Mason 
14273c69faecSChris Mason /*
1428c8c42864SChris Mason  * helper function for btrfs_search_slot.  The goal is to find a block
1429c8c42864SChris Mason  * in cache without setting the path to blocking.  If we find the block
1430c8c42864SChris Mason  * we return zero and the path is unchanged.
1431c8c42864SChris Mason  *
1432c8c42864SChris Mason  * If we can't find the block, we set the path blocking and do some
1433c8c42864SChris Mason  * reada.  -EAGAIN is returned and the search must be repeated.
1434c8c42864SChris Mason  */
1435c8c42864SChris Mason static int
1436d07b8528SLiu Bo read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
1437c8c42864SChris Mason 		      struct extent_buffer **eb_ret, int level, int slot,
1438cda79c54SDavid Sterba 		      const struct btrfs_key *key)
1439c8c42864SChris Mason {
14400b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
1441c8c42864SChris Mason 	u64 blocknr;
1442c8c42864SChris Mason 	u64 gen;
1443c8c42864SChris Mason 	struct extent_buffer *tmp;
1444581c1760SQu Wenruo 	struct btrfs_key first_key;
144576a05b35SChris Mason 	int ret;
1446581c1760SQu Wenruo 	int parent_level;
1447c8c42864SChris Mason 
1448213ff4b7SNikolay Borisov 	blocknr = btrfs_node_blockptr(*eb_ret, slot);
1449213ff4b7SNikolay Borisov 	gen = btrfs_node_ptr_generation(*eb_ret, slot);
1450213ff4b7SNikolay Borisov 	parent_level = btrfs_header_level(*eb_ret);
1451213ff4b7SNikolay Borisov 	btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
1452c8c42864SChris Mason 
14530b246afaSJeff Mahoney 	tmp = find_extent_buffer(fs_info, blocknr);
1454cb44921aSChris Mason 	if (tmp) {
1455ace75066SFilipe Manana 		if (p->reada == READA_FORWARD_ALWAYS)
1456ace75066SFilipe Manana 			reada_for_search(fs_info, p, level, slot, key->objectid);
1457ace75066SFilipe Manana 
1458b9fab919SChris Mason 		/* first we do an atomic uptodate check */
1459b9fab919SChris Mason 		if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
1460448de471SQu Wenruo 			/*
1461448de471SQu Wenruo 			 * Do extra check for first_key, eb can be stale due to
1462448de471SQu Wenruo 			 * being cached, read from scrub, or have multiple
1463448de471SQu Wenruo 			 * parents (shared tree blocks).
1464448de471SQu Wenruo 			 */
1465e064d5e9SDavid Sterba 			if (btrfs_verify_level_key(tmp,
1466448de471SQu Wenruo 					parent_level - 1, &first_key, gen)) {
1467448de471SQu Wenruo 				free_extent_buffer(tmp);
1468448de471SQu Wenruo 				return -EUCLEAN;
1469448de471SQu Wenruo 			}
1470c8c42864SChris Mason 			*eb_ret = tmp;
1471c8c42864SChris Mason 			return 0;
1472c8c42864SChris Mason 		}
1473bdf7c00eSJosef Bacik 
1474b9fab919SChris Mason 		/* now we're allowed to do a blocking uptodate check */
1475581c1760SQu Wenruo 		ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
1476bdf7c00eSJosef Bacik 		if (!ret) {
1477cb44921aSChris Mason 			*eb_ret = tmp;
1478cb44921aSChris Mason 			return 0;
1479cb44921aSChris Mason 		}
1480cb44921aSChris Mason 		free_extent_buffer(tmp);
1481b3b4aa74SDavid Sterba 		btrfs_release_path(p);
1482cb44921aSChris Mason 		return -EIO;
1483cb44921aSChris Mason 	}
1484c8c42864SChris Mason 
1485c8c42864SChris Mason 	/*
1486c8c42864SChris Mason 	 * reduce lock contention at high levels
1487c8c42864SChris Mason 	 * of the btree by dropping locks before
148876a05b35SChris Mason 	 * we read.  Don't release the lock on the current
148976a05b35SChris Mason 	 * level because we need to walk this node to figure
149076a05b35SChris Mason 	 * out which blocks to read.
1491c8c42864SChris Mason 	 */
14928c594ea8SChris Mason 	btrfs_unlock_up_safe(p, level + 1);
14938c594ea8SChris Mason 
1494e4058b54SDavid Sterba 	if (p->reada != READA_NONE)
14952ff7e61eSJeff Mahoney 		reada_for_search(fs_info, p, level, slot, key->objectid);
1496c8c42864SChris Mason 
149776a05b35SChris Mason 	ret = -EAGAIN;
14981b7ec85eSJosef Bacik 	tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
14991b7ec85eSJosef Bacik 			      gen, parent_level - 1, &first_key);
150064c043deSLiu Bo 	if (!IS_ERR(tmp)) {
150176a05b35SChris Mason 		/*
150276a05b35SChris Mason 		 * If the read above didn't mark this buffer up to date,
150376a05b35SChris Mason 		 * it will never end up being up to date.  Set ret to EIO now
150476a05b35SChris Mason 		 * and give up so that our caller doesn't loop forever
150576a05b35SChris Mason 		 * on our EAGAINs.
150676a05b35SChris Mason 		 */
1507e6a1d6fdSLiu Bo 		if (!extent_buffer_uptodate(tmp))
150876a05b35SChris Mason 			ret = -EIO;
1509c8c42864SChris Mason 		free_extent_buffer(tmp);
1510c871b0f2SLiu Bo 	} else {
1511c871b0f2SLiu Bo 		ret = PTR_ERR(tmp);
151276a05b35SChris Mason 	}
151302a3307aSLiu Bo 
151402a3307aSLiu Bo 	btrfs_release_path(p);
151576a05b35SChris Mason 	return ret;
1516c8c42864SChris Mason }
1517c8c42864SChris Mason 
1518c8c42864SChris Mason /*
1519c8c42864SChris Mason  * helper function for btrfs_search_slot.  This does all of the checks
1520c8c42864SChris Mason  * for node-level blocks and does any balancing required based on
1521c8c42864SChris Mason  * the ins_len.
1522c8c42864SChris Mason  *
1523c8c42864SChris Mason  * If no extra work was required, zero is returned.  If we had to
1524c8c42864SChris Mason  * drop the path, -EAGAIN is returned and btrfs_search_slot must
1525c8c42864SChris Mason  * start over
1526c8c42864SChris Mason  */
1527c8c42864SChris Mason static int
1528c8c42864SChris Mason setup_nodes_for_search(struct btrfs_trans_handle *trans,
1529c8c42864SChris Mason 		       struct btrfs_root *root, struct btrfs_path *p,
1530bd681513SChris Mason 		       struct extent_buffer *b, int level, int ins_len,
1531bd681513SChris Mason 		       int *write_lock_level)
1532c8c42864SChris Mason {
15330b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
153495b982deSNikolay Borisov 	int ret = 0;
15350b246afaSJeff Mahoney 
1536c8c42864SChris Mason 	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
15370b246afaSJeff Mahoney 	    BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
1538c8c42864SChris Mason 
1539bd681513SChris Mason 		if (*write_lock_level < level + 1) {
1540bd681513SChris Mason 			*write_lock_level = level + 1;
1541bd681513SChris Mason 			btrfs_release_path(p);
154295b982deSNikolay Borisov 			return -EAGAIN;
1543bd681513SChris Mason 		}
1544bd681513SChris Mason 
1545bfb484d9SJosef Bacik 		reada_for_balance(p, level);
154695b982deSNikolay Borisov 		ret = split_node(trans, root, p, level);
1547c8c42864SChris Mason 
1548c8c42864SChris Mason 		b = p->nodes[level];
1549c8c42864SChris Mason 	} else if (ins_len < 0 && btrfs_header_nritems(b) <
15500b246afaSJeff Mahoney 		   BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {
1551c8c42864SChris Mason 
1552bd681513SChris Mason 		if (*write_lock_level < level + 1) {
1553bd681513SChris Mason 			*write_lock_level = level + 1;
1554bd681513SChris Mason 			btrfs_release_path(p);
155595b982deSNikolay Borisov 			return -EAGAIN;
1556bd681513SChris Mason 		}
1557bd681513SChris Mason 
1558bfb484d9SJosef Bacik 		reada_for_balance(p, level);
155995b982deSNikolay Borisov 		ret = balance_level(trans, root, p, level);
156095b982deSNikolay Borisov 		if (ret)
156195b982deSNikolay Borisov 			return ret;
1562c8c42864SChris Mason 
1563c8c42864SChris Mason 		b = p->nodes[level];
1564c8c42864SChris Mason 		if (!b) {
1565b3b4aa74SDavid Sterba 			btrfs_release_path(p);
156695b982deSNikolay Borisov 			return -EAGAIN;
1567c8c42864SChris Mason 		}
1568c8c42864SChris Mason 		BUG_ON(btrfs_header_nritems(b) == 1);
1569c8c42864SChris Mason 	}
1570c8c42864SChris Mason 	return ret;
1571c8c42864SChris Mason }
1572c8c42864SChris Mason 
1573381cf658SDavid Sterba int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
1574e33d5c3dSKelley Nielsen 		u64 iobjectid, u64 ioff, u8 key_type,
1575e33d5c3dSKelley Nielsen 		struct btrfs_key *found_key)
1576e33d5c3dSKelley Nielsen {
1577e33d5c3dSKelley Nielsen 	int ret;
1578e33d5c3dSKelley Nielsen 	struct btrfs_key key;
1579e33d5c3dSKelley Nielsen 	struct extent_buffer *eb;
1580381cf658SDavid Sterba 
1581381cf658SDavid Sterba 	ASSERT(path);
15821d4c08e0SDavid Sterba 	ASSERT(found_key);
1583e33d5c3dSKelley Nielsen 
1584e33d5c3dSKelley Nielsen 	key.type = key_type;
1585e33d5c3dSKelley Nielsen 	key.objectid = iobjectid;
1586e33d5c3dSKelley Nielsen 	key.offset = ioff;
1587e33d5c3dSKelley Nielsen 
1588e33d5c3dSKelley Nielsen 	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
15891d4c08e0SDavid Sterba 	if (ret < 0)
1590e33d5c3dSKelley Nielsen 		return ret;
1591e33d5c3dSKelley Nielsen 
1592e33d5c3dSKelley Nielsen 	eb = path->nodes[0];
1593e33d5c3dSKelley Nielsen 	if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
1594e33d5c3dSKelley Nielsen 		ret = btrfs_next_leaf(fs_root, path);
1595e33d5c3dSKelley Nielsen 		if (ret)
1596e33d5c3dSKelley Nielsen 			return ret;
1597e33d5c3dSKelley Nielsen 		eb = path->nodes[0];
1598e33d5c3dSKelley Nielsen 	}
1599e33d5c3dSKelley Nielsen 
1600e33d5c3dSKelley Nielsen 	btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
1601e33d5c3dSKelley Nielsen 	if (found_key->type != key.type ||
1602e33d5c3dSKelley Nielsen 			found_key->objectid != key.objectid)
1603e33d5c3dSKelley Nielsen 		return 1;
1604e33d5c3dSKelley Nielsen 
1605e33d5c3dSKelley Nielsen 	return 0;
1606e33d5c3dSKelley Nielsen }
1607e33d5c3dSKelley Nielsen 
16081fc28d8eSLiu Bo static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
16091fc28d8eSLiu Bo 							struct btrfs_path *p,
16101fc28d8eSLiu Bo 							int write_lock_level)
16111fc28d8eSLiu Bo {
16121fc28d8eSLiu Bo 	struct btrfs_fs_info *fs_info = root->fs_info;
16131fc28d8eSLiu Bo 	struct extent_buffer *b;
16141fc28d8eSLiu Bo 	int root_lock;
16151fc28d8eSLiu Bo 	int level = 0;
16161fc28d8eSLiu Bo 
16171fc28d8eSLiu Bo 	/* We try very hard to do read locks on the root */
16181fc28d8eSLiu Bo 	root_lock = BTRFS_READ_LOCK;
16191fc28d8eSLiu Bo 
16201fc28d8eSLiu Bo 	if (p->search_commit_root) {
1621be6821f8SFilipe Manana 		/*
1622be6821f8SFilipe Manana 		 * The commit roots are read only so we always do read locks,
1623be6821f8SFilipe Manana 		 * and we always must hold the commit_root_sem when doing
1624be6821f8SFilipe Manana 		 * searches on them, the only exception is send where we don't
1625be6821f8SFilipe Manana 		 * want to block transaction commits for a long time, so
1626be6821f8SFilipe Manana 		 * we need to clone the commit root in order to avoid races
1627be6821f8SFilipe Manana 		 * with transaction commits that create a snapshot of one of
1628be6821f8SFilipe Manana 		 * the roots used by a send operation.
1629be6821f8SFilipe Manana 		 */
1630be6821f8SFilipe Manana 		if (p->need_commit_sem) {
16311fc28d8eSLiu Bo 			down_read(&fs_info->commit_root_sem);
1632be6821f8SFilipe Manana 			b = btrfs_clone_extent_buffer(root->commit_root);
1633be6821f8SFilipe Manana 			up_read(&fs_info->commit_root_sem);
1634be6821f8SFilipe Manana 			if (!b)
1635be6821f8SFilipe Manana 				return ERR_PTR(-ENOMEM);
1636be6821f8SFilipe Manana 
1637be6821f8SFilipe Manana 		} else {
16381fc28d8eSLiu Bo 			b = root->commit_root;
163967439dadSDavid Sterba 			atomic_inc(&b->refs);
1640be6821f8SFilipe Manana 		}
16411fc28d8eSLiu Bo 		level = btrfs_header_level(b);
1642f9ddfd05SLiu Bo 		/*
1643f9ddfd05SLiu Bo 		 * Ensure that all callers have set skip_locking when
1644f9ddfd05SLiu Bo 		 * p->search_commit_root = 1.
1645f9ddfd05SLiu Bo 		 */
1646f9ddfd05SLiu Bo 		ASSERT(p->skip_locking == 1);
16471fc28d8eSLiu Bo 
16481fc28d8eSLiu Bo 		goto out;
16491fc28d8eSLiu Bo 	}
16501fc28d8eSLiu Bo 
16511fc28d8eSLiu Bo 	if (p->skip_locking) {
16521fc28d8eSLiu Bo 		b = btrfs_root_node(root);
16531fc28d8eSLiu Bo 		level = btrfs_header_level(b);
16541fc28d8eSLiu Bo 		goto out;
16551fc28d8eSLiu Bo 	}
16561fc28d8eSLiu Bo 
16571fc28d8eSLiu Bo 	/*
1658662c653bSLiu Bo 	 * If the level is set to maximum, we can skip trying to get the read
1659662c653bSLiu Bo 	 * lock.
1660662c653bSLiu Bo 	 */
1661662c653bSLiu Bo 	if (write_lock_level < BTRFS_MAX_LEVEL) {
1662662c653bSLiu Bo 		/*
1663662c653bSLiu Bo 		 * We don't know the level of the root node until we actually
1664662c653bSLiu Bo 		 * have it read locked
16651fc28d8eSLiu Bo 		 */
16661bb96598SJosef Bacik 		b = btrfs_read_lock_root_node(root);
16671fc28d8eSLiu Bo 		level = btrfs_header_level(b);
16681fc28d8eSLiu Bo 		if (level > write_lock_level)
16691fc28d8eSLiu Bo 			goto out;
16701fc28d8eSLiu Bo 
1671662c653bSLiu Bo 		/* Whoops, must trade for write lock */
16721fc28d8eSLiu Bo 		btrfs_tree_read_unlock(b);
16731fc28d8eSLiu Bo 		free_extent_buffer(b);
1674662c653bSLiu Bo 	}
1675662c653bSLiu Bo 
16761fc28d8eSLiu Bo 	b = btrfs_lock_root_node(root);
16771fc28d8eSLiu Bo 	root_lock = BTRFS_WRITE_LOCK;
16781fc28d8eSLiu Bo 
16791fc28d8eSLiu Bo 	/* The level might have changed, check again */
16801fc28d8eSLiu Bo 	level = btrfs_header_level(b);
16811fc28d8eSLiu Bo 
16821fc28d8eSLiu Bo out:
16831fc28d8eSLiu Bo 	p->nodes[level] = b;
16841fc28d8eSLiu Bo 	if (!p->skip_locking)
16851fc28d8eSLiu Bo 		p->locks[level] = root_lock;
16861fc28d8eSLiu Bo 	/*
16871fc28d8eSLiu Bo 	 * Callers are responsible for dropping b's references.
16881fc28d8eSLiu Bo 	 */
16891fc28d8eSLiu Bo 	return b;
16901fc28d8eSLiu Bo }
16911fc28d8eSLiu Bo 
16921fc28d8eSLiu Bo 
1693c8c42864SChris Mason /*
16944271eceaSNikolay Borisov  * btrfs_search_slot - look for a key in a tree and perform necessary
16954271eceaSNikolay Borisov  * modifications to preserve tree invariants.
169674123bd7SChris Mason  *
16974271eceaSNikolay Borisov  * @trans:	Handle of transaction, used when modifying the tree
16984271eceaSNikolay Borisov  * @p:		Holds all btree nodes along the search path
16994271eceaSNikolay Borisov  * @root:	The root node of the tree
17004271eceaSNikolay Borisov  * @key:	The key we are looking for
17019a664971Sethanwu  * @ins_len:	Indicates purpose of search:
17029a664971Sethanwu  *              >0  for inserts it's size of item inserted (*)
17039a664971Sethanwu  *              <0  for deletions
17049a664971Sethanwu  *               0  for plain searches, not modifying the tree
17059a664971Sethanwu  *
17069a664971Sethanwu  *              (*) If size of item inserted doesn't include
17079a664971Sethanwu  *              sizeof(struct btrfs_item), then p->search_for_extension must
17089a664971Sethanwu  *              be set.
17094271eceaSNikolay Borisov  * @cow:	boolean should CoW operations be performed. Must always be 1
17104271eceaSNikolay Borisov  *		when modifying the tree.
171197571fd0SChris Mason  *
17124271eceaSNikolay Borisov  * If @ins_len > 0, nodes and leaves will be split as we walk down the tree.
17134271eceaSNikolay Borisov  * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible)
17144271eceaSNikolay Borisov  *
17154271eceaSNikolay Borisov  * If @key is found, 0 is returned and you can find the item in the leaf level
17164271eceaSNikolay Borisov  * of the path (level 0)
17174271eceaSNikolay Borisov  *
17184271eceaSNikolay Borisov  * If @key isn't found, 1 is returned and the leaf level of the path (level 0)
17194271eceaSNikolay Borisov  * points to the slot where it should be inserted
17204271eceaSNikolay Borisov  *
17214271eceaSNikolay Borisov  * If an error is encountered while searching the tree a negative error number
17224271eceaSNikolay Borisov  * is returned
172374123bd7SChris Mason  */
1724310712b2SOmar Sandoval int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1725310712b2SOmar Sandoval 		      const struct btrfs_key *key, struct btrfs_path *p,
1726310712b2SOmar Sandoval 		      int ins_len, int cow)
1727be0e5c09SChris Mason {
17285f39d397SChris Mason 	struct extent_buffer *b;
1729be0e5c09SChris Mason 	int slot;
1730be0e5c09SChris Mason 	int ret;
173133c66f43SYan Zheng 	int err;
1732be0e5c09SChris Mason 	int level;
1733925baeddSChris Mason 	int lowest_unlock = 1;
1734bd681513SChris Mason 	/* everything at write_lock_level or lower must be write locked */
1735bd681513SChris Mason 	int write_lock_level = 0;
17369f3a7427SChris Mason 	u8 lowest_level = 0;
1737f7c79f30SChris Mason 	int min_write_lock_level;
1738d7396f07SFilipe David Borba Manana 	int prev_cmp;
17399f3a7427SChris Mason 
17406702ed49SChris Mason 	lowest_level = p->lowest_level;
1741323ac95bSChris Mason 	WARN_ON(lowest_level && ins_len > 0);
174222b0ebdaSChris Mason 	WARN_ON(p->nodes[0] != NULL);
1743eb653de1SFilipe David Borba Manana 	BUG_ON(!cow && ins_len);
174425179201SJosef Bacik 
1745bd681513SChris Mason 	if (ins_len < 0) {
1746925baeddSChris Mason 		lowest_unlock = 2;
174765b51a00SChris Mason 
1748bd681513SChris Mason 		/* when we are removing items, we might have to go up to level
1749bd681513SChris Mason 		 * two as we update tree pointers  Make sure we keep write
1750bd681513SChris Mason 		 * for those levels as well
1751bd681513SChris Mason 		 */
1752bd681513SChris Mason 		write_lock_level = 2;
1753bd681513SChris Mason 	} else if (ins_len > 0) {
1754bd681513SChris Mason 		/*
1755bd681513SChris Mason 		 * for inserting items, make sure we have a write lock on
1756bd681513SChris Mason 		 * level 1 so we can update keys
1757bd681513SChris Mason 		 */
1758bd681513SChris Mason 		write_lock_level = 1;
1759bd681513SChris Mason 	}
1760bd681513SChris Mason 
1761bd681513SChris Mason 	if (!cow)
1762bd681513SChris Mason 		write_lock_level = -1;
1763bd681513SChris Mason 
176409a2a8f9SJosef Bacik 	if (cow && (p->keep_locks || p->lowest_level))
1765bd681513SChris Mason 		write_lock_level = BTRFS_MAX_LEVEL;
1766bd681513SChris Mason 
1767f7c79f30SChris Mason 	min_write_lock_level = write_lock_level;
1768f7c79f30SChris Mason 
1769bb803951SChris Mason again:
1770d7396f07SFilipe David Borba Manana 	prev_cmp = -1;
17711fc28d8eSLiu Bo 	b = btrfs_search_slot_get_root(root, p, write_lock_level);
1772be6821f8SFilipe Manana 	if (IS_ERR(b)) {
1773be6821f8SFilipe Manana 		ret = PTR_ERR(b);
1774be6821f8SFilipe Manana 		goto done;
1775be6821f8SFilipe Manana 	}
1776925baeddSChris Mason 
1777eb60ceacSChris Mason 	while (b) {
1778f624d976SQu Wenruo 		int dec = 0;
1779f624d976SQu Wenruo 
17805f39d397SChris Mason 		level = btrfs_header_level(b);
178165b51a00SChris Mason 
178202217ed2SChris Mason 		if (cow) {
17839ea2c7c9SNikolay Borisov 			bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
17849ea2c7c9SNikolay Borisov 
1785c8c42864SChris Mason 			/*
1786c8c42864SChris Mason 			 * if we don't really need to cow this block
1787c8c42864SChris Mason 			 * then we don't want to set the path blocking,
1788c8c42864SChris Mason 			 * so we test it here
1789c8c42864SChris Mason 			 */
1790*5963ffcaSJosef Bacik 			if (!should_cow_block(trans, root, b))
179165b51a00SChris Mason 				goto cow_done;
17925d4f98a2SYan Zheng 
1793bd681513SChris Mason 			/*
1794bd681513SChris Mason 			 * must have write locks on this node and the
1795bd681513SChris Mason 			 * parent
1796bd681513SChris Mason 			 */
17975124e00eSJosef Bacik 			if (level > write_lock_level ||
17985124e00eSJosef Bacik 			    (level + 1 > write_lock_level &&
17995124e00eSJosef Bacik 			    level + 1 < BTRFS_MAX_LEVEL &&
18005124e00eSJosef Bacik 			    p->nodes[level + 1])) {
1801bd681513SChris Mason 				write_lock_level = level + 1;
1802bd681513SChris Mason 				btrfs_release_path(p);
1803bd681513SChris Mason 				goto again;
1804bd681513SChris Mason 			}
1805bd681513SChris Mason 
18069ea2c7c9SNikolay Borisov 			if (last_level)
18079ea2c7c9SNikolay Borisov 				err = btrfs_cow_block(trans, root, b, NULL, 0,
18089631e4ccSJosef Bacik 						      &b,
18099631e4ccSJosef Bacik 						      BTRFS_NESTING_COW);
18109ea2c7c9SNikolay Borisov 			else
181133c66f43SYan Zheng 				err = btrfs_cow_block(trans, root, b,
1812e20d96d6SChris Mason 						      p->nodes[level + 1],
18139631e4ccSJosef Bacik 						      p->slots[level + 1], &b,
18149631e4ccSJosef Bacik 						      BTRFS_NESTING_COW);
181533c66f43SYan Zheng 			if (err) {
181633c66f43SYan Zheng 				ret = err;
181765b51a00SChris Mason 				goto done;
181854aa1f4dSChris Mason 			}
181902217ed2SChris Mason 		}
182065b51a00SChris Mason cow_done:
1821eb60ceacSChris Mason 		p->nodes[level] = b;
182252398340SLiu Bo 		/*
182352398340SLiu Bo 		 * Leave path with blocking locks to avoid massive
182452398340SLiu Bo 		 * lock context switch, this is made on purpose.
182552398340SLiu Bo 		 */
1826b4ce94deSChris Mason 
1827b4ce94deSChris Mason 		/*
1828b4ce94deSChris Mason 		 * we have a lock on b and as long as we aren't changing
1829b4ce94deSChris Mason 		 * the tree, there is no way to for the items in b to change.
1830b4ce94deSChris Mason 		 * It is safe to drop the lock on our parent before we
1831b4ce94deSChris Mason 		 * go through the expensive btree search on b.
1832b4ce94deSChris Mason 		 *
1833eb653de1SFilipe David Borba Manana 		 * If we're inserting or deleting (ins_len != 0), then we might
1834eb653de1SFilipe David Borba Manana 		 * be changing slot zero, which may require changing the parent.
1835eb653de1SFilipe David Borba Manana 		 * So, we can't drop the lock until after we know which slot
1836eb653de1SFilipe David Borba Manana 		 * we're operating on.
1837b4ce94deSChris Mason 		 */
1838eb653de1SFilipe David Borba Manana 		if (!ins_len && !p->keep_locks) {
1839eb653de1SFilipe David Borba Manana 			int u = level + 1;
1840eb653de1SFilipe David Borba Manana 
1841eb653de1SFilipe David Borba Manana 			if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
1842eb653de1SFilipe David Borba Manana 				btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
1843eb653de1SFilipe David Borba Manana 				p->locks[u] = 0;
1844eb653de1SFilipe David Borba Manana 			}
1845eb653de1SFilipe David Borba Manana 		}
1846b4ce94deSChris Mason 
1847995e9a16SNikolay Borisov 		/*
1848995e9a16SNikolay Borisov 		 * If btrfs_bin_search returns an exact match (prev_cmp == 0)
1849995e9a16SNikolay Borisov 		 * we can safely assume the target key will always be in slot 0
1850995e9a16SNikolay Borisov 		 * on lower levels due to the invariants BTRFS' btree provides,
1851995e9a16SNikolay Borisov 		 * namely that a btrfs_key_ptr entry always points to the
1852995e9a16SNikolay Borisov 		 * lowest key in the child node, thus we can skip searching
1853995e9a16SNikolay Borisov 		 * lower levels
1854995e9a16SNikolay Borisov 		 */
1855995e9a16SNikolay Borisov 		if (prev_cmp == 0) {
1856995e9a16SNikolay Borisov 			slot = 0;
1857995e9a16SNikolay Borisov 			ret = 0;
1858995e9a16SNikolay Borisov 		} else {
1859995e9a16SNikolay Borisov 			ret = btrfs_bin_search(b, key, &slot);
1860995e9a16SNikolay Borisov 			prev_cmp = ret;
1861415b35a5SLiu Bo 			if (ret < 0)
1862415b35a5SLiu Bo 				goto done;
1863995e9a16SNikolay Borisov 		}
1864b4ce94deSChris Mason 
1865f624d976SQu Wenruo 		if (level == 0) {
1866be0e5c09SChris Mason 			p->slots[level] = slot;
18679a664971Sethanwu 			/*
18689a664971Sethanwu 			 * Item key already exists. In this case, if we are
18699a664971Sethanwu 			 * allowed to insert the item (for example, in dir_item
18709a664971Sethanwu 			 * case, item key collision is allowed), it will be
18719a664971Sethanwu 			 * merged with the original item. Only the item size
18729a664971Sethanwu 			 * grows, no new btrfs item will be added. If
18739a664971Sethanwu 			 * search_for_extension is not set, ins_len already
18749a664971Sethanwu 			 * accounts the size btrfs_item, deduct it here so leaf
18759a664971Sethanwu 			 * space check will be correct.
18769a664971Sethanwu 			 */
18779a664971Sethanwu 			if (ret == 0 && ins_len > 0 && !p->search_for_extension) {
18789a664971Sethanwu 				ASSERT(ins_len >= sizeof(struct btrfs_item));
18799a664971Sethanwu 				ins_len -= sizeof(struct btrfs_item);
18809a664971Sethanwu 			}
188187b29b20SYan Zheng 			if (ins_len > 0 &&
1882e902baacSDavid Sterba 			    btrfs_leaf_free_space(b) < ins_len) {
1883bd681513SChris Mason 				if (write_lock_level < 1) {
1884bd681513SChris Mason 					write_lock_level = 1;
1885bd681513SChris Mason 					btrfs_release_path(p);
1886bd681513SChris Mason 					goto again;
1887bd681513SChris Mason 				}
1888bd681513SChris Mason 
188933c66f43SYan Zheng 				err = split_leaf(trans, root, key,
1890cc0c5538SChris Mason 						 p, ins_len, ret == 0);
1891b4ce94deSChris Mason 
189233c66f43SYan Zheng 				BUG_ON(err > 0);
189333c66f43SYan Zheng 				if (err) {
189433c66f43SYan Zheng 					ret = err;
189565b51a00SChris Mason 					goto done;
189665b51a00SChris Mason 				}
18975c680ed6SChris Mason 			}
1898459931ecSChris Mason 			if (!p->search_for_split)
1899f7c79f30SChris Mason 				unlock_up(p, level, lowest_unlock,
19004b6f8e96SLiu Bo 					  min_write_lock_level, NULL);
190165b51a00SChris Mason 			goto done;
190265b51a00SChris Mason 		}
1903f624d976SQu Wenruo 		if (ret && slot > 0) {
1904f624d976SQu Wenruo 			dec = 1;
1905f624d976SQu Wenruo 			slot--;
1906f624d976SQu Wenruo 		}
1907f624d976SQu Wenruo 		p->slots[level] = slot;
1908f624d976SQu Wenruo 		err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
1909f624d976SQu Wenruo 					     &write_lock_level);
1910f624d976SQu Wenruo 		if (err == -EAGAIN)
1911f624d976SQu Wenruo 			goto again;
1912f624d976SQu Wenruo 		if (err) {
1913f624d976SQu Wenruo 			ret = err;
1914f624d976SQu Wenruo 			goto done;
1915f624d976SQu Wenruo 		}
1916f624d976SQu Wenruo 		b = p->nodes[level];
1917f624d976SQu Wenruo 		slot = p->slots[level];
1918f624d976SQu Wenruo 
1919f624d976SQu Wenruo 		/*
1920f624d976SQu Wenruo 		 * Slot 0 is special, if we change the key we have to update
1921f624d976SQu Wenruo 		 * the parent pointer which means we must have a write lock on
1922f624d976SQu Wenruo 		 * the parent
1923f624d976SQu Wenruo 		 */
1924f624d976SQu Wenruo 		if (slot == 0 && ins_len && write_lock_level < level + 1) {
1925f624d976SQu Wenruo 			write_lock_level = level + 1;
1926f624d976SQu Wenruo 			btrfs_release_path(p);
1927f624d976SQu Wenruo 			goto again;
1928f624d976SQu Wenruo 		}
1929f624d976SQu Wenruo 
1930f624d976SQu Wenruo 		unlock_up(p, level, lowest_unlock, min_write_lock_level,
1931f624d976SQu Wenruo 			  &write_lock_level);
1932f624d976SQu Wenruo 
1933f624d976SQu Wenruo 		if (level == lowest_level) {
1934f624d976SQu Wenruo 			if (dec)
1935f624d976SQu Wenruo 				p->slots[level]++;
1936f624d976SQu Wenruo 			goto done;
1937f624d976SQu Wenruo 		}
1938f624d976SQu Wenruo 
1939f624d976SQu Wenruo 		err = read_block_for_search(root, p, &b, level, slot, key);
1940f624d976SQu Wenruo 		if (err == -EAGAIN)
1941f624d976SQu Wenruo 			goto again;
1942f624d976SQu Wenruo 		if (err) {
1943f624d976SQu Wenruo 			ret = err;
1944f624d976SQu Wenruo 			goto done;
1945f624d976SQu Wenruo 		}
1946f624d976SQu Wenruo 
1947f624d976SQu Wenruo 		if (!p->skip_locking) {
1948f624d976SQu Wenruo 			level = btrfs_header_level(b);
1949f624d976SQu Wenruo 			if (level <= write_lock_level) {
1950f624d976SQu Wenruo 				btrfs_tree_lock(b);
1951f624d976SQu Wenruo 				p->locks[level] = BTRFS_WRITE_LOCK;
1952f624d976SQu Wenruo 			} else {
1953fe596ca3SJosef Bacik 				btrfs_tree_read_lock(b);
1954f624d976SQu Wenruo 				p->locks[level] = BTRFS_READ_LOCK;
1955f624d976SQu Wenruo 			}
1956f624d976SQu Wenruo 			p->nodes[level] = b;
1957f624d976SQu Wenruo 		}
195865b51a00SChris Mason 	}
195965b51a00SChris Mason 	ret = 1;
196065b51a00SChris Mason done:
19615f5bc6b1SFilipe Manana 	if (ret < 0 && !p->skip_release_on_error)
1962b3b4aa74SDavid Sterba 		btrfs_release_path(p);
1963be0e5c09SChris Mason 	return ret;
1964be0e5c09SChris Mason }
1965f75e2b79SJosef Bacik ALLOW_ERROR_INJECTION(btrfs_search_slot, ERRNO);
1966be0e5c09SChris Mason 
196774123bd7SChris Mason /*
19685d9e75c4SJan Schmidt  * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
19695d9e75c4SJan Schmidt  * current state of the tree together with the operations recorded in the tree
19705d9e75c4SJan Schmidt  * modification log to search for the key in a previous version of this tree, as
19715d9e75c4SJan Schmidt  * denoted by the time_seq parameter.
19725d9e75c4SJan Schmidt  *
19735d9e75c4SJan Schmidt  * Naturally, there is no support for insert, delete or cow operations.
19745d9e75c4SJan Schmidt  *
19755d9e75c4SJan Schmidt  * The resulting path and return value will be set up as if we called
19765d9e75c4SJan Schmidt  * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
19775d9e75c4SJan Schmidt  */
1978310712b2SOmar Sandoval int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
19795d9e75c4SJan Schmidt 			  struct btrfs_path *p, u64 time_seq)
19805d9e75c4SJan Schmidt {
19810b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
19825d9e75c4SJan Schmidt 	struct extent_buffer *b;
19835d9e75c4SJan Schmidt 	int slot;
19845d9e75c4SJan Schmidt 	int ret;
19855d9e75c4SJan Schmidt 	int err;
19865d9e75c4SJan Schmidt 	int level;
19875d9e75c4SJan Schmidt 	int lowest_unlock = 1;
19885d9e75c4SJan Schmidt 	u8 lowest_level = 0;
19895d9e75c4SJan Schmidt 
19905d9e75c4SJan Schmidt 	lowest_level = p->lowest_level;
19915d9e75c4SJan Schmidt 	WARN_ON(p->nodes[0] != NULL);
19925d9e75c4SJan Schmidt 
19935d9e75c4SJan Schmidt 	if (p->search_commit_root) {
19945d9e75c4SJan Schmidt 		BUG_ON(time_seq);
19955d9e75c4SJan Schmidt 		return btrfs_search_slot(NULL, root, key, p, 0, 0);
19965d9e75c4SJan Schmidt 	}
19975d9e75c4SJan Schmidt 
19985d9e75c4SJan Schmidt again:
1999f3a84ccdSFilipe Manana 	b = btrfs_get_old_root(root, time_seq);
2000315bed43SNikolay Borisov 	if (!b) {
2001315bed43SNikolay Borisov 		ret = -EIO;
2002315bed43SNikolay Borisov 		goto done;
2003315bed43SNikolay Borisov 	}
20045d9e75c4SJan Schmidt 	level = btrfs_header_level(b);
20055d9e75c4SJan Schmidt 	p->locks[level] = BTRFS_READ_LOCK;
20065d9e75c4SJan Schmidt 
20075d9e75c4SJan Schmidt 	while (b) {
2008abe9339dSQu Wenruo 		int dec = 0;
2009abe9339dSQu Wenruo 
20105d9e75c4SJan Schmidt 		level = btrfs_header_level(b);
20115d9e75c4SJan Schmidt 		p->nodes[level] = b;
20125d9e75c4SJan Schmidt 
20135d9e75c4SJan Schmidt 		/*
20145d9e75c4SJan Schmidt 		 * we have a lock on b and as long as we aren't changing
20155d9e75c4SJan Schmidt 		 * the tree, there is no way to for the items in b to change.
20165d9e75c4SJan Schmidt 		 * It is safe to drop the lock on our parent before we
20175d9e75c4SJan Schmidt 		 * go through the expensive btree search on b.
20185d9e75c4SJan Schmidt 		 */
20195d9e75c4SJan Schmidt 		btrfs_unlock_up_safe(p, level + 1);
20205d9e75c4SJan Schmidt 
2021995e9a16SNikolay Borisov 		ret = btrfs_bin_search(b, key, &slot);
2022cbca7d59SFilipe Manana 		if (ret < 0)
2023cbca7d59SFilipe Manana 			goto done;
20245d9e75c4SJan Schmidt 
2025abe9339dSQu Wenruo 		if (level == 0) {
2026abe9339dSQu Wenruo 			p->slots[level] = slot;
2027abe9339dSQu Wenruo 			unlock_up(p, level, lowest_unlock, 0, NULL);
2028abe9339dSQu Wenruo 			goto done;
2029abe9339dSQu Wenruo 		}
2030abe9339dSQu Wenruo 
20315d9e75c4SJan Schmidt 		if (ret && slot > 0) {
20325d9e75c4SJan Schmidt 			dec = 1;
2033abe9339dSQu Wenruo 			slot--;
20345d9e75c4SJan Schmidt 		}
20355d9e75c4SJan Schmidt 		p->slots[level] = slot;
20365d9e75c4SJan Schmidt 		unlock_up(p, level, lowest_unlock, 0, NULL);
20375d9e75c4SJan Schmidt 
20385d9e75c4SJan Schmidt 		if (level == lowest_level) {
20395d9e75c4SJan Schmidt 			if (dec)
20405d9e75c4SJan Schmidt 				p->slots[level]++;
20415d9e75c4SJan Schmidt 			goto done;
20425d9e75c4SJan Schmidt 		}
20435d9e75c4SJan Schmidt 
2044abe9339dSQu Wenruo 		err = read_block_for_search(root, p, &b, level, slot, key);
20455d9e75c4SJan Schmidt 		if (err == -EAGAIN)
20465d9e75c4SJan Schmidt 			goto again;
20475d9e75c4SJan Schmidt 		if (err) {
20485d9e75c4SJan Schmidt 			ret = err;
20495d9e75c4SJan Schmidt 			goto done;
20505d9e75c4SJan Schmidt 		}
20515d9e75c4SJan Schmidt 
20525d9e75c4SJan Schmidt 		level = btrfs_header_level(b);
20535d9e75c4SJan Schmidt 		btrfs_tree_read_lock(b);
2054f3a84ccdSFilipe Manana 		b = btrfs_tree_mod_log_rewind(fs_info, p, b, time_seq);
2055db7f3436SJosef Bacik 		if (!b) {
2056db7f3436SJosef Bacik 			ret = -ENOMEM;
2057db7f3436SJosef Bacik 			goto done;
2058db7f3436SJosef Bacik 		}
20595d9e75c4SJan Schmidt 		p->locks[level] = BTRFS_READ_LOCK;
20605d9e75c4SJan Schmidt 		p->nodes[level] = b;
20615d9e75c4SJan Schmidt 	}
20625d9e75c4SJan Schmidt 	ret = 1;
20635d9e75c4SJan Schmidt done:
20645d9e75c4SJan Schmidt 	if (ret < 0)
20655d9e75c4SJan Schmidt 		btrfs_release_path(p);
20665d9e75c4SJan Schmidt 
20675d9e75c4SJan Schmidt 	return ret;
20685d9e75c4SJan Schmidt }
20695d9e75c4SJan Schmidt 
20705d9e75c4SJan Schmidt /*
20712f38b3e1SArne Jansen  * helper to use instead of search slot if no exact match is needed but
20722f38b3e1SArne Jansen  * instead the next or previous item should be returned.
20732f38b3e1SArne Jansen  * When find_higher is true, the next higher item is returned, the next lower
20742f38b3e1SArne Jansen  * otherwise.
20752f38b3e1SArne Jansen  * When return_any and find_higher are both true, and no higher item is found,
20762f38b3e1SArne Jansen  * return the next lower instead.
20772f38b3e1SArne Jansen  * When return_any is true and find_higher is false, and no lower item is found,
20782f38b3e1SArne Jansen  * return the next higher instead.
20792f38b3e1SArne Jansen  * It returns 0 if any item is found, 1 if none is found (tree empty), and
20802f38b3e1SArne Jansen  * < 0 on error
20812f38b3e1SArne Jansen  */
20822f38b3e1SArne Jansen int btrfs_search_slot_for_read(struct btrfs_root *root,
2083310712b2SOmar Sandoval 			       const struct btrfs_key *key,
2084310712b2SOmar Sandoval 			       struct btrfs_path *p, int find_higher,
2085310712b2SOmar Sandoval 			       int return_any)
20862f38b3e1SArne Jansen {
20872f38b3e1SArne Jansen 	int ret;
20882f38b3e1SArne Jansen 	struct extent_buffer *leaf;
20892f38b3e1SArne Jansen 
20902f38b3e1SArne Jansen again:
20912f38b3e1SArne Jansen 	ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
20922f38b3e1SArne Jansen 	if (ret <= 0)
20932f38b3e1SArne Jansen 		return ret;
20942f38b3e1SArne Jansen 	/*
20952f38b3e1SArne Jansen 	 * a return value of 1 means the path is at the position where the
20962f38b3e1SArne Jansen 	 * item should be inserted. Normally this is the next bigger item,
20972f38b3e1SArne Jansen 	 * but in case the previous item is the last in a leaf, path points
20982f38b3e1SArne Jansen 	 * to the first free slot in the previous leaf, i.e. at an invalid
20992f38b3e1SArne Jansen 	 * item.
21002f38b3e1SArne Jansen 	 */
21012f38b3e1SArne Jansen 	leaf = p->nodes[0];
21022f38b3e1SArne Jansen 
21032f38b3e1SArne Jansen 	if (find_higher) {
21042f38b3e1SArne Jansen 		if (p->slots[0] >= btrfs_header_nritems(leaf)) {
21052f38b3e1SArne Jansen 			ret = btrfs_next_leaf(root, p);
21062f38b3e1SArne Jansen 			if (ret <= 0)
21072f38b3e1SArne Jansen 				return ret;
21082f38b3e1SArne Jansen 			if (!return_any)
21092f38b3e1SArne Jansen 				return 1;
21102f38b3e1SArne Jansen 			/*
21112f38b3e1SArne Jansen 			 * no higher item found, return the next
21122f38b3e1SArne Jansen 			 * lower instead
21132f38b3e1SArne Jansen 			 */
21142f38b3e1SArne Jansen 			return_any = 0;
21152f38b3e1SArne Jansen 			find_higher = 0;
21162f38b3e1SArne Jansen 			btrfs_release_path(p);
21172f38b3e1SArne Jansen 			goto again;
21182f38b3e1SArne Jansen 		}
21192f38b3e1SArne Jansen 	} else {
21202f38b3e1SArne Jansen 		if (p->slots[0] == 0) {
21212f38b3e1SArne Jansen 			ret = btrfs_prev_leaf(root, p);
2122e6793769SArne Jansen 			if (ret < 0)
21232f38b3e1SArne Jansen 				return ret;
2124e6793769SArne Jansen 			if (!ret) {
212523c6bf6aSFilipe David Borba Manana 				leaf = p->nodes[0];
212623c6bf6aSFilipe David Borba Manana 				if (p->slots[0] == btrfs_header_nritems(leaf))
212723c6bf6aSFilipe David Borba Manana 					p->slots[0]--;
2128e6793769SArne Jansen 				return 0;
2129e6793769SArne Jansen 			}
21302f38b3e1SArne Jansen 			if (!return_any)
21312f38b3e1SArne Jansen 				return 1;
21322f38b3e1SArne Jansen 			/*
21332f38b3e1SArne Jansen 			 * no lower item found, return the next
21342f38b3e1SArne Jansen 			 * higher instead
21352f38b3e1SArne Jansen 			 */
21362f38b3e1SArne Jansen 			return_any = 0;
21372f38b3e1SArne Jansen 			find_higher = 1;
21382f38b3e1SArne Jansen 			btrfs_release_path(p);
21392f38b3e1SArne Jansen 			goto again;
2140e6793769SArne Jansen 		} else {
21412f38b3e1SArne Jansen 			--p->slots[0];
21422f38b3e1SArne Jansen 		}
21432f38b3e1SArne Jansen 	}
21442f38b3e1SArne Jansen 	return 0;
21452f38b3e1SArne Jansen }
21462f38b3e1SArne Jansen 
21472f38b3e1SArne Jansen /*
214874123bd7SChris Mason  * adjust the pointers going up the tree, starting at level
214974123bd7SChris Mason  * making sure the right key of each node is points to 'key'.
215074123bd7SChris Mason  * This is used after shifting pointers to the left, so it stops
215174123bd7SChris Mason  * fixing up pointers when a given leaf/node is not in slot 0 of the
215274123bd7SChris Mason  * higher levels
2153aa5d6bedSChris Mason  *
215474123bd7SChris Mason  */
2155b167fa91SNikolay Borisov static void fixup_low_keys(struct btrfs_path *path,
21565f39d397SChris Mason 			   struct btrfs_disk_key *key, int level)
2157be0e5c09SChris Mason {
2158be0e5c09SChris Mason 	int i;
21595f39d397SChris Mason 	struct extent_buffer *t;
21600e82bcfeSDavid Sterba 	int ret;
21615f39d397SChris Mason 
2162234b63a0SChris Mason 	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2163be0e5c09SChris Mason 		int tslot = path->slots[i];
21640e82bcfeSDavid Sterba 
2165eb60ceacSChris Mason 		if (!path->nodes[i])
2166be0e5c09SChris Mason 			break;
21675f39d397SChris Mason 		t = path->nodes[i];
2168f3a84ccdSFilipe Manana 		ret = btrfs_tree_mod_log_insert_key(t, tslot,
2169f3a84ccdSFilipe Manana 				BTRFS_MOD_LOG_KEY_REPLACE, GFP_ATOMIC);
21700e82bcfeSDavid Sterba 		BUG_ON(ret < 0);
21715f39d397SChris Mason 		btrfs_set_node_key(t, key, tslot);
2172d6025579SChris Mason 		btrfs_mark_buffer_dirty(path->nodes[i]);
2173be0e5c09SChris Mason 		if (tslot != 0)
2174be0e5c09SChris Mason 			break;
2175be0e5c09SChris Mason 	}
2176be0e5c09SChris Mason }
2177be0e5c09SChris Mason 
217874123bd7SChris Mason /*
217931840ae1SZheng Yan  * update item key.
218031840ae1SZheng Yan  *
218131840ae1SZheng Yan  * This function isn't completely safe. It's the caller's responsibility
218231840ae1SZheng Yan  * that the new key won't break the order
218331840ae1SZheng Yan  */
2184b7a0365eSDaniel Dressler void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
2185b7a0365eSDaniel Dressler 			     struct btrfs_path *path,
2186310712b2SOmar Sandoval 			     const struct btrfs_key *new_key)
218731840ae1SZheng Yan {
218831840ae1SZheng Yan 	struct btrfs_disk_key disk_key;
218931840ae1SZheng Yan 	struct extent_buffer *eb;
219031840ae1SZheng Yan 	int slot;
219131840ae1SZheng Yan 
219231840ae1SZheng Yan 	eb = path->nodes[0];
219331840ae1SZheng Yan 	slot = path->slots[0];
219431840ae1SZheng Yan 	if (slot > 0) {
219531840ae1SZheng Yan 		btrfs_item_key(eb, &disk_key, slot - 1);
21967c15d410SQu Wenruo 		if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
21977c15d410SQu Wenruo 			btrfs_crit(fs_info,
21987c15d410SQu Wenruo 		"slot %u key (%llu %u %llu) new key (%llu %u %llu)",
21997c15d410SQu Wenruo 				   slot, btrfs_disk_key_objectid(&disk_key),
22007c15d410SQu Wenruo 				   btrfs_disk_key_type(&disk_key),
22017c15d410SQu Wenruo 				   btrfs_disk_key_offset(&disk_key),
22027c15d410SQu Wenruo 				   new_key->objectid, new_key->type,
22037c15d410SQu Wenruo 				   new_key->offset);
22047c15d410SQu Wenruo 			btrfs_print_leaf(eb);
22057c15d410SQu Wenruo 			BUG();
22067c15d410SQu Wenruo 		}
220731840ae1SZheng Yan 	}
220831840ae1SZheng Yan 	if (slot < btrfs_header_nritems(eb) - 1) {
220931840ae1SZheng Yan 		btrfs_item_key(eb, &disk_key, slot + 1);
22107c15d410SQu Wenruo 		if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
22117c15d410SQu Wenruo 			btrfs_crit(fs_info,
22127c15d410SQu Wenruo 		"slot %u key (%llu %u %llu) new key (%llu %u %llu)",
22137c15d410SQu Wenruo 				   slot, btrfs_disk_key_objectid(&disk_key),
22147c15d410SQu Wenruo 				   btrfs_disk_key_type(&disk_key),
22157c15d410SQu Wenruo 				   btrfs_disk_key_offset(&disk_key),
22167c15d410SQu Wenruo 				   new_key->objectid, new_key->type,
22177c15d410SQu Wenruo 				   new_key->offset);
22187c15d410SQu Wenruo 			btrfs_print_leaf(eb);
22197c15d410SQu Wenruo 			BUG();
22207c15d410SQu Wenruo 		}
222131840ae1SZheng Yan 	}
222231840ae1SZheng Yan 
222331840ae1SZheng Yan 	btrfs_cpu_key_to_disk(&disk_key, new_key);
222431840ae1SZheng Yan 	btrfs_set_item_key(eb, &disk_key, slot);
222531840ae1SZheng Yan 	btrfs_mark_buffer_dirty(eb);
222631840ae1SZheng Yan 	if (slot == 0)
2227b167fa91SNikolay Borisov 		fixup_low_keys(path, &disk_key, 1);
222831840ae1SZheng Yan }
222931840ae1SZheng Yan 
223031840ae1SZheng Yan /*
2231d16c702fSQu Wenruo  * Check key order of two sibling extent buffers.
2232d16c702fSQu Wenruo  *
2233d16c702fSQu Wenruo  * Return true if something is wrong.
2234d16c702fSQu Wenruo  * Return false if everything is fine.
2235d16c702fSQu Wenruo  *
2236d16c702fSQu Wenruo  * Tree-checker only works inside one tree block, thus the following
2237d16c702fSQu Wenruo  * corruption can not be detected by tree-checker:
2238d16c702fSQu Wenruo  *
2239d16c702fSQu Wenruo  * Leaf @left			| Leaf @right
2240d16c702fSQu Wenruo  * --------------------------------------------------------------
2241d16c702fSQu Wenruo  * | 1 | 2 | 3 | 4 | 5 | f6 |   | 7 | 8 |
2242d16c702fSQu Wenruo  *
2243d16c702fSQu Wenruo  * Key f6 in leaf @left itself is valid, but not valid when the next
2244d16c702fSQu Wenruo  * key in leaf @right is 7.
2245d16c702fSQu Wenruo  * This can only be checked at tree block merge time.
2246d16c702fSQu Wenruo  * And since tree checker has ensured all key order in each tree block
2247d16c702fSQu Wenruo  * is correct, we only need to bother the last key of @left and the first
2248d16c702fSQu Wenruo  * key of @right.
2249d16c702fSQu Wenruo  */
2250d16c702fSQu Wenruo static bool check_sibling_keys(struct extent_buffer *left,
2251d16c702fSQu Wenruo 			       struct extent_buffer *right)
2252d16c702fSQu Wenruo {
2253d16c702fSQu Wenruo 	struct btrfs_key left_last;
2254d16c702fSQu Wenruo 	struct btrfs_key right_first;
2255d16c702fSQu Wenruo 	int level = btrfs_header_level(left);
2256d16c702fSQu Wenruo 	int nr_left = btrfs_header_nritems(left);
2257d16c702fSQu Wenruo 	int nr_right = btrfs_header_nritems(right);
2258d16c702fSQu Wenruo 
2259d16c702fSQu Wenruo 	/* No key to check in one of the tree blocks */
2260d16c702fSQu Wenruo 	if (!nr_left || !nr_right)
2261d16c702fSQu Wenruo 		return false;
2262d16c702fSQu Wenruo 
2263d16c702fSQu Wenruo 	if (level) {
2264d16c702fSQu Wenruo 		btrfs_node_key_to_cpu(left, &left_last, nr_left - 1);
2265d16c702fSQu Wenruo 		btrfs_node_key_to_cpu(right, &right_first, 0);
2266d16c702fSQu Wenruo 	} else {
2267d16c702fSQu Wenruo 		btrfs_item_key_to_cpu(left, &left_last, nr_left - 1);
2268d16c702fSQu Wenruo 		btrfs_item_key_to_cpu(right, &right_first, 0);
2269d16c702fSQu Wenruo 	}
2270d16c702fSQu Wenruo 
2271d16c702fSQu Wenruo 	if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
2272d16c702fSQu Wenruo 		btrfs_crit(left->fs_info,
2273d16c702fSQu Wenruo "bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
2274d16c702fSQu Wenruo 			   left_last.objectid, left_last.type,
2275d16c702fSQu Wenruo 			   left_last.offset, right_first.objectid,
2276d16c702fSQu Wenruo 			   right_first.type, right_first.offset);
2277d16c702fSQu Wenruo 		return true;
2278d16c702fSQu Wenruo 	}
2279d16c702fSQu Wenruo 	return false;
2280d16c702fSQu Wenruo }
2281d16c702fSQu Wenruo 
2282d16c702fSQu Wenruo /*
228374123bd7SChris Mason  * try to push data from one node into the next node left in the
228479f95c82SChris Mason  * tree.
2285aa5d6bedSChris Mason  *
2286aa5d6bedSChris Mason  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
2287aa5d6bedSChris Mason  * error, and > 0 if there was no room in the left hand block.
228874123bd7SChris Mason  */
228998ed5174SChris Mason static int push_node_left(struct btrfs_trans_handle *trans,
22902ff7e61eSJeff Mahoney 			  struct extent_buffer *dst,
2291971a1f66SChris Mason 			  struct extent_buffer *src, int empty)
2292be0e5c09SChris Mason {
2293d30a668fSDavid Sterba 	struct btrfs_fs_info *fs_info = trans->fs_info;
2294be0e5c09SChris Mason 	int push_items = 0;
2295bb803951SChris Mason 	int src_nritems;
2296bb803951SChris Mason 	int dst_nritems;
2297aa5d6bedSChris Mason 	int ret = 0;
2298be0e5c09SChris Mason 
22995f39d397SChris Mason 	src_nritems = btrfs_header_nritems(src);
23005f39d397SChris Mason 	dst_nritems = btrfs_header_nritems(dst);
23010b246afaSJeff Mahoney 	push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
23027bb86316SChris Mason 	WARN_ON(btrfs_header_generation(src) != trans->transid);
23037bb86316SChris Mason 	WARN_ON(btrfs_header_generation(dst) != trans->transid);
230454aa1f4dSChris Mason 
2305bce4eae9SChris Mason 	if (!empty && src_nritems <= 8)
2306971a1f66SChris Mason 		return 1;
2307971a1f66SChris Mason 
2308d397712bSChris Mason 	if (push_items <= 0)
2309be0e5c09SChris Mason 		return 1;
2310be0e5c09SChris Mason 
2311bce4eae9SChris Mason 	if (empty) {
2312971a1f66SChris Mason 		push_items = min(src_nritems, push_items);
2313bce4eae9SChris Mason 		if (push_items < src_nritems) {
2314bce4eae9SChris Mason 			/* leave at least 8 pointers in the node if
2315bce4eae9SChris Mason 			 * we aren't going to empty it
2316bce4eae9SChris Mason 			 */
2317bce4eae9SChris Mason 			if (src_nritems - push_items < 8) {
2318bce4eae9SChris Mason 				if (push_items <= 8)
2319bce4eae9SChris Mason 					return 1;
2320bce4eae9SChris Mason 				push_items -= 8;
2321bce4eae9SChris Mason 			}
2322bce4eae9SChris Mason 		}
2323bce4eae9SChris Mason 	} else
2324bce4eae9SChris Mason 		push_items = min(src_nritems - 8, push_items);
232579f95c82SChris Mason 
2326d16c702fSQu Wenruo 	/* dst is the left eb, src is the middle eb */
2327d16c702fSQu Wenruo 	if (check_sibling_keys(dst, src)) {
2328d16c702fSQu Wenruo 		ret = -EUCLEAN;
2329d16c702fSQu Wenruo 		btrfs_abort_transaction(trans, ret);
2330d16c702fSQu Wenruo 		return ret;
2331d16c702fSQu Wenruo 	}
2332f3a84ccdSFilipe Manana 	ret = btrfs_tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
23335de865eeSFilipe David Borba Manana 	if (ret) {
233466642832SJeff Mahoney 		btrfs_abort_transaction(trans, ret);
23355de865eeSFilipe David Borba Manana 		return ret;
23365de865eeSFilipe David Borba Manana 	}
23375f39d397SChris Mason 	copy_extent_buffer(dst, src,
23385f39d397SChris Mason 			   btrfs_node_key_ptr_offset(dst_nritems),
23395f39d397SChris Mason 			   btrfs_node_key_ptr_offset(0),
2340123abc88SChris Mason 			   push_items * sizeof(struct btrfs_key_ptr));
23415f39d397SChris Mason 
2342bb803951SChris Mason 	if (push_items < src_nritems) {
234357911b8bSJan Schmidt 		/*
2344f3a84ccdSFilipe Manana 		 * Don't call btrfs_tree_mod_log_insert_move() here, key removal
2345f3a84ccdSFilipe Manana 		 * was already fully logged by btrfs_tree_mod_log_eb_copy() above.
234657911b8bSJan Schmidt 		 */
23475f39d397SChris Mason 		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
23485f39d397SChris Mason 				      btrfs_node_key_ptr_offset(push_items),
2349e2fa7227SChris Mason 				      (src_nritems - push_items) *
2350123abc88SChris Mason 				      sizeof(struct btrfs_key_ptr));
2351bb803951SChris Mason 	}
23525f39d397SChris Mason 	btrfs_set_header_nritems(src, src_nritems - push_items);
23535f39d397SChris Mason 	btrfs_set_header_nritems(dst, dst_nritems + push_items);
23545f39d397SChris Mason 	btrfs_mark_buffer_dirty(src);
23555f39d397SChris Mason 	btrfs_mark_buffer_dirty(dst);
235631840ae1SZheng Yan 
2357bb803951SChris Mason 	return ret;
2358be0e5c09SChris Mason }
2359be0e5c09SChris Mason 
236097571fd0SChris Mason /*
236179f95c82SChris Mason  * try to push data from one node into the next node right in the
236279f95c82SChris Mason  * tree.
236379f95c82SChris Mason  *
236479f95c82SChris Mason  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
236579f95c82SChris Mason  * error, and > 0 if there was no room in the right hand block.
236679f95c82SChris Mason  *
236779f95c82SChris Mason  * this will  only push up to 1/2 the contents of the left node over
236879f95c82SChris Mason  */
23695f39d397SChris Mason static int balance_node_right(struct btrfs_trans_handle *trans,
23705f39d397SChris Mason 			      struct extent_buffer *dst,
23715f39d397SChris Mason 			      struct extent_buffer *src)
237279f95c82SChris Mason {
237355d32ed8SDavid Sterba 	struct btrfs_fs_info *fs_info = trans->fs_info;
237479f95c82SChris Mason 	int push_items = 0;
237579f95c82SChris Mason 	int max_push;
237679f95c82SChris Mason 	int src_nritems;
237779f95c82SChris Mason 	int dst_nritems;
237879f95c82SChris Mason 	int ret = 0;
237979f95c82SChris Mason 
23807bb86316SChris Mason 	WARN_ON(btrfs_header_generation(src) != trans->transid);
23817bb86316SChris Mason 	WARN_ON(btrfs_header_generation(dst) != trans->transid);
23827bb86316SChris Mason 
23835f39d397SChris Mason 	src_nritems = btrfs_header_nritems(src);
23845f39d397SChris Mason 	dst_nritems = btrfs_header_nritems(dst);
23850b246afaSJeff Mahoney 	push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
2386d397712bSChris Mason 	if (push_items <= 0)
238779f95c82SChris Mason 		return 1;
2388bce4eae9SChris Mason 
2389d397712bSChris Mason 	if (src_nritems < 4)
2390bce4eae9SChris Mason 		return 1;
239179f95c82SChris Mason 
239279f95c82SChris Mason 	max_push = src_nritems / 2 + 1;
239379f95c82SChris Mason 	/* don't try to empty the node */
2394d397712bSChris Mason 	if (max_push >= src_nritems)
239579f95c82SChris Mason 		return 1;
2396252c38f0SYan 
239779f95c82SChris Mason 	if (max_push < push_items)
239879f95c82SChris Mason 		push_items = max_push;
239979f95c82SChris Mason 
2400d16c702fSQu Wenruo 	/* dst is the right eb, src is the middle eb */
2401d16c702fSQu Wenruo 	if (check_sibling_keys(src, dst)) {
2402d16c702fSQu Wenruo 		ret = -EUCLEAN;
2403d16c702fSQu Wenruo 		btrfs_abort_transaction(trans, ret);
2404d16c702fSQu Wenruo 		return ret;
2405d16c702fSQu Wenruo 	}
2406f3a84ccdSFilipe Manana 	ret = btrfs_tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
2407bf1d3425SDavid Sterba 	BUG_ON(ret < 0);
24085f39d397SChris Mason 	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
24095f39d397SChris Mason 				      btrfs_node_key_ptr_offset(0),
24105f39d397SChris Mason 				      (dst_nritems) *
24115f39d397SChris Mason 				      sizeof(struct btrfs_key_ptr));
2412d6025579SChris Mason 
2413f3a84ccdSFilipe Manana 	ret = btrfs_tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
2414ed874f0dSDavid Sterba 					 push_items);
24155de865eeSFilipe David Borba Manana 	if (ret) {
241666642832SJeff Mahoney 		btrfs_abort_transaction(trans, ret);
24175de865eeSFilipe David Borba Manana 		return ret;
24185de865eeSFilipe David Borba Manana 	}
24195f39d397SChris Mason 	copy_extent_buffer(dst, src,
24205f39d397SChris Mason 			   btrfs_node_key_ptr_offset(0),
24215f39d397SChris Mason 			   btrfs_node_key_ptr_offset(src_nritems - push_items),
2422123abc88SChris Mason 			   push_items * sizeof(struct btrfs_key_ptr));
242379f95c82SChris Mason 
24245f39d397SChris Mason 	btrfs_set_header_nritems(src, src_nritems - push_items);
24255f39d397SChris Mason 	btrfs_set_header_nritems(dst, dst_nritems + push_items);
242679f95c82SChris Mason 
24275f39d397SChris Mason 	btrfs_mark_buffer_dirty(src);
24285f39d397SChris Mason 	btrfs_mark_buffer_dirty(dst);
242931840ae1SZheng Yan 
243079f95c82SChris Mason 	return ret;
243179f95c82SChris Mason }
243279f95c82SChris Mason 
243379f95c82SChris Mason /*
243497571fd0SChris Mason  * helper function to insert a new root level in the tree.
243597571fd0SChris Mason  * A new node is allocated, and a single item is inserted to
243697571fd0SChris Mason  * point to the existing root
2437aa5d6bedSChris Mason  *
2438aa5d6bedSChris Mason  * returns zero on success or < 0 on failure.
243997571fd0SChris Mason  */
2440d397712bSChris Mason static noinline int insert_new_root(struct btrfs_trans_handle *trans,
24415f39d397SChris Mason 			   struct btrfs_root *root,
2442fdd99c72SLiu Bo 			   struct btrfs_path *path, int level)
244374123bd7SChris Mason {
24440b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
24457bb86316SChris Mason 	u64 lower_gen;
24465f39d397SChris Mason 	struct extent_buffer *lower;
24475f39d397SChris Mason 	struct extent_buffer *c;
2448925baeddSChris Mason 	struct extent_buffer *old;
24495f39d397SChris Mason 	struct btrfs_disk_key lower_key;
2450d9d19a01SDavid Sterba 	int ret;
24515c680ed6SChris Mason 
24525c680ed6SChris Mason 	BUG_ON(path->nodes[level]);
24535c680ed6SChris Mason 	BUG_ON(path->nodes[level-1] != root->node);
24545c680ed6SChris Mason 
24557bb86316SChris Mason 	lower = path->nodes[level-1];
24567bb86316SChris Mason 	if (level == 1)
24577bb86316SChris Mason 		btrfs_item_key(lower, &lower_key, 0);
24587bb86316SChris Mason 	else
24597bb86316SChris Mason 		btrfs_node_key(lower, &lower_key, 0);
24607bb86316SChris Mason 
2461a6279470SFilipe Manana 	c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
24629631e4ccSJosef Bacik 					 root->node->start, 0,
2463cf6f34aaSJosef Bacik 					 BTRFS_NESTING_NEW_ROOT);
24645f39d397SChris Mason 	if (IS_ERR(c))
24655f39d397SChris Mason 		return PTR_ERR(c);
2466925baeddSChris Mason 
24670b246afaSJeff Mahoney 	root_add_used(root, fs_info->nodesize);
2468f0486c68SYan, Zheng 
24695f39d397SChris Mason 	btrfs_set_header_nritems(c, 1);
24705f39d397SChris Mason 	btrfs_set_node_key(c, &lower_key, 0);
2471db94535dSChris Mason 	btrfs_set_node_blockptr(c, 0, lower->start);
24727bb86316SChris Mason 	lower_gen = btrfs_header_generation(lower);
247331840ae1SZheng Yan 	WARN_ON(lower_gen != trans->transid);
24747bb86316SChris Mason 
24757bb86316SChris Mason 	btrfs_set_node_ptr_generation(c, 0, lower_gen);
24765f39d397SChris Mason 
24775f39d397SChris Mason 	btrfs_mark_buffer_dirty(c);
2478d5719762SChris Mason 
2479925baeddSChris Mason 	old = root->node;
2480406808abSFilipe Manana 	ret = btrfs_tree_mod_log_insert_root(root->node, c, false);
2481d9d19a01SDavid Sterba 	BUG_ON(ret < 0);
2482240f62c8SChris Mason 	rcu_assign_pointer(root->node, c);
2483925baeddSChris Mason 
2484925baeddSChris Mason 	/* the super has an extra ref to root->node */
2485925baeddSChris Mason 	free_extent_buffer(old);
2486925baeddSChris Mason 
24870b86a832SChris Mason 	add_root_to_dirty_list(root);
248867439dadSDavid Sterba 	atomic_inc(&c->refs);
24895f39d397SChris Mason 	path->nodes[level] = c;
2490ac5887c8SJosef Bacik 	path->locks[level] = BTRFS_WRITE_LOCK;
249174123bd7SChris Mason 	path->slots[level] = 0;
249274123bd7SChris Mason 	return 0;
249374123bd7SChris Mason }
24945c680ed6SChris Mason 
24955c680ed6SChris Mason /*
24965c680ed6SChris Mason  * worker function to insert a single pointer in a node.
24975c680ed6SChris Mason  * the node should have enough room for the pointer already
249897571fd0SChris Mason  *
24995c680ed6SChris Mason  * slot and level indicate where you want the key to go, and
25005c680ed6SChris Mason  * blocknr is the block the key points to.
25015c680ed6SChris Mason  */
2502143bede5SJeff Mahoney static void insert_ptr(struct btrfs_trans_handle *trans,
25036ad3cf6dSDavid Sterba 		       struct btrfs_path *path,
2504143bede5SJeff Mahoney 		       struct btrfs_disk_key *key, u64 bytenr,
2505c3e06965SJan Schmidt 		       int slot, int level)
25065c680ed6SChris Mason {
25075f39d397SChris Mason 	struct extent_buffer *lower;
25085c680ed6SChris Mason 	int nritems;
2509f3ea38daSJan Schmidt 	int ret;
25105c680ed6SChris Mason 
25115c680ed6SChris Mason 	BUG_ON(!path->nodes[level]);
2512f0486c68SYan, Zheng 	btrfs_assert_tree_locked(path->nodes[level]);
25135f39d397SChris Mason 	lower = path->nodes[level];
25145f39d397SChris Mason 	nritems = btrfs_header_nritems(lower);
2515c293498bSStoyan Gaydarov 	BUG_ON(slot > nritems);
25166ad3cf6dSDavid Sterba 	BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
251774123bd7SChris Mason 	if (slot != nritems) {
2518bf1d3425SDavid Sterba 		if (level) {
2519f3a84ccdSFilipe Manana 			ret = btrfs_tree_mod_log_insert_move(lower, slot + 1,
2520f3a84ccdSFilipe Manana 					slot, nritems - slot);
2521bf1d3425SDavid Sterba 			BUG_ON(ret < 0);
2522bf1d3425SDavid Sterba 		}
25235f39d397SChris Mason 		memmove_extent_buffer(lower,
25245f39d397SChris Mason 			      btrfs_node_key_ptr_offset(slot + 1),
25255f39d397SChris Mason 			      btrfs_node_key_ptr_offset(slot),
2526123abc88SChris Mason 			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
252774123bd7SChris Mason 	}
2528c3e06965SJan Schmidt 	if (level) {
2529f3a84ccdSFilipe Manana 		ret = btrfs_tree_mod_log_insert_key(lower, slot,
2530f3a84ccdSFilipe Manana 					    BTRFS_MOD_LOG_KEY_ADD, GFP_NOFS);
2531f3ea38daSJan Schmidt 		BUG_ON(ret < 0);
2532f3ea38daSJan Schmidt 	}
25335f39d397SChris Mason 	btrfs_set_node_key(lower, key, slot);
2534db94535dSChris Mason 	btrfs_set_node_blockptr(lower, slot, bytenr);
253574493f7aSChris Mason 	WARN_ON(trans->transid == 0);
253674493f7aSChris Mason 	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
25375f39d397SChris Mason 	btrfs_set_header_nritems(lower, nritems + 1);
25385f39d397SChris Mason 	btrfs_mark_buffer_dirty(lower);
253974123bd7SChris Mason }
254074123bd7SChris Mason 
254197571fd0SChris Mason /*
254297571fd0SChris Mason  * split the node at the specified level in path in two.
254397571fd0SChris Mason  * The path is corrected to point to the appropriate node after the split
254497571fd0SChris Mason  *
254597571fd0SChris Mason  * Before splitting this tries to make some room in the node by pushing
254697571fd0SChris Mason  * left and right, if either one works, it returns right away.
2547aa5d6bedSChris Mason  *
2548aa5d6bedSChris Mason  * returns 0 on success and < 0 on failure
254997571fd0SChris Mason  */
2550e02119d5SChris Mason static noinline int split_node(struct btrfs_trans_handle *trans,
2551e02119d5SChris Mason 			       struct btrfs_root *root,
2552e02119d5SChris Mason 			       struct btrfs_path *path, int level)
2553be0e5c09SChris Mason {
25540b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
25555f39d397SChris Mason 	struct extent_buffer *c;
25565f39d397SChris Mason 	struct extent_buffer *split;
25575f39d397SChris Mason 	struct btrfs_disk_key disk_key;
2558be0e5c09SChris Mason 	int mid;
25595c680ed6SChris Mason 	int ret;
25607518a238SChris Mason 	u32 c_nritems;
2561be0e5c09SChris Mason 
25625f39d397SChris Mason 	c = path->nodes[level];
25637bb86316SChris Mason 	WARN_ON(btrfs_header_generation(c) != trans->transid);
25645f39d397SChris Mason 	if (c == root->node) {
2565d9abbf1cSJan Schmidt 		/*
256690f8d62eSJan Schmidt 		 * trying to split the root, lets make a new one
256790f8d62eSJan Schmidt 		 *
2568fdd99c72SLiu Bo 		 * tree mod log: We don't log_removal old root in
256990f8d62eSJan Schmidt 		 * insert_new_root, because that root buffer will be kept as a
257090f8d62eSJan Schmidt 		 * normal node. We are going to log removal of half of the
2571f3a84ccdSFilipe Manana 		 * elements below with btrfs_tree_mod_log_eb_copy(). We're
2572f3a84ccdSFilipe Manana 		 * holding a tree lock on the buffer, which is why we cannot
2573f3a84ccdSFilipe Manana 		 * race with other tree_mod_log users.
2574d9abbf1cSJan Schmidt 		 */
2575fdd99c72SLiu Bo 		ret = insert_new_root(trans, root, path, level + 1);
25765c680ed6SChris Mason 		if (ret)
25775c680ed6SChris Mason 			return ret;
2578b3612421SChris Mason 	} else {
2579e66f709bSChris Mason 		ret = push_nodes_for_insert(trans, root, path, level);
25805f39d397SChris Mason 		c = path->nodes[level];
25815f39d397SChris Mason 		if (!ret && btrfs_header_nritems(c) <
25820b246afaSJeff Mahoney 		    BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3)
2583e66f709bSChris Mason 			return 0;
258454aa1f4dSChris Mason 		if (ret < 0)
258554aa1f4dSChris Mason 			return ret;
25865c680ed6SChris Mason 	}
2587e66f709bSChris Mason 
25885f39d397SChris Mason 	c_nritems = btrfs_header_nritems(c);
25895d4f98a2SYan Zheng 	mid = (c_nritems + 1) / 2;
25905d4f98a2SYan Zheng 	btrfs_node_key(c, &disk_key, mid);
25917bb86316SChris Mason 
2592a6279470SFilipe Manana 	split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
25934dff97e6SJosef Bacik 					     c->start, 0, BTRFS_NESTING_SPLIT);
25945f39d397SChris Mason 	if (IS_ERR(split))
25955f39d397SChris Mason 		return PTR_ERR(split);
259654aa1f4dSChris Mason 
25970b246afaSJeff Mahoney 	root_add_used(root, fs_info->nodesize);
2598bc877d28SNikolay Borisov 	ASSERT(btrfs_header_level(c) == level);
25995f39d397SChris Mason 
2600f3a84ccdSFilipe Manana 	ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
26015de865eeSFilipe David Borba Manana 	if (ret) {
260266642832SJeff Mahoney 		btrfs_abort_transaction(trans, ret);
26035de865eeSFilipe David Borba Manana 		return ret;
26045de865eeSFilipe David Borba Manana 	}
26055f39d397SChris Mason 	copy_extent_buffer(split, c,
26065f39d397SChris Mason 			   btrfs_node_key_ptr_offset(0),
26075f39d397SChris Mason 			   btrfs_node_key_ptr_offset(mid),
2608123abc88SChris Mason 			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
26095f39d397SChris Mason 	btrfs_set_header_nritems(split, c_nritems - mid);
26105f39d397SChris Mason 	btrfs_set_header_nritems(c, mid);
2611aa5d6bedSChris Mason 
26125f39d397SChris Mason 	btrfs_mark_buffer_dirty(c);
26135f39d397SChris Mason 	btrfs_mark_buffer_dirty(split);
26145f39d397SChris Mason 
26156ad3cf6dSDavid Sterba 	insert_ptr(trans, path, &disk_key, split->start,
2616c3e06965SJan Schmidt 		   path->slots[level + 1] + 1, level + 1);
2617aa5d6bedSChris Mason 
26185de08d7dSChris Mason 	if (path->slots[level] >= mid) {
26195c680ed6SChris Mason 		path->slots[level] -= mid;
2620925baeddSChris Mason 		btrfs_tree_unlock(c);
26215f39d397SChris Mason 		free_extent_buffer(c);
26225f39d397SChris Mason 		path->nodes[level] = split;
26235c680ed6SChris Mason 		path->slots[level + 1] += 1;
2624eb60ceacSChris Mason 	} else {
2625925baeddSChris Mason 		btrfs_tree_unlock(split);
26265f39d397SChris Mason 		free_extent_buffer(split);
2627be0e5c09SChris Mason 	}
2628d5286a92SNikolay Borisov 	return 0;
2629be0e5c09SChris Mason }
2630be0e5c09SChris Mason 
263174123bd7SChris Mason /*
263274123bd7SChris Mason  * how many bytes are required to store the items in a leaf.  start
263374123bd7SChris Mason  * and nr indicate which items in the leaf to check.  This totals up the
263474123bd7SChris Mason  * space used both by the item structs and the item data
263574123bd7SChris Mason  */
26365f39d397SChris Mason static int leaf_space_used(struct extent_buffer *l, int start, int nr)
2637be0e5c09SChris Mason {
263841be1f3bSJosef Bacik 	struct btrfs_item *start_item;
263941be1f3bSJosef Bacik 	struct btrfs_item *end_item;
2640be0e5c09SChris Mason 	int data_len;
26415f39d397SChris Mason 	int nritems = btrfs_header_nritems(l);
2642d4dbff95SChris Mason 	int end = min(nritems, start + nr) - 1;
2643be0e5c09SChris Mason 
2644be0e5c09SChris Mason 	if (!nr)
2645be0e5c09SChris Mason 		return 0;
2646dd3cc16bSRoss Kirk 	start_item = btrfs_item_nr(start);
2647dd3cc16bSRoss Kirk 	end_item = btrfs_item_nr(end);
2648a31356b9SDavid Sterba 	data_len = btrfs_item_offset(l, start_item) +
2649a31356b9SDavid Sterba 		   btrfs_item_size(l, start_item);
2650a31356b9SDavid Sterba 	data_len = data_len - btrfs_item_offset(l, end_item);
26510783fcfcSChris Mason 	data_len += sizeof(struct btrfs_item) * nr;
2652d4dbff95SChris Mason 	WARN_ON(data_len < 0);
2653be0e5c09SChris Mason 	return data_len;
2654be0e5c09SChris Mason }
2655be0e5c09SChris Mason 
265674123bd7SChris Mason /*
2657d4dbff95SChris Mason  * The space between the end of the leaf items and
2658d4dbff95SChris Mason  * the start of the leaf data.  IOW, how much room
2659d4dbff95SChris Mason  * the leaf has left for both items and data
2660d4dbff95SChris Mason  */
2661e902baacSDavid Sterba noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
2662d4dbff95SChris Mason {
2663e902baacSDavid Sterba 	struct btrfs_fs_info *fs_info = leaf->fs_info;
26645f39d397SChris Mason 	int nritems = btrfs_header_nritems(leaf);
26655f39d397SChris Mason 	int ret;
26660b246afaSJeff Mahoney 
26670b246afaSJeff Mahoney 	ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
26685f39d397SChris Mason 	if (ret < 0) {
26690b246afaSJeff Mahoney 		btrfs_crit(fs_info,
2670efe120a0SFrank Holton 			   "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
2671da17066cSJeff Mahoney 			   ret,
26720b246afaSJeff Mahoney 			   (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info),
26735f39d397SChris Mason 			   leaf_space_used(leaf, 0, nritems), nritems);
26745f39d397SChris Mason 	}
26755f39d397SChris Mason 	return ret;
2676d4dbff95SChris Mason }
2677d4dbff95SChris Mason 
267899d8f83cSChris Mason /*
267999d8f83cSChris Mason  * min slot controls the lowest index we're willing to push to the
268099d8f83cSChris Mason  * right.  We'll push up to and including min_slot, but no lower
268199d8f83cSChris Mason  */
/*
 * Worker for push_leaf_right(): move items from the end of the leaf in
 * path->nodes[0] (the "left" leaf) to the front of @right.
 *
 * @path:         nodes[0] is the source leaf, nodes[1] its parent;
 *                slots[0] is the slot of interest in the source leaf and
 *                slots[1] the parent slot pointing at it
 * @data_size:    extra bytes accounted for when the scan reaches
 *                path->slots[0] (or up front if slots[0] is past the end)
 * @empty:        nonzero allows every item to be pushed (lower bound 0)
 *                and disables the early-stop checks around path->slots[0]
 * @right:        destination leaf; it is unlocked and released here
 *                unless it ends up as path->nodes[0]
 * @free_space:   byte budget available in @right
 * @left_nritems: number of items in the source leaf
 * @min_slot:     lowest slot index that may be pushed
 *
 * Returns 0 if at least one item was pushed and 1 if nothing was pushed.
 */
2682f72f0010SDavid Sterba static noinline int __push_leaf_right(struct btrfs_path *path,
268344871b1bSChris Mason 				      int data_size, int empty,
268444871b1bSChris Mason 				      struct extent_buffer *right,
268599d8f83cSChris Mason 				      int free_space, u32 left_nritems,
268699d8f83cSChris Mason 				      u32 min_slot)
268700ec4c51SChris Mason {
2688f72f0010SDavid Sterba 	struct btrfs_fs_info *fs_info = right->fs_info;
26895f39d397SChris Mason 	struct extent_buffer *left = path->nodes[0];
269044871b1bSChris Mason 	struct extent_buffer *upper = path->nodes[1];
2691cfed81a0SChris Mason 	struct btrfs_map_token token;
26925f39d397SChris Mason 	struct btrfs_disk_key disk_key;
269300ec4c51SChris Mason 	int slot;
269434a38218SChris Mason 	u32 i;
269500ec4c51SChris Mason 	int push_space = 0;
269600ec4c51SChris Mason 	int push_items = 0;
26970783fcfcSChris Mason 	struct btrfs_item *item;
269834a38218SChris Mason 	u32 nr;
26997518a238SChris Mason 	u32 right_nritems;
27005f39d397SChris Mason 	u32 data_end;
2701db94535dSChris Mason 	u32 this_item_size;
270200ec4c51SChris Mason 
	/*
	 * nr is the lowest slot index the scan below may visit: 0 when the
	 * leaf may be emptied, otherwise at least 1 so slot 0 stays put.
	 */
270334a38218SChris Mason 	if (empty)
270434a38218SChris Mason 		nr = 0;
270534a38218SChris Mason 	else
270699d8f83cSChris Mason 		nr = max_t(u32, 1, min_slot);
270734a38218SChris Mason 
	/* Slot of interest is past the last item: reserve data_size now. */
270831840ae1SZheng Yan 	if (path->slots[0] >= left_nritems)
270987b29b20SYan Zheng 		push_space += data_size;
271031840ae1SZheng Yan 
271144871b1bSChris Mason 	slot = path->slots[1];
	/* Scan from the last item downwards, counting what fits in right. */
271234a38218SChris Mason 	i = left_nritems - 1;
271334a38218SChris Mason 	while (i >= nr) {
2714dd3cc16bSRoss Kirk 		item = btrfs_item_nr(i);
2715db94535dSChris Mason 
		/*
		 * Once something has been pushed (and we are not emptying),
		 * never go below path->slots[0], and only take the item at
		 * path->slots[0] itself if the space check below passes.
		 */
271631840ae1SZheng Yan 		if (!empty && push_items > 0) {
271731840ae1SZheng Yan 			if (path->slots[0] > i)
271831840ae1SZheng Yan 				break;
271931840ae1SZheng Yan 			if (path->slots[0] == i) {
2720e902baacSDavid Sterba 				int space = btrfs_leaf_free_space(left);
2721e902baacSDavid Sterba 
272231840ae1SZheng Yan 				if (space + push_space * 2 > free_space)
272331840ae1SZheng Yan 					break;
272431840ae1SZheng Yan 			}
272531840ae1SZheng Yan 		}
272631840ae1SZheng Yan 
272700ec4c51SChris Mason 		if (path->slots[0] == i)
272887b29b20SYan Zheng 			push_space += data_size;
2729db94535dSChris Mason 
2730db94535dSChris Mason 		this_item_size = btrfs_item_size(left, item);
2731db94535dSChris Mason 		if (this_item_size + sizeof(*item) + push_space > free_space)
273200ec4c51SChris Mason 			break;
273331840ae1SZheng Yan 
273400ec4c51SChris Mason 		push_items++;
2735db94535dSChris Mason 		push_space += this_item_size + sizeof(*item);
		/* i is unsigned: stop explicitly rather than wrap below 0. */
273634a38218SChris Mason 		if (i == 0)
273734a38218SChris Mason 			break;
273834a38218SChris Mason 		i--;
2739db94535dSChris Mason 	}
27405f39d397SChris Mason 
2741925baeddSChris Mason 	if (push_items == 0)
2742925baeddSChris Mason 		goto out_unlock;
27435f39d397SChris Mason 
27446c1500f2SJulia Lawall 	WARN_ON(!empty && push_items == left_nritems);
27455f39d397SChris Mason 
274600ec4c51SChris Mason 	/* push left to right */
27475f39d397SChris Mason 	right_nritems = btrfs_header_nritems(right);
274834a38218SChris Mason 
	/*
	 * push_space is reused from here on: it becomes the distance from
	 * leaf_data_end(left) to the end of the first pushed item's data,
	 * i.e. the byte count handed to the data copies below.
	 */
27495f39d397SChris Mason 	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
27508f881e8cSDavid Sterba 	push_space -= leaf_data_end(left);
27515f39d397SChris Mason 
275200ec4c51SChris Mason 	/* make room in the right data area */
27538f881e8cSDavid Sterba 	data_end = leaf_data_end(right);
27545f39d397SChris Mason 	memmove_extent_buffer(right,
27553d9ec8c4SNikolay Borisov 			      BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
27563d9ec8c4SNikolay Borisov 			      BTRFS_LEAF_DATA_OFFSET + data_end,
27570b246afaSJeff Mahoney 			      BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);
27585f39d397SChris Mason 
275900ec4c51SChris Mason 	/* copy from the left data area */
27603d9ec8c4SNikolay Borisov 	copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
27610b246afaSJeff Mahoney 		     BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
27628f881e8cSDavid Sterba 		     BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
2763d6025579SChris Mason 		     push_space);
27645f39d397SChris Mason 
	/* Shift right's existing item headers up to make room in front. */
27655f39d397SChris Mason 	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
27665f39d397SChris Mason 			      btrfs_item_nr_offset(0),
27670783fcfcSChris Mason 			      right_nritems * sizeof(struct btrfs_item));
27685f39d397SChris Mason 
276900ec4c51SChris Mason 	/* copy the items from left to right */
27705f39d397SChris Mason 	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
27715f39d397SChris Mason 		   btrfs_item_nr_offset(left_nritems - push_items),
27720783fcfcSChris Mason 		   push_items * sizeof(struct btrfs_item));
277300ec4c51SChris Mason 
277400ec4c51SChris Mason 	/* update the item pointers */
2775c82f823cSDavid Sterba 	btrfs_init_map_token(&token, right);
27767518a238SChris Mason 	right_nritems += push_items;
27775f39d397SChris Mason 	btrfs_set_header_nritems(right, right_nritems);
	/*
	 * Recompute the data offset of every item now in right, packing
	 * the data downwards from BTRFS_LEAF_DATA_SIZE in slot order.
	 */
27780b246afaSJeff Mahoney 	push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
27797518a238SChris Mason 	for (i = 0; i < right_nritems; i++) {
2780dd3cc16bSRoss Kirk 		item = btrfs_item_nr(i);
2781cc4c13d5SDavid Sterba 		push_space -= btrfs_token_item_size(&token, item);
2782cc4c13d5SDavid Sterba 		btrfs_set_token_item_offset(&token, item, push_space);
2783db94535dSChris Mason 	}
2784db94535dSChris Mason 
27857518a238SChris Mason 	left_nritems -= push_items;
27865f39d397SChris Mason 	btrfs_set_header_nritems(left, left_nritems);
278700ec4c51SChris Mason 
	/* A left leaf that still has items is dirtied; an empty one is cleaned. */
278834a38218SChris Mason 	if (left_nritems)
27895f39d397SChris Mason 		btrfs_mark_buffer_dirty(left);
2790f0486c68SYan, Zheng 	else
27916a884d7dSDavid Sterba 		btrfs_clean_tree_block(left);
2792f0486c68SYan, Zheng 
27935f39d397SChris Mason 	btrfs_mark_buffer_dirty(right);
2794a429e513SChris Mason 
	/* right has a new first key: store it in the parent at slot + 1. */
27955f39d397SChris Mason 	btrfs_item_key(right, &disk_key, 0);
27965f39d397SChris Mason 	btrfs_set_node_key(upper, &disk_key, slot + 1);
2797d6025579SChris Mason 	btrfs_mark_buffer_dirty(upper);
279802217ed2SChris Mason 
279900ec4c51SChris Mason 	/* then fixup the leaf pointer in the path */
	/*
	 * If the slot of interest moved into right, the path switches to
	 * right and the old leaf is released; otherwise right is released.
	 */
28007518a238SChris Mason 	if (path->slots[0] >= left_nritems) {
28017518a238SChris Mason 		path->slots[0] -= left_nritems;
2802925baeddSChris Mason 		if (btrfs_header_nritems(path->nodes[0]) == 0)
28036a884d7dSDavid Sterba 			btrfs_clean_tree_block(path->nodes[0]);
2804925baeddSChris Mason 		btrfs_tree_unlock(path->nodes[0]);
28055f39d397SChris Mason 		free_extent_buffer(path->nodes[0]);
28065f39d397SChris Mason 		path->nodes[0] = right;
280700ec4c51SChris Mason 		path->slots[1] += 1;
280800ec4c51SChris Mason 	} else {
2809925baeddSChris Mason 		btrfs_tree_unlock(right);
28105f39d397SChris Mason 		free_extent_buffer(right);
281100ec4c51SChris Mason 	}
281200ec4c51SChris Mason 	return 0;
2813925baeddSChris Mason 
	/* Nothing could be pushed: release right and report 1. */
2814925baeddSChris Mason out_unlock:
2815925baeddSChris Mason 	btrfs_tree_unlock(right);
2816925baeddSChris Mason 	free_extent_buffer(right);
2817925baeddSChris Mason 	return 1;
281800ec4c51SChris Mason }
2819925baeddSChris Mason 
282000ec4c51SChris Mason /*
282144871b1bSChris Mason  * push some data in the path leaf to the right, trying to free up at
282274123bd7SChris Mason  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
282344871b1bSChris Mason  *
282444871b1bSChris Mason  * returns 1 if the push failed because the other node didn't have enough
282544871b1bSChris Mason  * room, 0 if everything worked out and < 0 if there were major errors.
282699d8f83cSChris Mason  *
282799d8f83cSChris Mason  * this will push starting from min_slot to the end of the leaf.  It won't
282899d8f83cSChris Mason  * push any slot lower than min_slot
282974123bd7SChris Mason  */
283044871b1bSChris Mason static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
283199d8f83cSChris Mason 			   *root, struct btrfs_path *path,
283299d8f83cSChris Mason 			   int min_data_size, int data_size,
283399d8f83cSChris Mason 			   int empty, u32 min_slot)
2834be0e5c09SChris Mason {
283544871b1bSChris Mason 	struct extent_buffer *left = path->nodes[0];
283644871b1bSChris Mason 	struct extent_buffer *right;
283744871b1bSChris Mason 	struct extent_buffer *upper;
283844871b1bSChris Mason 	int slot;
283944871b1bSChris Mason 	int free_space;
284044871b1bSChris Mason 	u32 left_nritems;
284144871b1bSChris Mason 	int ret;
284244871b1bSChris Mason 
284344871b1bSChris Mason 	if (!path->nodes[1])
284444871b1bSChris Mason 		return 1;
284544871b1bSChris Mason 
284644871b1bSChris Mason 	slot = path->slots[1];
284744871b1bSChris Mason 	upper = path->nodes[1];
284844871b1bSChris Mason 	if (slot >= btrfs_header_nritems(upper) - 1)
284944871b1bSChris Mason 		return 1;
285044871b1bSChris Mason 
285144871b1bSChris Mason 	btrfs_assert_tree_locked(path->nodes[1]);
285244871b1bSChris Mason 
28534b231ae4SDavid Sterba 	right = btrfs_read_node_slot(upper, slot + 1);
2854fb770ae4SLiu Bo 	/*
2855fb770ae4SLiu Bo 	 * slot + 1 is not valid or we fail to read the right node,
2856fb770ae4SLiu Bo 	 * no big deal, just return.
2857fb770ae4SLiu Bo 	 */
2858fb770ae4SLiu Bo 	if (IS_ERR(right))
285991ca338dSTsutomu Itoh 		return 1;
286091ca338dSTsutomu Itoh 
2861bf77467aSJosef Bacik 	__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
286244871b1bSChris Mason 
2863e902baacSDavid Sterba 	free_space = btrfs_leaf_free_space(right);
286444871b1bSChris Mason 	if (free_space < data_size)
286544871b1bSChris Mason 		goto out_unlock;
286644871b1bSChris Mason 
286744871b1bSChris Mason 	/* cow and double check */
286844871b1bSChris Mason 	ret = btrfs_cow_block(trans, root, right, upper,
2869bf59a5a2SJosef Bacik 			      slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
287044871b1bSChris Mason 	if (ret)
287144871b1bSChris Mason 		goto out_unlock;
287244871b1bSChris Mason 
2873e902baacSDavid Sterba 	free_space = btrfs_leaf_free_space(right);
287444871b1bSChris Mason 	if (free_space < data_size)
287544871b1bSChris Mason 		goto out_unlock;
287644871b1bSChris Mason 
287744871b1bSChris Mason 	left_nritems = btrfs_header_nritems(left);
287844871b1bSChris Mason 	if (left_nritems == 0)
287944871b1bSChris Mason 		goto out_unlock;
288044871b1bSChris Mason 
2881d16c702fSQu Wenruo 	if (check_sibling_keys(left, right)) {
2882d16c702fSQu Wenruo 		ret = -EUCLEAN;
2883d16c702fSQu Wenruo 		btrfs_tree_unlock(right);
2884d16c702fSQu Wenruo 		free_extent_buffer(right);
2885d16c702fSQu Wenruo 		return ret;
2886d16c702fSQu Wenruo 	}
28872ef1fed2SFilipe David Borba Manana 	if (path->slots[0] == left_nritems && !empty) {
28882ef1fed2SFilipe David Borba Manana 		/* Key greater than all keys in the leaf, right neighbor has
28892ef1fed2SFilipe David Borba Manana 		 * enough room for it and we're not emptying our leaf to delete
28902ef1fed2SFilipe David Borba Manana 		 * it, therefore use right neighbor to insert the new item and
289152042d8eSAndrea Gelmini 		 * no need to touch/dirty our left leaf. */
28922ef1fed2SFilipe David Borba Manana 		btrfs_tree_unlock(left);
28932ef1fed2SFilipe David Borba Manana 		free_extent_buffer(left);
28942ef1fed2SFilipe David Borba Manana 		path->nodes[0] = right;
28952ef1fed2SFilipe David Borba Manana 		path->slots[0] = 0;
28962ef1fed2SFilipe David Borba Manana 		path->slots[1]++;
28972ef1fed2SFilipe David Borba Manana 		return 0;
28982ef1fed2SFilipe David Borba Manana 	}
28992ef1fed2SFilipe David Borba Manana 
2900f72f0010SDavid Sterba 	return __push_leaf_right(path, min_data_size, empty,
290199d8f83cSChris Mason 				right, free_space, left_nritems, min_slot);
290244871b1bSChris Mason out_unlock:
290344871b1bSChris Mason 	btrfs_tree_unlock(right);
290444871b1bSChris Mason 	free_extent_buffer(right);
290544871b1bSChris Mason 	return 1;
290644871b1bSChris Mason }
290744871b1bSChris Mason 
290844871b1bSChris Mason /*
290944871b1bSChris Mason  * push some data in the path leaf to the left, trying to free up at
291044871b1bSChris Mason  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
291199d8f83cSChris Mason  *
291299d8f83cSChris Mason  * max_slot can put a limit on how far into the leaf we'll push items.  The
291399d8f83cSChris Mason  * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
291499d8f83cSChris Mason  * items
291544871b1bSChris Mason  */
/*
 * Worker for push_leaf_left(): move items from the front of the leaf in
 * path->nodes[0] (the "right" leaf) to the end of @left.
 *
 * @path:          nodes[0] is the source leaf; slots[0] is the slot of
 *                 interest in it and slots[1] the parent slot
 * @data_size:     extra bytes accounted for when the scan reaches
 *                 path->slots[0]
 * @empty:         nonzero allows every item to be pushed and disables the
 *                 early-stop checks around path->slots[0]
 * @left:          destination leaf; it is unlocked and released here
 *                 unless it ends up as path->nodes[0]
 * @free_space:    byte budget available in @left
 * @right_nritems: number of items in the source leaf
 * @max_slot:      first slot index that must not be pushed
 *
 * Returns 0 if at least one item was pushed and 1 if nothing was pushed.
 */
29168087c193SDavid Sterba static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
291744871b1bSChris Mason 				     int empty, struct extent_buffer *left,
291899d8f83cSChris Mason 				     int free_space, u32 right_nritems,
291999d8f83cSChris Mason 				     u32 max_slot)
292044871b1bSChris Mason {
29218087c193SDavid Sterba 	struct btrfs_fs_info *fs_info = left->fs_info;
29225f39d397SChris Mason 	struct btrfs_disk_key disk_key;
29235f39d397SChris Mason 	struct extent_buffer *right = path->nodes[0];
2924be0e5c09SChris Mason 	int i;
2925be0e5c09SChris Mason 	int push_space = 0;
2926be0e5c09SChris Mason 	int push_items = 0;
29270783fcfcSChris Mason 	struct btrfs_item *item;
29287518a238SChris Mason 	u32 old_left_nritems;
292934a38218SChris Mason 	u32 nr;
2930aa5d6bedSChris Mason 	int ret = 0;
2931db94535dSChris Mason 	u32 this_item_size;
2932db94535dSChris Mason 	u32 old_left_item_size;
2933cfed81a0SChris Mason 	struct btrfs_map_token token;
2934cfed81a0SChris Mason 
	/*
	 * nr is the exclusive upper bound of the scan: every item when
	 * emptying, otherwise all but the last one, and in both cases
	 * capped by max_slot.
	 */
293534a38218SChris Mason 	if (empty)
293699d8f83cSChris Mason 		nr = min(right_nritems, max_slot);
293734a38218SChris Mason 	else
293899d8f83cSChris Mason 		nr = min(right_nritems - 1, max_slot);
293934a38218SChris Mason 
	/* Scan from the first item upwards, counting what fits in left. */
294034a38218SChris Mason 	for (i = 0; i < nr; i++) {
2941dd3cc16bSRoss Kirk 		item = btrfs_item_nr(i);
2942db94535dSChris Mason 
		/*
		 * Once something has been pushed (and we are not emptying),
		 * never go past path->slots[0], and only take the item at
		 * path->slots[0] itself if the space check below passes.
		 */
294331840ae1SZheng Yan 		if (!empty && push_items > 0) {
294431840ae1SZheng Yan 			if (path->slots[0] < i)
294531840ae1SZheng Yan 				break;
294631840ae1SZheng Yan 			if (path->slots[0] == i) {
2947e902baacSDavid Sterba 				int space = btrfs_leaf_free_space(right);
2948e902baacSDavid Sterba 
294931840ae1SZheng Yan 				if (space + push_space * 2 > free_space)
295031840ae1SZheng Yan 					break;
295131840ae1SZheng Yan 			}
295231840ae1SZheng Yan 		}
295331840ae1SZheng Yan 
2954be0e5c09SChris Mason 		if (path->slots[0] == i)
295587b29b20SYan Zheng 			push_space += data_size;
2956db94535dSChris Mason 
2957db94535dSChris Mason 		this_item_size = btrfs_item_size(right, item);
2958db94535dSChris Mason 		if (this_item_size + sizeof(*item) + push_space > free_space)
2959be0e5c09SChris Mason 			break;
2960db94535dSChris Mason 
2961be0e5c09SChris Mason 		push_items++;
2962db94535dSChris Mason 		push_space += this_item_size + sizeof(*item);
2963be0e5c09SChris Mason 	}
2964db94535dSChris Mason 
2965be0e5c09SChris Mason 	if (push_items == 0) {
2966925baeddSChris Mason 		ret = 1;
2967925baeddSChris Mason 		goto out;
2968be0e5c09SChris Mason 	}
2969fae7f21cSDulshani Gunawardhana 	WARN_ON(!empty && push_items == btrfs_header_nritems(right));
29705f39d397SChris Mason 
2971be0e5c09SChris Mason 	/* push data from right to left */
	/* First the item headers, appended after left's existing ones. */
29725f39d397SChris Mason 	copy_extent_buffer(left, right,
29735f39d397SChris Mason 			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
29745f39d397SChris Mason 			   btrfs_item_nr_offset(0),
29755f39d397SChris Mason 			   push_items * sizeof(struct btrfs_item));
29765f39d397SChris Mason 
	/*
	 * push_space is reused from here on: it becomes the distance from
	 * the data offset of the last pushed item to the end of the data
	 * area, i.e. the byte count handed to the data copy below.
	 */
29770b246afaSJeff Mahoney 	push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
29785f39d397SChris Mason 		     btrfs_item_offset_nr(right, push_items - 1);
29795f39d397SChris Mason 
29803d9ec8c4SNikolay Borisov 	copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
29818f881e8cSDavid Sterba 		     leaf_data_end(left) - push_space,
29823d9ec8c4SNikolay Borisov 		     BTRFS_LEAF_DATA_OFFSET +
29835f39d397SChris Mason 		     btrfs_item_offset_nr(right, push_items - 1),
2984be0e5c09SChris Mason 		     push_space);
29855f39d397SChris Mason 	old_left_nritems = btrfs_header_nritems(left);
298687b29b20SYan Zheng 	BUG_ON(old_left_nritems <= 0);
2987eb60ceacSChris Mason 
	/*
	 * The copied headers still carry offsets relative to right's data
	 * area; rebase them below the data of left's previous last item.
	 * Despite its name, old_left_item_size holds that item's offset.
	 */
2988c82f823cSDavid Sterba 	btrfs_init_map_token(&token, left);
2989db94535dSChris Mason 	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
2990be0e5c09SChris Mason 	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
29915f39d397SChris Mason 		u32 ioff;
2992db94535dSChris Mason 
2993dd3cc16bSRoss Kirk 		item = btrfs_item_nr(i);
2994db94535dSChris Mason 
2995cc4c13d5SDavid Sterba 		ioff = btrfs_token_item_offset(&token, item);
2996cc4c13d5SDavid Sterba 		btrfs_set_token_item_offset(&token, item,
2997cc4c13d5SDavid Sterba 		      ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
2998be0e5c09SChris Mason 	}
29995f39d397SChris Mason 	btrfs_set_header_nritems(left, old_left_nritems + push_items);
3000be0e5c09SChris Mason 
3001be0e5c09SChris Mason 	/* fixup right node */
300231b1a2bdSJulia Lawall 	if (push_items > right_nritems)
300331b1a2bdSJulia Lawall 		WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
3004d397712bSChris Mason 		       right_nritems);
300534a38218SChris Mason 
	/*
	 * If items remain in right, slide their data up to the end of the
	 * data area and their headers down to slot 0.
	 */
300634a38218SChris Mason 	if (push_items < right_nritems) {
30075f39d397SChris Mason 		push_space = btrfs_item_offset_nr(right, push_items - 1) -
30088f881e8cSDavid Sterba 						  leaf_data_end(right);
30093d9ec8c4SNikolay Borisov 		memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
30100b246afaSJeff Mahoney 				      BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
30113d9ec8c4SNikolay Borisov 				      BTRFS_LEAF_DATA_OFFSET +
30128f881e8cSDavid Sterba 				      leaf_data_end(right), push_space);
30135f39d397SChris Mason 
30145f39d397SChris Mason 		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
30155f39d397SChris Mason 			      btrfs_item_nr_offset(push_items),
30165f39d397SChris Mason 			     (btrfs_header_nritems(right) - push_items) *
30170783fcfcSChris Mason 			     sizeof(struct btrfs_item));
301834a38218SChris Mason 	}
3019c82f823cSDavid Sterba 
3020c82f823cSDavid Sterba 	btrfs_init_map_token(&token, right);
3021eef1c494SYan 	right_nritems -= push_items;
3022eef1c494SYan 	btrfs_set_header_nritems(right, right_nritems);
	/*
	 * Recompute the data offset of every item left in right, packing
	 * the data downwards from BTRFS_LEAF_DATA_SIZE in slot order.
	 */
30230b246afaSJeff Mahoney 	push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
30245f39d397SChris Mason 	for (i = 0; i < right_nritems; i++) {
3025dd3cc16bSRoss Kirk 		item = btrfs_item_nr(i);
3026db94535dSChris Mason 
3027cc4c13d5SDavid Sterba 		push_space = push_space - btrfs_token_item_size(&token, item);
3028cc4c13d5SDavid Sterba 		btrfs_set_token_item_offset(&token, item, push_space);
3029db94535dSChris Mason 	}
3030eb60ceacSChris Mason 
30315f39d397SChris Mason 	btrfs_mark_buffer_dirty(left);
	/* A right leaf that still has items is dirtied; an empty one is cleaned. */
303234a38218SChris Mason 	if (right_nritems)
30335f39d397SChris Mason 		btrfs_mark_buffer_dirty(right);
3034f0486c68SYan, Zheng 	else
30356a884d7dSDavid Sterba 		btrfs_clean_tree_block(right);
3036098f59c2SChris Mason 
	/*
	 * Propagate right's new first key to the upper levels.
	 * NOTE(review): slot 0 of right is read even when right_nritems is
	 * 0 after the push; confirm callers tolerate that key.
	 */
30375f39d397SChris Mason 	btrfs_item_key(right, &disk_key, 0);
3038b167fa91SNikolay Borisov 	fixup_low_keys(path, &disk_key, 1);
3039be0e5c09SChris Mason 
3040be0e5c09SChris Mason 	/* then fixup the leaf pointer in the path */
	/*
	 * If the slot of interest moved into left, the path switches to
	 * left and the old leaf is released; otherwise left is released
	 * and the slot index is shifted down by the number of pushed items.
	 */
3041be0e5c09SChris Mason 	if (path->slots[0] < push_items) {
3042be0e5c09SChris Mason 		path->slots[0] += old_left_nritems;
3043925baeddSChris Mason 		btrfs_tree_unlock(path->nodes[0]);
30445f39d397SChris Mason 		free_extent_buffer(path->nodes[0]);
30455f39d397SChris Mason 		path->nodes[0] = left;
3046be0e5c09SChris Mason 		path->slots[1] -= 1;
3047be0e5c09SChris Mason 	} else {
3048925baeddSChris Mason 		btrfs_tree_unlock(left);
30495f39d397SChris Mason 		free_extent_buffer(left);
3050be0e5c09SChris Mason 		path->slots[0] -= push_items;
3051be0e5c09SChris Mason 	}
3052eb60ceacSChris Mason 	BUG_ON(path->slots[0] < 0);
3053aa5d6bedSChris Mason 	return ret;
	/* Nothing could be pushed: release left and return ret (1). */
3054925baeddSChris Mason out:
3055925baeddSChris Mason 	btrfs_tree_unlock(left);
3056925baeddSChris Mason 	free_extent_buffer(left);
3057925baeddSChris Mason 	return ret;
3058be0e5c09SChris Mason }
3059be0e5c09SChris Mason 
306074123bd7SChris Mason /*
306144871b1bSChris Mason  * push some data in the path leaf to the left, trying to free up at
306244871b1bSChris Mason  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
306399d8f83cSChris Mason  *
306499d8f83cSChris Mason  * max_slot can put a limit on how far into the leaf we'll push items.  The
306599d8f83cSChris Mason  * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
306699d8f83cSChris Mason  * items
306744871b1bSChris Mason  */
306844871b1bSChris Mason static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
306999d8f83cSChris Mason 			  *root, struct btrfs_path *path, int min_data_size,
307099d8f83cSChris Mason 			  int data_size, int empty, u32 max_slot)
307144871b1bSChris Mason {
307244871b1bSChris Mason 	struct extent_buffer *right = path->nodes[0];
307344871b1bSChris Mason 	struct extent_buffer *left;
307444871b1bSChris Mason 	int slot;
307544871b1bSChris Mason 	int free_space;
307644871b1bSChris Mason 	u32 right_nritems;
307744871b1bSChris Mason 	int ret = 0;
307844871b1bSChris Mason 
307944871b1bSChris Mason 	slot = path->slots[1];
308044871b1bSChris Mason 	if (slot == 0)
308144871b1bSChris Mason 		return 1;
308244871b1bSChris Mason 	if (!path->nodes[1])
308344871b1bSChris Mason 		return 1;
308444871b1bSChris Mason 
308544871b1bSChris Mason 	right_nritems = btrfs_header_nritems(right);
308644871b1bSChris Mason 	if (right_nritems == 0)
308744871b1bSChris Mason 		return 1;
308844871b1bSChris Mason 
308944871b1bSChris Mason 	btrfs_assert_tree_locked(path->nodes[1]);
309044871b1bSChris Mason 
30914b231ae4SDavid Sterba 	left = btrfs_read_node_slot(path->nodes[1], slot - 1);
3092fb770ae4SLiu Bo 	/*
3093fb770ae4SLiu Bo 	 * slot - 1 is not valid or we fail to read the left node,
3094fb770ae4SLiu Bo 	 * no big deal, just return.
3095fb770ae4SLiu Bo 	 */
3096fb770ae4SLiu Bo 	if (IS_ERR(left))
309791ca338dSTsutomu Itoh 		return 1;
309891ca338dSTsutomu Itoh 
3099bf77467aSJosef Bacik 	__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
310044871b1bSChris Mason 
3101e902baacSDavid Sterba 	free_space = btrfs_leaf_free_space(left);
310244871b1bSChris Mason 	if (free_space < data_size) {
310344871b1bSChris Mason 		ret = 1;
310444871b1bSChris Mason 		goto out;
310544871b1bSChris Mason 	}
310644871b1bSChris Mason 
310744871b1bSChris Mason 	/* cow and double check */
310844871b1bSChris Mason 	ret = btrfs_cow_block(trans, root, left,
31099631e4ccSJosef Bacik 			      path->nodes[1], slot - 1, &left,
3110bf59a5a2SJosef Bacik 			      BTRFS_NESTING_LEFT_COW);
311144871b1bSChris Mason 	if (ret) {
311244871b1bSChris Mason 		/* we hit -ENOSPC, but it isn't fatal here */
311379787eaaSJeff Mahoney 		if (ret == -ENOSPC)
311444871b1bSChris Mason 			ret = 1;
311544871b1bSChris Mason 		goto out;
311644871b1bSChris Mason 	}
311744871b1bSChris Mason 
3118e902baacSDavid Sterba 	free_space = btrfs_leaf_free_space(left);
311944871b1bSChris Mason 	if (free_space < data_size) {
312044871b1bSChris Mason 		ret = 1;
312144871b1bSChris Mason 		goto out;
312244871b1bSChris Mason 	}
312344871b1bSChris Mason 
3124d16c702fSQu Wenruo 	if (check_sibling_keys(left, right)) {
3125d16c702fSQu Wenruo 		ret = -EUCLEAN;
3126d16c702fSQu Wenruo 		goto out;
3127d16c702fSQu Wenruo 	}
31288087c193SDavid Sterba 	return __push_leaf_left(path, min_data_size,
312999d8f83cSChris Mason 			       empty, left, free_space, right_nritems,
313099d8f83cSChris Mason 			       max_slot);
313144871b1bSChris Mason out:
313244871b1bSChris Mason 	btrfs_tree_unlock(left);
313344871b1bSChris Mason 	free_extent_buffer(left);
313444871b1bSChris Mason 	return ret;
313544871b1bSChris Mason }
313644871b1bSChris Mason 
313744871b1bSChris Mason /*
313874123bd7SChris Mason  * split the path's leaf in two, making sure there is at least data_size
313974123bd7SChris Mason  * available for the resulting leaf level of the path.
314074123bd7SChris Mason  */
3141143bede5SJeff Mahoney static noinline void copy_for_split(struct btrfs_trans_handle *trans,
314244871b1bSChris Mason 				    struct btrfs_path *path,
314344871b1bSChris Mason 				    struct extent_buffer *l,
314444871b1bSChris Mason 				    struct extent_buffer *right,
314544871b1bSChris Mason 				    int slot, int mid, int nritems)
3146be0e5c09SChris Mason {
314794f94ad9SDavid Sterba 	struct btrfs_fs_info *fs_info = trans->fs_info;
3148be0e5c09SChris Mason 	int data_copy_size;
3149be0e5c09SChris Mason 	int rt_data_off;
3150be0e5c09SChris Mason 	int i;
3151d4dbff95SChris Mason 	struct btrfs_disk_key disk_key;
3152cfed81a0SChris Mason 	struct btrfs_map_token token;
3153cfed81a0SChris Mason 
31545f39d397SChris Mason 	nritems = nritems - mid;
31555f39d397SChris Mason 	btrfs_set_header_nritems(right, nritems);
31568f881e8cSDavid Sterba 	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l);
31575f39d397SChris Mason 
31585f39d397SChris Mason 	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
31595f39d397SChris Mason 			   btrfs_item_nr_offset(mid),
31605f39d397SChris Mason 			   nritems * sizeof(struct btrfs_item));
31615f39d397SChris Mason 
31625f39d397SChris Mason 	copy_extent_buffer(right, l,
31633d9ec8c4SNikolay Borisov 		     BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
31643d9ec8c4SNikolay Borisov 		     data_copy_size, BTRFS_LEAF_DATA_OFFSET +
31658f881e8cSDavid Sterba 		     leaf_data_end(l), data_copy_size);
316674123bd7SChris Mason 
31670b246afaSJeff Mahoney 	rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);
31685f39d397SChris Mason 
3169c82f823cSDavid Sterba 	btrfs_init_map_token(&token, right);
31705f39d397SChris Mason 	for (i = 0; i < nritems; i++) {
3171dd3cc16bSRoss Kirk 		struct btrfs_item *item = btrfs_item_nr(i);
3172db94535dSChris Mason 		u32 ioff;
3173db94535dSChris Mason 
3174cc4c13d5SDavid Sterba 		ioff = btrfs_token_item_offset(&token, item);
3175cc4c13d5SDavid Sterba 		btrfs_set_token_item_offset(&token, item, ioff + rt_data_off);
31760783fcfcSChris Mason 	}
317774123bd7SChris Mason 
31785f39d397SChris Mason 	btrfs_set_header_nritems(l, mid);
31795f39d397SChris Mason 	btrfs_item_key(right, &disk_key, 0);
31806ad3cf6dSDavid Sterba 	insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
31815f39d397SChris Mason 
31825f39d397SChris Mason 	btrfs_mark_buffer_dirty(right);
31835f39d397SChris Mason 	btrfs_mark_buffer_dirty(l);
3184eb60ceacSChris Mason 	BUG_ON(path->slots[0] != slot);
31855f39d397SChris Mason 
3186be0e5c09SChris Mason 	if (mid <= slot) {
3187925baeddSChris Mason 		btrfs_tree_unlock(path->nodes[0]);
31885f39d397SChris Mason 		free_extent_buffer(path->nodes[0]);
31895f39d397SChris Mason 		path->nodes[0] = right;
3190be0e5c09SChris Mason 		path->slots[0] -= mid;
3191be0e5c09SChris Mason 		path->slots[1] += 1;
3192925baeddSChris Mason 	} else {
3193925baeddSChris Mason 		btrfs_tree_unlock(right);
31945f39d397SChris Mason 		free_extent_buffer(right);
3195925baeddSChris Mason 	}
31965f39d397SChris Mason 
3197eb60ceacSChris Mason 	BUG_ON(path->slots[0] < 0);
319844871b1bSChris Mason }
319944871b1bSChris Mason 
320044871b1bSChris Mason /*
320199d8f83cSChris Mason  * double splits happen when we need to insert a big item in the middle
320299d8f83cSChris Mason  * of a leaf.  A double split can leave us with 3 mostly empty leaves:
320399d8f83cSChris Mason  * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
320499d8f83cSChris Mason  *          A                 B                 C
320599d8f83cSChris Mason  *
320699d8f83cSChris Mason  * We avoid this by trying to push the items on either side of our target
320799d8f83cSChris Mason  * into the adjacent leaves.  If all goes well we can avoid the double split
320899d8f83cSChris Mason  * completely.
320999d8f83cSChris Mason  */
321099d8f83cSChris Mason static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
321199d8f83cSChris Mason 					  struct btrfs_root *root,
321299d8f83cSChris Mason 					  struct btrfs_path *path,
321399d8f83cSChris Mason 					  int data_size)
321499d8f83cSChris Mason {
321599d8f83cSChris Mason 	int ret;
321699d8f83cSChris Mason 	int progress = 0;
321799d8f83cSChris Mason 	int slot;
321899d8f83cSChris Mason 	u32 nritems;
32195a4267caSFilipe David Borba Manana 	int space_needed = data_size;
322099d8f83cSChris Mason 
322199d8f83cSChris Mason 	slot = path->slots[0];
32225a4267caSFilipe David Borba Manana 	if (slot < btrfs_header_nritems(path->nodes[0]))
3223e902baacSDavid Sterba 		space_needed -= btrfs_leaf_free_space(path->nodes[0]);
322499d8f83cSChris Mason 
322599d8f83cSChris Mason 	/*
322699d8f83cSChris Mason 	 * try to push all the items after our slot into the
322799d8f83cSChris Mason 	 * right leaf
322899d8f83cSChris Mason 	 */
32295a4267caSFilipe David Borba Manana 	ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
323099d8f83cSChris Mason 	if (ret < 0)
323199d8f83cSChris Mason 		return ret;
323299d8f83cSChris Mason 
323399d8f83cSChris Mason 	if (ret == 0)
323499d8f83cSChris Mason 		progress++;
323599d8f83cSChris Mason 
323699d8f83cSChris Mason 	nritems = btrfs_header_nritems(path->nodes[0]);
323799d8f83cSChris Mason 	/*
323899d8f83cSChris Mason 	 * our goal is to get our slot at the start or end of a leaf.  If
323999d8f83cSChris Mason 	 * we've done so we're done
324099d8f83cSChris Mason 	 */
324199d8f83cSChris Mason 	if (path->slots[0] == 0 || path->slots[0] == nritems)
324299d8f83cSChris Mason 		return 0;
324399d8f83cSChris Mason 
3244e902baacSDavid Sterba 	if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
324599d8f83cSChris Mason 		return 0;
324699d8f83cSChris Mason 
324799d8f83cSChris Mason 	/* try to push all the items before our slot into the next leaf */
324899d8f83cSChris Mason 	slot = path->slots[0];
3249263d3995SFilipe Manana 	space_needed = data_size;
3250263d3995SFilipe Manana 	if (slot > 0)
3251e902baacSDavid Sterba 		space_needed -= btrfs_leaf_free_space(path->nodes[0]);
32525a4267caSFilipe David Borba Manana 	ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
325399d8f83cSChris Mason 	if (ret < 0)
325499d8f83cSChris Mason 		return ret;
325599d8f83cSChris Mason 
325699d8f83cSChris Mason 	if (ret == 0)
325799d8f83cSChris Mason 		progress++;
325899d8f83cSChris Mason 
325999d8f83cSChris Mason 	if (progress)
326099d8f83cSChris Mason 		return 0;
326199d8f83cSChris Mason 	return 1;
326299d8f83cSChris Mason }
326399d8f83cSChris Mason 
326499d8f83cSChris Mason /*
326544871b1bSChris Mason  * split the path's leaf in two, making sure there is at least data_size
326644871b1bSChris Mason  * available for the resulting leaf level of the path.
326744871b1bSChris Mason  *
326844871b1bSChris Mason  * returns 0 if all went well and < 0 on failure.
326944871b1bSChris Mason  */
327044871b1bSChris Mason static noinline int split_leaf(struct btrfs_trans_handle *trans,
327144871b1bSChris Mason 			       struct btrfs_root *root,
3272310712b2SOmar Sandoval 			       const struct btrfs_key *ins_key,
327344871b1bSChris Mason 			       struct btrfs_path *path, int data_size,
327444871b1bSChris Mason 			       int extend)
327544871b1bSChris Mason {
32765d4f98a2SYan Zheng 	struct btrfs_disk_key disk_key;
327744871b1bSChris Mason 	struct extent_buffer *l;
327844871b1bSChris Mason 	u32 nritems;
327944871b1bSChris Mason 	int mid;
328044871b1bSChris Mason 	int slot;
328144871b1bSChris Mason 	struct extent_buffer *right;
3282b7a0365eSDaniel Dressler 	struct btrfs_fs_info *fs_info = root->fs_info;
328344871b1bSChris Mason 	int ret = 0;
328444871b1bSChris Mason 	int wret;
32855d4f98a2SYan Zheng 	int split;
328644871b1bSChris Mason 	int num_doubles = 0;
328799d8f83cSChris Mason 	int tried_avoid_double = 0;
328844871b1bSChris Mason 
3289a5719521SYan, Zheng 	l = path->nodes[0];
3290a5719521SYan, Zheng 	slot = path->slots[0];
3291a5719521SYan, Zheng 	if (extend && data_size + btrfs_item_size_nr(l, slot) +
32920b246afaSJeff Mahoney 	    sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
3293a5719521SYan, Zheng 		return -EOVERFLOW;
3294a5719521SYan, Zheng 
329544871b1bSChris Mason 	/* first try to make some room by pushing left and right */
329633157e05SLiu Bo 	if (data_size && path->nodes[1]) {
32975a4267caSFilipe David Borba Manana 		int space_needed = data_size;
32985a4267caSFilipe David Borba Manana 
32995a4267caSFilipe David Borba Manana 		if (slot < btrfs_header_nritems(l))
3300e902baacSDavid Sterba 			space_needed -= btrfs_leaf_free_space(l);
33015a4267caSFilipe David Borba Manana 
33025a4267caSFilipe David Borba Manana 		wret = push_leaf_right(trans, root, path, space_needed,
33035a4267caSFilipe David Borba Manana 				       space_needed, 0, 0);
330444871b1bSChris Mason 		if (wret < 0)
330544871b1bSChris Mason 			return wret;
330644871b1bSChris Mason 		if (wret) {
3307263d3995SFilipe Manana 			space_needed = data_size;
3308263d3995SFilipe Manana 			if (slot > 0)
3309e902baacSDavid Sterba 				space_needed -= btrfs_leaf_free_space(l);
33105a4267caSFilipe David Borba Manana 			wret = push_leaf_left(trans, root, path, space_needed,
33115a4267caSFilipe David Borba Manana 					      space_needed, 0, (u32)-1);
331244871b1bSChris Mason 			if (wret < 0)
331344871b1bSChris Mason 				return wret;
331444871b1bSChris Mason 		}
331544871b1bSChris Mason 		l = path->nodes[0];
331644871b1bSChris Mason 
331744871b1bSChris Mason 		/* did the pushes work? */
3318e902baacSDavid Sterba 		if (btrfs_leaf_free_space(l) >= data_size)
331944871b1bSChris Mason 			return 0;
332044871b1bSChris Mason 	}
332144871b1bSChris Mason 
332244871b1bSChris Mason 	if (!path->nodes[1]) {
3323fdd99c72SLiu Bo 		ret = insert_new_root(trans, root, path, 1);
332444871b1bSChris Mason 		if (ret)
332544871b1bSChris Mason 			return ret;
332644871b1bSChris Mason 	}
332744871b1bSChris Mason again:
33285d4f98a2SYan Zheng 	split = 1;
332944871b1bSChris Mason 	l = path->nodes[0];
333044871b1bSChris Mason 	slot = path->slots[0];
333144871b1bSChris Mason 	nritems = btrfs_header_nritems(l);
333244871b1bSChris Mason 	mid = (nritems + 1) / 2;
333344871b1bSChris Mason 
33345d4f98a2SYan Zheng 	if (mid <= slot) {
33355d4f98a2SYan Zheng 		if (nritems == 1 ||
33365d4f98a2SYan Zheng 		    leaf_space_used(l, mid, nritems - mid) + data_size >
33370b246afaSJeff Mahoney 			BTRFS_LEAF_DATA_SIZE(fs_info)) {
33385d4f98a2SYan Zheng 			if (slot >= nritems) {
33395d4f98a2SYan Zheng 				split = 0;
33405d4f98a2SYan Zheng 			} else {
33415d4f98a2SYan Zheng 				mid = slot;
33425d4f98a2SYan Zheng 				if (mid != nritems &&
33435d4f98a2SYan Zheng 				    leaf_space_used(l, mid, nritems - mid) +
33440b246afaSJeff Mahoney 				    data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
334599d8f83cSChris Mason 					if (data_size && !tried_avoid_double)
334699d8f83cSChris Mason 						goto push_for_double;
33475d4f98a2SYan Zheng 					split = 2;
33485d4f98a2SYan Zheng 				}
33495d4f98a2SYan Zheng 			}
33505d4f98a2SYan Zheng 		}
33515d4f98a2SYan Zheng 	} else {
33525d4f98a2SYan Zheng 		if (leaf_space_used(l, 0, mid) + data_size >
33530b246afaSJeff Mahoney 			BTRFS_LEAF_DATA_SIZE(fs_info)) {
33545d4f98a2SYan Zheng 			if (!extend && data_size && slot == 0) {
33555d4f98a2SYan Zheng 				split = 0;
33565d4f98a2SYan Zheng 			} else if ((extend || !data_size) && slot == 0) {
33575d4f98a2SYan Zheng 				mid = 1;
33585d4f98a2SYan Zheng 			} else {
33595d4f98a2SYan Zheng 				mid = slot;
33605d4f98a2SYan Zheng 				if (mid != nritems &&
33615d4f98a2SYan Zheng 				    leaf_space_used(l, mid, nritems - mid) +
33620b246afaSJeff Mahoney 				    data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
336399d8f83cSChris Mason 					if (data_size && !tried_avoid_double)
336499d8f83cSChris Mason 						goto push_for_double;
33655d4f98a2SYan Zheng 					split = 2;
33665d4f98a2SYan Zheng 				}
33675d4f98a2SYan Zheng 			}
33685d4f98a2SYan Zheng 		}
33695d4f98a2SYan Zheng 	}
33705d4f98a2SYan Zheng 
33715d4f98a2SYan Zheng 	if (split == 0)
33725d4f98a2SYan Zheng 		btrfs_cpu_key_to_disk(&disk_key, ins_key);
33735d4f98a2SYan Zheng 	else
33745d4f98a2SYan Zheng 		btrfs_item_key(l, &disk_key, mid);
33755d4f98a2SYan Zheng 
3376ca9d473aSJosef Bacik 	/*
3377ca9d473aSJosef Bacik 	 * We have to about BTRFS_NESTING_NEW_ROOT here if we've done a double
3378ca9d473aSJosef Bacik 	 * split, because we're only allowed to have MAX_LOCKDEP_SUBCLASSES
3379ca9d473aSJosef Bacik 	 * subclasses, which is 8 at the time of this patch, and we've maxed it
3380ca9d473aSJosef Bacik 	 * out.  In the future we could add a
3381ca9d473aSJosef Bacik 	 * BTRFS_NESTING_SPLIT_THE_SPLITTENING if we need to, but for now just
3382ca9d473aSJosef Bacik 	 * use BTRFS_NESTING_NEW_ROOT.
3383ca9d473aSJosef Bacik 	 */
3384a6279470SFilipe Manana 	right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
3385ca9d473aSJosef Bacik 					     l->start, 0, num_doubles ?
3386ca9d473aSJosef Bacik 					     BTRFS_NESTING_NEW_ROOT :
3387ca9d473aSJosef Bacik 					     BTRFS_NESTING_SPLIT);
3388f0486c68SYan, Zheng 	if (IS_ERR(right))
338944871b1bSChris Mason 		return PTR_ERR(right);
3390f0486c68SYan, Zheng 
33910b246afaSJeff Mahoney 	root_add_used(root, fs_info->nodesize);
339244871b1bSChris Mason 
33935d4f98a2SYan Zheng 	if (split == 0) {
339444871b1bSChris Mason 		if (mid <= slot) {
339544871b1bSChris Mason 			btrfs_set_header_nritems(right, 0);
33966ad3cf6dSDavid Sterba 			insert_ptr(trans, path, &disk_key,
33972ff7e61eSJeff Mahoney 				   right->start, path->slots[1] + 1, 1);
339844871b1bSChris Mason 			btrfs_tree_unlock(path->nodes[0]);
339944871b1bSChris Mason 			free_extent_buffer(path->nodes[0]);
340044871b1bSChris Mason 			path->nodes[0] = right;
340144871b1bSChris Mason 			path->slots[0] = 0;
340244871b1bSChris Mason 			path->slots[1] += 1;
340344871b1bSChris Mason 		} else {
340444871b1bSChris Mason 			btrfs_set_header_nritems(right, 0);
34056ad3cf6dSDavid Sterba 			insert_ptr(trans, path, &disk_key,
34062ff7e61eSJeff Mahoney 				   right->start, path->slots[1], 1);
340744871b1bSChris Mason 			btrfs_tree_unlock(path->nodes[0]);
340844871b1bSChris Mason 			free_extent_buffer(path->nodes[0]);
340944871b1bSChris Mason 			path->nodes[0] = right;
341044871b1bSChris Mason 			path->slots[0] = 0;
3411143bede5SJeff Mahoney 			if (path->slots[1] == 0)
3412b167fa91SNikolay Borisov 				fixup_low_keys(path, &disk_key, 1);
34135d4f98a2SYan Zheng 		}
3414196e0249SLiu Bo 		/*
3415196e0249SLiu Bo 		 * We create a new leaf 'right' for the required ins_len and
3416196e0249SLiu Bo 		 * we'll do btrfs_mark_buffer_dirty() on this leaf after copying
3417196e0249SLiu Bo 		 * the content of ins_len to 'right'.
3418196e0249SLiu Bo 		 */
341944871b1bSChris Mason 		return ret;
342044871b1bSChris Mason 	}
342144871b1bSChris Mason 
342294f94ad9SDavid Sterba 	copy_for_split(trans, path, l, right, slot, mid, nritems);
342344871b1bSChris Mason 
34245d4f98a2SYan Zheng 	if (split == 2) {
3425cc0c5538SChris Mason 		BUG_ON(num_doubles != 0);
3426cc0c5538SChris Mason 		num_doubles++;
3427cc0c5538SChris Mason 		goto again;
34283326d1b0SChris Mason 	}
342944871b1bSChris Mason 
3430143bede5SJeff Mahoney 	return 0;
343199d8f83cSChris Mason 
343299d8f83cSChris Mason push_for_double:
343399d8f83cSChris Mason 	push_for_double_split(trans, root, path, data_size);
343499d8f83cSChris Mason 	tried_avoid_double = 1;
3435e902baacSDavid Sterba 	if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
343699d8f83cSChris Mason 		return 0;
343799d8f83cSChris Mason 	goto again;
3438be0e5c09SChris Mason }
3439be0e5c09SChris Mason 
3440ad48fd75SYan, Zheng static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3441ad48fd75SYan, Zheng 					 struct btrfs_root *root,
3442ad48fd75SYan, Zheng 					 struct btrfs_path *path, int ins_len)
3443ad48fd75SYan, Zheng {
3444ad48fd75SYan, Zheng 	struct btrfs_key key;
3445ad48fd75SYan, Zheng 	struct extent_buffer *leaf;
3446ad48fd75SYan, Zheng 	struct btrfs_file_extent_item *fi;
3447ad48fd75SYan, Zheng 	u64 extent_len = 0;
3448ad48fd75SYan, Zheng 	u32 item_size;
3449ad48fd75SYan, Zheng 	int ret;
3450ad48fd75SYan, Zheng 
3451ad48fd75SYan, Zheng 	leaf = path->nodes[0];
3452ad48fd75SYan, Zheng 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3453ad48fd75SYan, Zheng 
3454ad48fd75SYan, Zheng 	BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3455ad48fd75SYan, Zheng 	       key.type != BTRFS_EXTENT_CSUM_KEY);
3456ad48fd75SYan, Zheng 
3457e902baacSDavid Sterba 	if (btrfs_leaf_free_space(leaf) >= ins_len)
3458ad48fd75SYan, Zheng 		return 0;
3459ad48fd75SYan, Zheng 
3460ad48fd75SYan, Zheng 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3461ad48fd75SYan, Zheng 	if (key.type == BTRFS_EXTENT_DATA_KEY) {
3462ad48fd75SYan, Zheng 		fi = btrfs_item_ptr(leaf, path->slots[0],
3463ad48fd75SYan, Zheng 				    struct btrfs_file_extent_item);
3464ad48fd75SYan, Zheng 		extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3465ad48fd75SYan, Zheng 	}
3466b3b4aa74SDavid Sterba 	btrfs_release_path(path);
3467ad48fd75SYan, Zheng 
3468ad48fd75SYan, Zheng 	path->keep_locks = 1;
3469ad48fd75SYan, Zheng 	path->search_for_split = 1;
3470ad48fd75SYan, Zheng 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
3471ad48fd75SYan, Zheng 	path->search_for_split = 0;
3472a8df6fe6SFilipe Manana 	if (ret > 0)
3473a8df6fe6SFilipe Manana 		ret = -EAGAIN;
3474ad48fd75SYan, Zheng 	if (ret < 0)
3475ad48fd75SYan, Zheng 		goto err;
3476ad48fd75SYan, Zheng 
3477ad48fd75SYan, Zheng 	ret = -EAGAIN;
3478ad48fd75SYan, Zheng 	leaf = path->nodes[0];
3479a8df6fe6SFilipe Manana 	/* if our item isn't there, return now */
3480a8df6fe6SFilipe Manana 	if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
3481ad48fd75SYan, Zheng 		goto err;
3482ad48fd75SYan, Zheng 
3483109f6aefSChris Mason 	/* the leaf has  changed, it now has room.  return now */
3484e902baacSDavid Sterba 	if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
3485109f6aefSChris Mason 		goto err;
3486109f6aefSChris Mason 
3487ad48fd75SYan, Zheng 	if (key.type == BTRFS_EXTENT_DATA_KEY) {
3488ad48fd75SYan, Zheng 		fi = btrfs_item_ptr(leaf, path->slots[0],
3489ad48fd75SYan, Zheng 				    struct btrfs_file_extent_item);
3490ad48fd75SYan, Zheng 		if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3491ad48fd75SYan, Zheng 			goto err;
3492ad48fd75SYan, Zheng 	}
3493ad48fd75SYan, Zheng 
3494ad48fd75SYan, Zheng 	ret = split_leaf(trans, root, &key, path, ins_len, 1);
3495f0486c68SYan, Zheng 	if (ret)
3496f0486c68SYan, Zheng 		goto err;
3497ad48fd75SYan, Zheng 
3498ad48fd75SYan, Zheng 	path->keep_locks = 0;
3499ad48fd75SYan, Zheng 	btrfs_unlock_up_safe(path, 1);
3500ad48fd75SYan, Zheng 	return 0;
3501ad48fd75SYan, Zheng err:
3502ad48fd75SYan, Zheng 	path->keep_locks = 0;
3503ad48fd75SYan, Zheng 	return ret;
3504ad48fd75SYan, Zheng }
3505ad48fd75SYan, Zheng 
350625263cd7SDavid Sterba static noinline int split_item(struct btrfs_path *path,
3507310712b2SOmar Sandoval 			       const struct btrfs_key *new_key,
3508459931ecSChris Mason 			       unsigned long split_offset)
3509459931ecSChris Mason {
3510459931ecSChris Mason 	struct extent_buffer *leaf;
3511459931ecSChris Mason 	struct btrfs_item *item;
3512459931ecSChris Mason 	struct btrfs_item *new_item;
3513459931ecSChris Mason 	int slot;
3514ad48fd75SYan, Zheng 	char *buf;
3515459931ecSChris Mason 	u32 nritems;
3516ad48fd75SYan, Zheng 	u32 item_size;
3517459931ecSChris Mason 	u32 orig_offset;
3518459931ecSChris Mason 	struct btrfs_disk_key disk_key;
3519459931ecSChris Mason 
3520459931ecSChris Mason 	leaf = path->nodes[0];
3521e902baacSDavid Sterba 	BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
3522b9473439SChris Mason 
3523dd3cc16bSRoss Kirk 	item = btrfs_item_nr(path->slots[0]);
3524459931ecSChris Mason 	orig_offset = btrfs_item_offset(leaf, item);
3525459931ecSChris Mason 	item_size = btrfs_item_size(leaf, item);
3526459931ecSChris Mason 
3527459931ecSChris Mason 	buf = kmalloc(item_size, GFP_NOFS);
3528ad48fd75SYan, Zheng 	if (!buf)
3529ad48fd75SYan, Zheng 		return -ENOMEM;
3530ad48fd75SYan, Zheng 
3531459931ecSChris Mason 	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3532459931ecSChris Mason 			    path->slots[0]), item_size);
3533ad48fd75SYan, Zheng 
3534459931ecSChris Mason 	slot = path->slots[0] + 1;
3535459931ecSChris Mason 	nritems = btrfs_header_nritems(leaf);
3536459931ecSChris Mason 	if (slot != nritems) {
3537459931ecSChris Mason 		/* shift the items */
3538459931ecSChris Mason 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3539459931ecSChris Mason 				btrfs_item_nr_offset(slot),
3540459931ecSChris Mason 				(nritems - slot) * sizeof(struct btrfs_item));
3541459931ecSChris Mason 	}
3542459931ecSChris Mason 
3543459931ecSChris Mason 	btrfs_cpu_key_to_disk(&disk_key, new_key);
3544459931ecSChris Mason 	btrfs_set_item_key(leaf, &disk_key, slot);
3545459931ecSChris Mason 
3546dd3cc16bSRoss Kirk 	new_item = btrfs_item_nr(slot);
3547459931ecSChris Mason 
3548459931ecSChris Mason 	btrfs_set_item_offset(leaf, new_item, orig_offset);
3549459931ecSChris Mason 	btrfs_set_item_size(leaf, new_item, item_size - split_offset);
3550459931ecSChris Mason 
3551459931ecSChris Mason 	btrfs_set_item_offset(leaf, item,
3552459931ecSChris Mason 			      orig_offset + item_size - split_offset);
3553459931ecSChris Mason 	btrfs_set_item_size(leaf, item, split_offset);
3554459931ecSChris Mason 
3555459931ecSChris Mason 	btrfs_set_header_nritems(leaf, nritems + 1);
3556459931ecSChris Mason 
3557459931ecSChris Mason 	/* write the data for the start of the original item */
3558459931ecSChris Mason 	write_extent_buffer(leaf, buf,
3559459931ecSChris Mason 			    btrfs_item_ptr_offset(leaf, path->slots[0]),
3560459931ecSChris Mason 			    split_offset);
3561459931ecSChris Mason 
3562459931ecSChris Mason 	/* write the data for the new item */
3563459931ecSChris Mason 	write_extent_buffer(leaf, buf + split_offset,
3564459931ecSChris Mason 			    btrfs_item_ptr_offset(leaf, slot),
3565459931ecSChris Mason 			    item_size - split_offset);
3566459931ecSChris Mason 	btrfs_mark_buffer_dirty(leaf);
3567459931ecSChris Mason 
3568e902baacSDavid Sterba 	BUG_ON(btrfs_leaf_free_space(leaf) < 0);
3569459931ecSChris Mason 	kfree(buf);
3570ad48fd75SYan, Zheng 	return 0;
3571ad48fd75SYan, Zheng }
3572ad48fd75SYan, Zheng 
3573ad48fd75SYan, Zheng /*
3574ad48fd75SYan, Zheng  * This function splits a single item into two items,
3575ad48fd75SYan, Zheng  * giving 'new_key' to the new item and splitting the
3576ad48fd75SYan, Zheng  * old one at split_offset (from the start of the item).
3577ad48fd75SYan, Zheng  *
3578ad48fd75SYan, Zheng  * The path may be released by this operation.  After
3579ad48fd75SYan, Zheng  * the split, the path is pointing to the old item.  The
3580ad48fd75SYan, Zheng  * new item is going to be in the same node as the old one.
3581ad48fd75SYan, Zheng  *
3582ad48fd75SYan, Zheng  * Note, the item being split must be smaller enough to live alone on
3583ad48fd75SYan, Zheng  * a tree block with room for one extra struct btrfs_item
3584ad48fd75SYan, Zheng  *
3585ad48fd75SYan, Zheng  * This allows us to split the item in place, keeping a lock on the
3586ad48fd75SYan, Zheng  * leaf the entire time.
3587ad48fd75SYan, Zheng  */
3588ad48fd75SYan, Zheng int btrfs_split_item(struct btrfs_trans_handle *trans,
3589ad48fd75SYan, Zheng 		     struct btrfs_root *root,
3590ad48fd75SYan, Zheng 		     struct btrfs_path *path,
3591310712b2SOmar Sandoval 		     const struct btrfs_key *new_key,
3592ad48fd75SYan, Zheng 		     unsigned long split_offset)
3593ad48fd75SYan, Zheng {
3594ad48fd75SYan, Zheng 	int ret;
3595ad48fd75SYan, Zheng 	ret = setup_leaf_for_split(trans, root, path,
3596ad48fd75SYan, Zheng 				   sizeof(struct btrfs_item));
3597ad48fd75SYan, Zheng 	if (ret)
3598459931ecSChris Mason 		return ret;
3599ad48fd75SYan, Zheng 
360025263cd7SDavid Sterba 	ret = split_item(path, new_key, split_offset);
3601ad48fd75SYan, Zheng 	return ret;
3602ad48fd75SYan, Zheng }
3603ad48fd75SYan, Zheng 
3604ad48fd75SYan, Zheng /*
3605ad48fd75SYan, Zheng  * This function duplicate a item, giving 'new_key' to the new item.
3606ad48fd75SYan, Zheng  * It guarantees both items live in the same tree leaf and the new item
3607ad48fd75SYan, Zheng  * is contiguous with the original item.
3608ad48fd75SYan, Zheng  *
3609ad48fd75SYan, Zheng  * This allows us to split file extent in place, keeping a lock on the
3610ad48fd75SYan, Zheng  * leaf the entire time.
3611ad48fd75SYan, Zheng  */
3612ad48fd75SYan, Zheng int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
3613ad48fd75SYan, Zheng 			 struct btrfs_root *root,
3614ad48fd75SYan, Zheng 			 struct btrfs_path *path,
3615310712b2SOmar Sandoval 			 const struct btrfs_key *new_key)
3616ad48fd75SYan, Zheng {
3617ad48fd75SYan, Zheng 	struct extent_buffer *leaf;
3618ad48fd75SYan, Zheng 	int ret;
3619ad48fd75SYan, Zheng 	u32 item_size;
3620ad48fd75SYan, Zheng 
3621ad48fd75SYan, Zheng 	leaf = path->nodes[0];
3622ad48fd75SYan, Zheng 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3623ad48fd75SYan, Zheng 	ret = setup_leaf_for_split(trans, root, path,
3624ad48fd75SYan, Zheng 				   item_size + sizeof(struct btrfs_item));
3625ad48fd75SYan, Zheng 	if (ret)
3626ad48fd75SYan, Zheng 		return ret;
3627ad48fd75SYan, Zheng 
3628ad48fd75SYan, Zheng 	path->slots[0]++;
3629fc0d82e1SNikolay Borisov 	setup_items_for_insert(root, path, new_key, &item_size, 1);
3630ad48fd75SYan, Zheng 	leaf = path->nodes[0];
3631ad48fd75SYan, Zheng 	memcpy_extent_buffer(leaf,
3632ad48fd75SYan, Zheng 			     btrfs_item_ptr_offset(leaf, path->slots[0]),
3633ad48fd75SYan, Zheng 			     btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
3634ad48fd75SYan, Zheng 			     item_size);
3635ad48fd75SYan, Zheng 	return 0;
3636459931ecSChris Mason }
3637459931ecSChris Mason 
3638459931ecSChris Mason /*
3639d352ac68SChris Mason  * make the item pointed to by the path smaller.  new_size indicates
3640d352ac68SChris Mason  * how small to make it, and from_end tells us if we just chop bytes
3641d352ac68SChris Mason  * off the end of the item or if we shift the item to chop bytes off
3642d352ac68SChris Mason  * the front.
3643d352ac68SChris Mason  */
364478ac4f9eSDavid Sterba void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
3645b18c6685SChris Mason {
3646b18c6685SChris Mason 	int slot;
36475f39d397SChris Mason 	struct extent_buffer *leaf;
36485f39d397SChris Mason 	struct btrfs_item *item;
3649b18c6685SChris Mason 	u32 nritems;
3650b18c6685SChris Mason 	unsigned int data_end;
3651b18c6685SChris Mason 	unsigned int old_data_start;
3652b18c6685SChris Mason 	unsigned int old_size;
3653b18c6685SChris Mason 	unsigned int size_diff;
3654b18c6685SChris Mason 	int i;
3655cfed81a0SChris Mason 	struct btrfs_map_token token;
3656cfed81a0SChris Mason 
36575f39d397SChris Mason 	leaf = path->nodes[0];
3658179e29e4SChris Mason 	slot = path->slots[0];
3659179e29e4SChris Mason 
3660179e29e4SChris Mason 	old_size = btrfs_item_size_nr(leaf, slot);
3661179e29e4SChris Mason 	if (old_size == new_size)
3662143bede5SJeff Mahoney 		return;
3663b18c6685SChris Mason 
36645f39d397SChris Mason 	nritems = btrfs_header_nritems(leaf);
36658f881e8cSDavid Sterba 	data_end = leaf_data_end(leaf);
3666b18c6685SChris Mason 
36675f39d397SChris Mason 	old_data_start = btrfs_item_offset_nr(leaf, slot);
3668179e29e4SChris Mason 
3669b18c6685SChris Mason 	size_diff = old_size - new_size;
3670b18c6685SChris Mason 
3671b18c6685SChris Mason 	BUG_ON(slot < 0);
3672b18c6685SChris Mason 	BUG_ON(slot >= nritems);
3673b18c6685SChris Mason 
3674b18c6685SChris Mason 	/*
3675b18c6685SChris Mason 	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3676b18c6685SChris Mason 	 */
3677b18c6685SChris Mason 	/* first correct the data pointers */
3678c82f823cSDavid Sterba 	btrfs_init_map_token(&token, leaf);
3679b18c6685SChris Mason 	for (i = slot; i < nritems; i++) {
36805f39d397SChris Mason 		u32 ioff;
3681dd3cc16bSRoss Kirk 		item = btrfs_item_nr(i);
3682db94535dSChris Mason 
3683cc4c13d5SDavid Sterba 		ioff = btrfs_token_item_offset(&token, item);
3684cc4c13d5SDavid Sterba 		btrfs_set_token_item_offset(&token, item, ioff + size_diff);
3685b18c6685SChris Mason 	}
3686db94535dSChris Mason 
3687b18c6685SChris Mason 	/* shift the data */
3688179e29e4SChris Mason 	if (from_end) {
36893d9ec8c4SNikolay Borisov 		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
36903d9ec8c4SNikolay Borisov 			      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
3691b18c6685SChris Mason 			      data_end, old_data_start + new_size - data_end);
3692179e29e4SChris Mason 	} else {
3693179e29e4SChris Mason 		struct btrfs_disk_key disk_key;
3694179e29e4SChris Mason 		u64 offset;
3695179e29e4SChris Mason 
3696179e29e4SChris Mason 		btrfs_item_key(leaf, &disk_key, slot);
3697179e29e4SChris Mason 
3698179e29e4SChris Mason 		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
3699179e29e4SChris Mason 			unsigned long ptr;
3700179e29e4SChris Mason 			struct btrfs_file_extent_item *fi;
3701179e29e4SChris Mason 
3702179e29e4SChris Mason 			fi = btrfs_item_ptr(leaf, slot,
3703179e29e4SChris Mason 					    struct btrfs_file_extent_item);
3704179e29e4SChris Mason 			fi = (struct btrfs_file_extent_item *)(
3705179e29e4SChris Mason 			     (unsigned long)fi - size_diff);
3706179e29e4SChris Mason 
3707179e29e4SChris Mason 			if (btrfs_file_extent_type(leaf, fi) ==
3708179e29e4SChris Mason 			    BTRFS_FILE_EXTENT_INLINE) {
3709179e29e4SChris Mason 				ptr = btrfs_item_ptr_offset(leaf, slot);
3710179e29e4SChris Mason 				memmove_extent_buffer(leaf, ptr,
3711179e29e4SChris Mason 				      (unsigned long)fi,
37127ec20afbSDavid Sterba 				      BTRFS_FILE_EXTENT_INLINE_DATA_START);
3713179e29e4SChris Mason 			}
3714179e29e4SChris Mason 		}
3715179e29e4SChris Mason 
37163d9ec8c4SNikolay Borisov 		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
37173d9ec8c4SNikolay Borisov 			      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
3718179e29e4SChris Mason 			      data_end, old_data_start - data_end);
3719179e29e4SChris Mason 
3720179e29e4SChris Mason 		offset = btrfs_disk_key_offset(&disk_key);
3721179e29e4SChris Mason 		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
3722179e29e4SChris Mason 		btrfs_set_item_key(leaf, &disk_key, slot);
3723179e29e4SChris Mason 		if (slot == 0)
3724b167fa91SNikolay Borisov 			fixup_low_keys(path, &disk_key, 1);
3725179e29e4SChris Mason 	}
37265f39d397SChris Mason 
3727dd3cc16bSRoss Kirk 	item = btrfs_item_nr(slot);
37285f39d397SChris Mason 	btrfs_set_item_size(leaf, item, new_size);
37295f39d397SChris Mason 	btrfs_mark_buffer_dirty(leaf);
3730b18c6685SChris Mason 
3731e902baacSDavid Sterba 	if (btrfs_leaf_free_space(leaf) < 0) {
3732a4f78750SDavid Sterba 		btrfs_print_leaf(leaf);
3733b18c6685SChris Mason 		BUG();
37345f39d397SChris Mason 	}
3735b18c6685SChris Mason }
3736b18c6685SChris Mason 
3737d352ac68SChris Mason /*
37388f69dbd2SStefan Behrens  * make the item pointed to by the path bigger, data_size is the added size.
3739d352ac68SChris Mason  */
3740c71dd880SDavid Sterba void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
37416567e837SChris Mason {
37426567e837SChris Mason 	int slot;
37435f39d397SChris Mason 	struct extent_buffer *leaf;
37445f39d397SChris Mason 	struct btrfs_item *item;
37456567e837SChris Mason 	u32 nritems;
37466567e837SChris Mason 	unsigned int data_end;
37476567e837SChris Mason 	unsigned int old_data;
37486567e837SChris Mason 	unsigned int old_size;
37496567e837SChris Mason 	int i;
3750cfed81a0SChris Mason 	struct btrfs_map_token token;
3751cfed81a0SChris Mason 
37525f39d397SChris Mason 	leaf = path->nodes[0];
37536567e837SChris Mason 
37545f39d397SChris Mason 	nritems = btrfs_header_nritems(leaf);
37558f881e8cSDavid Sterba 	data_end = leaf_data_end(leaf);
37566567e837SChris Mason 
3757e902baacSDavid Sterba 	if (btrfs_leaf_free_space(leaf) < data_size) {
3758a4f78750SDavid Sterba 		btrfs_print_leaf(leaf);
37596567e837SChris Mason 		BUG();
37605f39d397SChris Mason 	}
37616567e837SChris Mason 	slot = path->slots[0];
37625f39d397SChris Mason 	old_data = btrfs_item_end_nr(leaf, slot);
37636567e837SChris Mason 
37646567e837SChris Mason 	BUG_ON(slot < 0);
37653326d1b0SChris Mason 	if (slot >= nritems) {
3766a4f78750SDavid Sterba 		btrfs_print_leaf(leaf);
3767c71dd880SDavid Sterba 		btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
3768d397712bSChris Mason 			   slot, nritems);
3769290342f6SArnd Bergmann 		BUG();
37703326d1b0SChris Mason 	}
37716567e837SChris Mason 
37726567e837SChris Mason 	/*
37736567e837SChris Mason 	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
37746567e837SChris Mason 	 */
37756567e837SChris Mason 	/* first correct the data pointers */
3776c82f823cSDavid Sterba 	btrfs_init_map_token(&token, leaf);
37776567e837SChris Mason 	for (i = slot; i < nritems; i++) {
37785f39d397SChris Mason 		u32 ioff;
3779dd3cc16bSRoss Kirk 		item = btrfs_item_nr(i);
3780db94535dSChris Mason 
3781cc4c13d5SDavid Sterba 		ioff = btrfs_token_item_offset(&token, item);
3782cc4c13d5SDavid Sterba 		btrfs_set_token_item_offset(&token, item, ioff - data_size);
37836567e837SChris Mason 	}
37845f39d397SChris Mason 
37856567e837SChris Mason 	/* shift the data */
37863d9ec8c4SNikolay Borisov 	memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
37873d9ec8c4SNikolay Borisov 		      data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
37886567e837SChris Mason 		      data_end, old_data - data_end);
37895f39d397SChris Mason 
37906567e837SChris Mason 	data_end = old_data;
37915f39d397SChris Mason 	old_size = btrfs_item_size_nr(leaf, slot);
3792dd3cc16bSRoss Kirk 	item = btrfs_item_nr(slot);
37935f39d397SChris Mason 	btrfs_set_item_size(leaf, item, old_size + data_size);
37945f39d397SChris Mason 	btrfs_mark_buffer_dirty(leaf);
37956567e837SChris Mason 
3796e902baacSDavid Sterba 	if (btrfs_leaf_free_space(leaf) < 0) {
3797a4f78750SDavid Sterba 		btrfs_print_leaf(leaf);
37986567e837SChris Mason 		BUG();
37995f39d397SChris Mason 	}
38006567e837SChris Mason }
38016567e837SChris Mason 
3802da9ffb24SNikolay Borisov /**
3803da9ffb24SNikolay Borisov  * setup_items_for_insert - Helper called before inserting one or more items
3804da9ffb24SNikolay Borisov  * to a leaf. Main purpose is to save stack depth by doing the bulk of the work
3805da9ffb24SNikolay Borisov  * in a function that doesn't call btrfs_search_slot
3806da9ffb24SNikolay Borisov  *
3807da9ffb24SNikolay Borisov  * @root:	root we are inserting items to
3808da9ffb24SNikolay Borisov  * @path:	points to the leaf/slot where we are going to insert new items
3809da9ffb24SNikolay Borisov  * @cpu_key:	array of keys for items to be inserted
3810da9ffb24SNikolay Borisov  * @data_size:	size of the body of each item we are going to insert
3811da9ffb24SNikolay Borisov  * @nr:		size of @cpu_key/@data_size arrays
381274123bd7SChris Mason  */
3813afe5fea7STsutomu Itoh void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
3814310712b2SOmar Sandoval 			    const struct btrfs_key *cpu_key, u32 *data_size,
3815fc0d82e1SNikolay Borisov 			    int nr)
3816be0e5c09SChris Mason {
38170b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
38185f39d397SChris Mason 	struct btrfs_item *item;
38199c58309dSChris Mason 	int i;
38207518a238SChris Mason 	u32 nritems;
3821be0e5c09SChris Mason 	unsigned int data_end;
3822e2fa7227SChris Mason 	struct btrfs_disk_key disk_key;
382344871b1bSChris Mason 	struct extent_buffer *leaf;
382444871b1bSChris Mason 	int slot;
3825cfed81a0SChris Mason 	struct btrfs_map_token token;
3826fc0d82e1SNikolay Borisov 	u32 total_size;
3827fc0d82e1SNikolay Borisov 	u32 total_data = 0;
3828fc0d82e1SNikolay Borisov 
3829fc0d82e1SNikolay Borisov 	for (i = 0; i < nr; i++)
3830fc0d82e1SNikolay Borisov 		total_data += data_size[i];
3831fc0d82e1SNikolay Borisov 	total_size = total_data + (nr * sizeof(struct btrfs_item));
3832cfed81a0SChris Mason 
383324cdc847SFilipe Manana 	if (path->slots[0] == 0) {
383424cdc847SFilipe Manana 		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3835b167fa91SNikolay Borisov 		fixup_low_keys(path, &disk_key, 1);
383624cdc847SFilipe Manana 	}
383724cdc847SFilipe Manana 	btrfs_unlock_up_safe(path, 1);
383824cdc847SFilipe Manana 
38395f39d397SChris Mason 	leaf = path->nodes[0];
384044871b1bSChris Mason 	slot = path->slots[0];
384174123bd7SChris Mason 
38425f39d397SChris Mason 	nritems = btrfs_header_nritems(leaf);
38438f881e8cSDavid Sterba 	data_end = leaf_data_end(leaf);
3844eb60ceacSChris Mason 
3845e902baacSDavid Sterba 	if (btrfs_leaf_free_space(leaf) < total_size) {
3846a4f78750SDavid Sterba 		btrfs_print_leaf(leaf);
38470b246afaSJeff Mahoney 		btrfs_crit(fs_info, "not enough freespace need %u have %d",
3848e902baacSDavid Sterba 			   total_size, btrfs_leaf_free_space(leaf));
3849be0e5c09SChris Mason 		BUG();
3850d4dbff95SChris Mason 	}
38515f39d397SChris Mason 
3852c82f823cSDavid Sterba 	btrfs_init_map_token(&token, leaf);
3853be0e5c09SChris Mason 	if (slot != nritems) {
38545f39d397SChris Mason 		unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3855be0e5c09SChris Mason 
38565f39d397SChris Mason 		if (old_data < data_end) {
3857a4f78750SDavid Sterba 			btrfs_print_leaf(leaf);
38587269ddd2SNikolay Borisov 			btrfs_crit(fs_info,
38597269ddd2SNikolay Borisov 		"item at slot %d with data offset %u beyond data end of leaf %u",
38605f39d397SChris Mason 				   slot, old_data, data_end);
3861290342f6SArnd Bergmann 			BUG();
38625f39d397SChris Mason 		}
3863be0e5c09SChris Mason 		/*
3864be0e5c09SChris Mason 		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3865be0e5c09SChris Mason 		 */
3866be0e5c09SChris Mason 		/* first correct the data pointers */
38670783fcfcSChris Mason 		for (i = slot; i < nritems; i++) {
38685f39d397SChris Mason 			u32 ioff;
3869db94535dSChris Mason 
3870dd3cc16bSRoss Kirk 			item = btrfs_item_nr(i);
3871cc4c13d5SDavid Sterba 			ioff = btrfs_token_item_offset(&token, item);
3872cc4c13d5SDavid Sterba 			btrfs_set_token_item_offset(&token, item,
3873cc4c13d5SDavid Sterba 						    ioff - total_data);
38740783fcfcSChris Mason 		}
3875be0e5c09SChris Mason 		/* shift the items */
38769c58309dSChris Mason 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
38775f39d397SChris Mason 			      btrfs_item_nr_offset(slot),
38780783fcfcSChris Mason 			      (nritems - slot) * sizeof(struct btrfs_item));
3879be0e5c09SChris Mason 
3880be0e5c09SChris Mason 		/* shift the data */
38813d9ec8c4SNikolay Borisov 		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
38823d9ec8c4SNikolay Borisov 			      data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
3883be0e5c09SChris Mason 			      data_end, old_data - data_end);
3884be0e5c09SChris Mason 		data_end = old_data;
3885be0e5c09SChris Mason 	}
38865f39d397SChris Mason 
388762e2749eSChris Mason 	/* setup the item for the new data */
38889c58309dSChris Mason 	for (i = 0; i < nr; i++) {
38899c58309dSChris Mason 		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
38909c58309dSChris Mason 		btrfs_set_item_key(leaf, &disk_key, slot + i);
3891dd3cc16bSRoss Kirk 		item = btrfs_item_nr(slot + i);
38929c58309dSChris Mason 		data_end -= data_size[i];
3893fc0716c2SNikolay Borisov 		btrfs_set_token_item_offset(&token, item, data_end);
3894cc4c13d5SDavid Sterba 		btrfs_set_token_item_size(&token, item, data_size[i]);
38959c58309dSChris Mason 	}
389644871b1bSChris Mason 
38979c58309dSChris Mason 	btrfs_set_header_nritems(leaf, nritems + nr);
3898b9473439SChris Mason 	btrfs_mark_buffer_dirty(leaf);
3899aa5d6bedSChris Mason 
3900e902baacSDavid Sterba 	if (btrfs_leaf_free_space(leaf) < 0) {
3901a4f78750SDavid Sterba 		btrfs_print_leaf(leaf);
3902be0e5c09SChris Mason 		BUG();
39035f39d397SChris Mason 	}
390444871b1bSChris Mason }
390544871b1bSChris Mason 
390644871b1bSChris Mason /*
390744871b1bSChris Mason  * Given a key and some data, insert items into the tree.
390844871b1bSChris Mason  * This does all the path init required, making room in the tree if needed.
390944871b1bSChris Mason  */
391044871b1bSChris Mason int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
391144871b1bSChris Mason 			    struct btrfs_root *root,
391244871b1bSChris Mason 			    struct btrfs_path *path,
3913310712b2SOmar Sandoval 			    const struct btrfs_key *cpu_key, u32 *data_size,
391444871b1bSChris Mason 			    int nr)
391544871b1bSChris Mason {
391644871b1bSChris Mason 	int ret = 0;
391744871b1bSChris Mason 	int slot;
391844871b1bSChris Mason 	int i;
391944871b1bSChris Mason 	u32 total_size = 0;
392044871b1bSChris Mason 	u32 total_data = 0;
392144871b1bSChris Mason 
392244871b1bSChris Mason 	for (i = 0; i < nr; i++)
392344871b1bSChris Mason 		total_data += data_size[i];
392444871b1bSChris Mason 
392544871b1bSChris Mason 	total_size = total_data + (nr * sizeof(struct btrfs_item));
392644871b1bSChris Mason 	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
392744871b1bSChris Mason 	if (ret == 0)
392844871b1bSChris Mason 		return -EEXIST;
392944871b1bSChris Mason 	if (ret < 0)
3930143bede5SJeff Mahoney 		return ret;
393144871b1bSChris Mason 
393244871b1bSChris Mason 	slot = path->slots[0];
393344871b1bSChris Mason 	BUG_ON(slot < 0);
393444871b1bSChris Mason 
3935fc0d82e1SNikolay Borisov 	setup_items_for_insert(root, path, cpu_key, data_size, nr);
3936143bede5SJeff Mahoney 	return 0;
393762e2749eSChris Mason }
393862e2749eSChris Mason 
393962e2749eSChris Mason /*
394062e2749eSChris Mason  * Given a key and some data, insert an item into the tree.
394162e2749eSChris Mason  * This does all the path init required, making room in the tree if needed.
394262e2749eSChris Mason  */
3943310712b2SOmar Sandoval int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3944310712b2SOmar Sandoval 		      const struct btrfs_key *cpu_key, void *data,
3945310712b2SOmar Sandoval 		      u32 data_size)
394662e2749eSChris Mason {
394762e2749eSChris Mason 	int ret = 0;
39482c90e5d6SChris Mason 	struct btrfs_path *path;
39495f39d397SChris Mason 	struct extent_buffer *leaf;
39505f39d397SChris Mason 	unsigned long ptr;
395162e2749eSChris Mason 
39522c90e5d6SChris Mason 	path = btrfs_alloc_path();
3953db5b493aSTsutomu Itoh 	if (!path)
3954db5b493aSTsutomu Itoh 		return -ENOMEM;
39552c90e5d6SChris Mason 	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
395662e2749eSChris Mason 	if (!ret) {
39575f39d397SChris Mason 		leaf = path->nodes[0];
39585f39d397SChris Mason 		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
39595f39d397SChris Mason 		write_extent_buffer(leaf, data, ptr, data_size);
39605f39d397SChris Mason 		btrfs_mark_buffer_dirty(leaf);
396162e2749eSChris Mason 	}
39622c90e5d6SChris Mason 	btrfs_free_path(path);
3963aa5d6bedSChris Mason 	return ret;
3964be0e5c09SChris Mason }
3965be0e5c09SChris Mason 
396674123bd7SChris Mason /*
39675de08d7dSChris Mason  * delete the pointer from a given node.
396874123bd7SChris Mason  *
3969d352ac68SChris Mason  * the tree should have been previously balanced so the deletion does not
3970d352ac68SChris Mason  * empty a node.
397174123bd7SChris Mason  */
3972afe5fea7STsutomu Itoh static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
3973afe5fea7STsutomu Itoh 		    int level, int slot)
3974be0e5c09SChris Mason {
39755f39d397SChris Mason 	struct extent_buffer *parent = path->nodes[level];
39767518a238SChris Mason 	u32 nritems;
3977f3ea38daSJan Schmidt 	int ret;
3978be0e5c09SChris Mason 
39795f39d397SChris Mason 	nritems = btrfs_header_nritems(parent);
3980be0e5c09SChris Mason 	if (slot != nritems - 1) {
3981bf1d3425SDavid Sterba 		if (level) {
3982f3a84ccdSFilipe Manana 			ret = btrfs_tree_mod_log_insert_move(parent, slot,
3983f3a84ccdSFilipe Manana 					slot + 1, nritems - slot - 1);
3984bf1d3425SDavid Sterba 			BUG_ON(ret < 0);
3985bf1d3425SDavid Sterba 		}
39865f39d397SChris Mason 		memmove_extent_buffer(parent,
39875f39d397SChris Mason 			      btrfs_node_key_ptr_offset(slot),
39885f39d397SChris Mason 			      btrfs_node_key_ptr_offset(slot + 1),
3989d6025579SChris Mason 			      sizeof(struct btrfs_key_ptr) *
3990d6025579SChris Mason 			      (nritems - slot - 1));
399157ba86c0SChris Mason 	} else if (level) {
3992f3a84ccdSFilipe Manana 		ret = btrfs_tree_mod_log_insert_key(parent, slot,
3993f3a84ccdSFilipe Manana 				BTRFS_MOD_LOG_KEY_REMOVE, GFP_NOFS);
399457ba86c0SChris Mason 		BUG_ON(ret < 0);
3995be0e5c09SChris Mason 	}
3996f3ea38daSJan Schmidt 
39977518a238SChris Mason 	nritems--;
39985f39d397SChris Mason 	btrfs_set_header_nritems(parent, nritems);
39997518a238SChris Mason 	if (nritems == 0 && parent == root->node) {
40005f39d397SChris Mason 		BUG_ON(btrfs_header_level(root->node) != 1);
4001eb60ceacSChris Mason 		/* just turn the root into a leaf and break */
40025f39d397SChris Mason 		btrfs_set_header_level(root->node, 0);
4003bb803951SChris Mason 	} else if (slot == 0) {
40045f39d397SChris Mason 		struct btrfs_disk_key disk_key;
40055f39d397SChris Mason 
40065f39d397SChris Mason 		btrfs_node_key(parent, &disk_key, 0);
4007b167fa91SNikolay Borisov 		fixup_low_keys(path, &disk_key, level + 1);
4008be0e5c09SChris Mason 	}
4009d6025579SChris Mason 	btrfs_mark_buffer_dirty(parent);
4010be0e5c09SChris Mason }
4011be0e5c09SChris Mason 
401274123bd7SChris Mason /*
4013323ac95bSChris Mason  * a helper function to delete the leaf pointed to by path->slots[1] and
40145d4f98a2SYan Zheng  * path->nodes[1].
4015323ac95bSChris Mason  *
4016323ac95bSChris Mason  * This deletes the pointer in path->nodes[1] and frees the leaf
4017323ac95bSChris Mason  * block extent.  zero is returned if it all worked out, < 0 otherwise.
4018323ac95bSChris Mason  *
4019323ac95bSChris Mason  * The path must have already been setup for deleting the leaf, including
4020323ac95bSChris Mason  * all the proper balancing.  path->nodes[1] must be locked.
4021323ac95bSChris Mason  */
4022143bede5SJeff Mahoney static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
4023323ac95bSChris Mason 				    struct btrfs_root *root,
40245d4f98a2SYan Zheng 				    struct btrfs_path *path,
40255d4f98a2SYan Zheng 				    struct extent_buffer *leaf)
4026323ac95bSChris Mason {
40275d4f98a2SYan Zheng 	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
4028afe5fea7STsutomu Itoh 	del_ptr(root, path, 1, path->slots[1]);
4029323ac95bSChris Mason 
40304d081c41SChris Mason 	/*
40314d081c41SChris Mason 	 * btrfs_free_extent is expensive, we want to make sure we
40324d081c41SChris Mason 	 * aren't holding any locks when we call it
40334d081c41SChris Mason 	 */
40344d081c41SChris Mason 	btrfs_unlock_up_safe(path, 0);
40354d081c41SChris Mason 
4036f0486c68SYan, Zheng 	root_sub_used(root, leaf->len);
4037f0486c68SYan, Zheng 
403867439dadSDavid Sterba 	atomic_inc(&leaf->refs);
40395581a51aSJan Schmidt 	btrfs_free_tree_block(trans, root, leaf, 0, 1);
40403083ee2eSJosef Bacik 	free_extent_buffer_stale(leaf);
4041323ac95bSChris Mason }
4042323ac95bSChris Mason /*
404374123bd7SChris Mason  * delete the item at the leaf level in path.  If that empties
404474123bd7SChris Mason  * the leaf, remove it from the tree
 *
 * @nr items starting at @slot are removed from the leaf path->nodes[0].  When
 * items remain after the removed range, their data and item headers are moved
 * to close the gap.
 *
 * An emptied leaf that is the root node only has its header level set to 0;
 * any other emptied leaf is removed with btrfs_del_leaf().  If the leaf is
 * not empty but uses less than a third of BTRFS_LEAF_DATA_SIZE,
 * push_leaf_left() and then push_leaf_right() are tried, and the leaf is
 * deleted if that leaves it with no items.
 *
 * Returns 0, or a negative error other than -ENOSPC reported by one of the
 * push calls (reset to 0 when the leaf ended up empty and was deleted).
404574123bd7SChris Mason  */
404685e21bacSChris Mason int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
404785e21bacSChris Mason 		    struct btrfs_path *path, int slot, int nr)
4048be0e5c09SChris Mason {
40490b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
40505f39d397SChris Mason 	struct extent_buffer *leaf;
40515f39d397SChris Mason 	struct btrfs_item *item;
4052ce0eac2aSAlexandru Moise 	u32 last_off;
4053ce0eac2aSAlexandru Moise 	u32 dsize = 0;
4054aa5d6bedSChris Mason 	int ret = 0;
4055aa5d6bedSChris Mason 	int wret;
405685e21bacSChris Mason 	int i;
40577518a238SChris Mason 	u32 nritems;
4058be0e5c09SChris Mason 
40595f39d397SChris Mason 	leaf = path->nodes[0];
	/* data offset of the last item in the range being deleted */
406085e21bacSChris Mason 	last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
406185e21bacSChris Mason 
	/* total number of data bytes owned by the deleted items */
406285e21bacSChris Mason 	for (i = 0; i < nr; i++)
406385e21bacSChris Mason 		dsize += btrfs_item_size_nr(leaf, slot + i);
406485e21bacSChris Mason 
40655f39d397SChris Mason 	nritems = btrfs_header_nritems(leaf);
4066be0e5c09SChris Mason 
	/* only need to move things when items follow the deleted range */
406785e21bacSChris Mason 	if (slot + nr != nritems) {
40688f881e8cSDavid Sterba 		int data_end = leaf_data_end(leaf);
4069c82f823cSDavid Sterba 		struct btrfs_map_token token;
40705f39d397SChris Mason 
		/* move the data of the following items up by dsize bytes */
40713d9ec8c4SNikolay Borisov 		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4072d6025579SChris Mason 			      data_end + dsize,
40733d9ec8c4SNikolay Borisov 			      BTRFS_LEAF_DATA_OFFSET + data_end,
407485e21bacSChris Mason 			      last_off - data_end);
40755f39d397SChris Mason 
		/* and make their recorded offsets match the moved data */
4076c82f823cSDavid Sterba 		btrfs_init_map_token(&token, leaf);
407785e21bacSChris Mason 		for (i = slot + nr; i < nritems; i++) {
40785f39d397SChris Mason 			u32 ioff;
4079db94535dSChris Mason 
4080dd3cc16bSRoss Kirk 			item = btrfs_item_nr(i);
4081cc4c13d5SDavid Sterba 			ioff = btrfs_token_item_offset(&token, item);
4082cc4c13d5SDavid Sterba 			btrfs_set_token_item_offset(&token, item, ioff + dsize);
40830783fcfcSChris Mason 		}
4084db94535dSChris Mason 
		/* slide the following item headers down over the deleted ones */
40855f39d397SChris Mason 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
408685e21bacSChris Mason 			      btrfs_item_nr_offset(slot + nr),
40870783fcfcSChris Mason 			      sizeof(struct btrfs_item) *
408885e21bacSChris Mason 			      (nritems - slot - nr));
4089be0e5c09SChris Mason 	}
409085e21bacSChris Mason 	btrfs_set_header_nritems(leaf, nritems - nr);
409185e21bacSChris Mason 	nritems -= nr;
40925f39d397SChris Mason 
409374123bd7SChris Mason 	/* delete the leaf if we've emptied it */
40947518a238SChris Mason 	if (nritems == 0) {
40955f39d397SChris Mason 		if (leaf == root->node) {
			/* the root leaf stays in place, it is simply empty now */
40965f39d397SChris Mason 			btrfs_set_header_level(leaf, 0);
40979a8dd150SChris Mason 		} else {
40986a884d7dSDavid Sterba 			btrfs_clean_tree_block(leaf);
4099143bede5SJeff Mahoney 			btrfs_del_leaf(trans, root, path, leaf);
41009a8dd150SChris Mason 		}
4101be0e5c09SChris Mason 	} else {
41027518a238SChris Mason 		int used = leaf_space_used(leaf, 0, nritems);
		/* deleting from slot 0 changed the first key of the leaf */
4103aa5d6bedSChris Mason 		if (slot == 0) {
41045f39d397SChris Mason 			struct btrfs_disk_key disk_key;
41055f39d397SChris Mason 
41065f39d397SChris Mason 			btrfs_item_key(leaf, &disk_key, 0);
4107b167fa91SNikolay Borisov 			fixup_low_keys(path, &disk_key, 1);
4108aa5d6bedSChris Mason 		}
4109aa5d6bedSChris Mason 
411074123bd7SChris Mason 		/* delete the leaf if it is mostly empty */
41110b246afaSJeff Mahoney 		if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
4112be0e5c09SChris Mason 			/* push_leaf_left fixes the path.
4113be0e5c09SChris Mason 			 * make sure the path still points to our leaf
4114be0e5c09SChris Mason 			 * for possible call to del_ptr below
4115be0e5c09SChris Mason 			 */
			/*
			 * From here on the @slot parameter is reused to
			 * remember the leaf's slot in its parent.  The extra
			 * reference taken on the leaf is dropped by the
			 * free_extent_buffer() call on either branch below.
			 */
41164920c9acSChris Mason 			slot = path->slots[1];
411767439dadSDavid Sterba 			atomic_inc(&leaf->refs);
41185f39d397SChris Mason 
411999d8f83cSChris Mason 			wret = push_leaf_left(trans, root, path, 1, 1,
412099d8f83cSChris Mason 					      1, (u32)-1);
			/* -ENOSPC from a push is not reported to the caller */
412154aa1f4dSChris Mason 			if (wret < 0 && wret != -ENOSPC)
4122aa5d6bedSChris Mason 				ret = wret;
41235f39d397SChris Mason 
			/*
			 * Try the right side only if the path still points at
			 * this leaf and it still holds items.
			 */
41245f39d397SChris Mason 			if (path->nodes[0] == leaf &&
41255f39d397SChris Mason 			    btrfs_header_nritems(leaf)) {
412699d8f83cSChris Mason 				wret = push_leaf_right(trans, root, path, 1,
412799d8f83cSChris Mason 						       1, 1, 0);
412854aa1f4dSChris Mason 				if (wret < 0 && wret != -ENOSPC)
4129aa5d6bedSChris Mason 					ret = wret;
4130aa5d6bedSChris Mason 			}
41315f39d397SChris Mason 
41325f39d397SChris Mason 			if (btrfs_header_nritems(leaf) == 0) {
				/*
				 * The pushes emptied the leaf: restore the
				 * saved parent slot and delete the leaf.  Any
				 * error recorded from a push is discarded.
				 */
4133323ac95bSChris Mason 				path->slots[1] = slot;
4134143bede5SJeff Mahoney 				btrfs_del_leaf(trans, root, path, leaf);
41355f39d397SChris Mason 				free_extent_buffer(leaf);
4136143bede5SJeff Mahoney 				ret = 0;
41375de08d7dSChris Mason 			} else {
4138925baeddSChris Mason 				/* if we're still in the path, make sure
4139925baeddSChris Mason 				 * we're dirty.  Otherwise, one of the
4140925baeddSChris Mason 				 * push_leaf functions must have already
4141925baeddSChris Mason 				 * dirtied this buffer
4142925baeddSChris Mason 				 */
4143925baeddSChris Mason 				if (path->nodes[0] == leaf)
41445f39d397SChris Mason 					btrfs_mark_buffer_dirty(leaf);
41455f39d397SChris Mason 				free_extent_buffer(leaf);
4146be0e5c09SChris Mason 			}
4147d5719762SChris Mason 		} else {
41485f39d397SChris Mason 			btrfs_mark_buffer_dirty(leaf);
4149be0e5c09SChris Mason 		}
41509a8dd150SChris Mason 	}
4151aa5d6bedSChris Mason 	return ret;
41529a8dd150SChris Mason }
41539a8dd150SChris Mason 
415497571fd0SChris Mason /*
4155925baeddSChris Mason  * search the tree again to find a leaf with lesser keys
41567bb86316SChris Mason  * returns 0 if it found something or 1 if there are no lesser leaves.
41577bb86316SChris Mason  * returns < 0 on io errors.
4158d352ac68SChris Mason  *
4159d352ac68SChris Mason  * This may release the path, and so you may lose any locks held at the
4160d352ac68SChris Mason  * time you call it.
41617bb86316SChris Mason  */
416216e7549fSJosef Bacik int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
41637bb86316SChris Mason {
4164925baeddSChris Mason 	struct btrfs_key key;
4165925baeddSChris Mason 	struct btrfs_disk_key found_key;
4166925baeddSChris Mason 	int ret;
41677bb86316SChris Mason 
4168925baeddSChris Mason 	btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
4169925baeddSChris Mason 
4170e8b0d724SFilipe David Borba Manana 	if (key.offset > 0) {
4171925baeddSChris Mason 		key.offset--;
4172e8b0d724SFilipe David Borba Manana 	} else if (key.type > 0) {
4173925baeddSChris Mason 		key.type--;
4174e8b0d724SFilipe David Borba Manana 		key.offset = (u64)-1;
4175e8b0d724SFilipe David Borba Manana 	} else if (key.objectid > 0) {
4176925baeddSChris Mason 		key.objectid--;
4177e8b0d724SFilipe David Borba Manana 		key.type = (u8)-1;
4178e8b0d724SFilipe David Borba Manana 		key.offset = (u64)-1;
4179e8b0d724SFilipe David Borba Manana 	} else {
41807bb86316SChris Mason 		return 1;
4181e8b0d724SFilipe David Borba Manana 	}
41827bb86316SChris Mason 
4183b3b4aa74SDavid Sterba 	btrfs_release_path(path);
4184925baeddSChris Mason 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4185925baeddSChris Mason 	if (ret < 0)
4186925baeddSChris Mason 		return ret;
4187925baeddSChris Mason 	btrfs_item_key(path->nodes[0], &found_key, 0);
4188925baeddSChris Mason 	ret = comp_keys(&found_key, &key);
4189337c6f68SFilipe Manana 	/*
4190337c6f68SFilipe Manana 	 * We might have had an item with the previous key in the tree right
4191337c6f68SFilipe Manana 	 * before we released our path. And after we released our path, that
4192337c6f68SFilipe Manana 	 * item might have been pushed to the first slot (0) of the leaf we
4193337c6f68SFilipe Manana 	 * were holding due to a tree balance. Alternatively, an item with the
4194337c6f68SFilipe Manana 	 * previous key can exist as the only element of a leaf (big fat item).
4195337c6f68SFilipe Manana 	 * Therefore account for these 2 cases, so that our callers (like
4196337c6f68SFilipe Manana 	 * btrfs_previous_item) don't miss an existing item with a key matching
4197337c6f68SFilipe Manana 	 * the previous key we computed above.
4198337c6f68SFilipe Manana 	 */
4199337c6f68SFilipe Manana 	if (ret <= 0)
42007bb86316SChris Mason 		return 0;
4201925baeddSChris Mason 	return 1;
42027bb86316SChris Mason }
42037bb86316SChris Mason 
42043f157a2fSChris Mason /*
42053f157a2fSChris Mason  * A helper function to walk down the tree starting at min_key, and looking
4206de78b51aSEric Sandeen  * for nodes or leaves that are have a minimum transaction id.
4207de78b51aSEric Sandeen  * This is used by the btree defrag code, and tree logging
42083f157a2fSChris Mason  *
42093f157a2fSChris Mason  * This does not cow, but it does stuff the starting key it finds back
42103f157a2fSChris Mason  * into min_key, so you can call btrfs_search_slot with cow=1 on the
42113f157a2fSChris Mason  * key and get a writable path.
42123f157a2fSChris Mason  *
42133f157a2fSChris Mason  * This honors path->lowest_level to prevent descent past a given level
42143f157a2fSChris Mason  * of the tree.
42153f157a2fSChris Mason  *
4216d352ac68SChris Mason  * min_trans indicates the oldest transaction that you are interested
4217d352ac68SChris Mason  * in walking through.  Any nodes or leaves older than min_trans are
4218d352ac68SChris Mason  * skipped over (without reading them).
4219d352ac68SChris Mason  *
42203f157a2fSChris Mason  * returns zero if something useful was found, < 0 on error and 1 if there
42213f157a2fSChris Mason  * was nothing in the tree that matched the search criteria.
42223f157a2fSChris Mason  */
42233f157a2fSChris Mason int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
4224de78b51aSEric Sandeen 			 struct btrfs_path *path,
42253f157a2fSChris Mason 			 u64 min_trans)
42263f157a2fSChris Mason {
42273f157a2fSChris Mason 	struct extent_buffer *cur;
42283f157a2fSChris Mason 	struct btrfs_key found_key;
42293f157a2fSChris Mason 	int slot;
42309652480bSYan 	int sret;
42313f157a2fSChris Mason 	u32 nritems;
42323f157a2fSChris Mason 	int level;
42333f157a2fSChris Mason 	int ret = 1;
4234f98de9b9SFilipe Manana 	int keep_locks = path->keep_locks;
42353f157a2fSChris Mason 
4236f98de9b9SFilipe Manana 	path->keep_locks = 1;
42373f157a2fSChris Mason again:
4238bd681513SChris Mason 	cur = btrfs_read_lock_root_node(root);
42393f157a2fSChris Mason 	level = btrfs_header_level(cur);
4240e02119d5SChris Mason 	WARN_ON(path->nodes[level]);
42413f157a2fSChris Mason 	path->nodes[level] = cur;
4242bd681513SChris Mason 	path->locks[level] = BTRFS_READ_LOCK;
42433f157a2fSChris Mason 
42443f157a2fSChris Mason 	if (btrfs_header_generation(cur) < min_trans) {
42453f157a2fSChris Mason 		ret = 1;
42463f157a2fSChris Mason 		goto out;
42473f157a2fSChris Mason 	}
42483f157a2fSChris Mason 	while (1) {
42493f157a2fSChris Mason 		nritems = btrfs_header_nritems(cur);
42503f157a2fSChris Mason 		level = btrfs_header_level(cur);
4251e3b83361SQu Wenruo 		sret = btrfs_bin_search(cur, min_key, &slot);
4252cbca7d59SFilipe Manana 		if (sret < 0) {
4253cbca7d59SFilipe Manana 			ret = sret;
4254cbca7d59SFilipe Manana 			goto out;
4255cbca7d59SFilipe Manana 		}
42563f157a2fSChris Mason 
4257323ac95bSChris Mason 		/* at the lowest level, we're done, setup the path and exit */
4258323ac95bSChris Mason 		if (level == path->lowest_level) {
4259e02119d5SChris Mason 			if (slot >= nritems)
4260e02119d5SChris Mason 				goto find_next_key;
42613f157a2fSChris Mason 			ret = 0;
42623f157a2fSChris Mason 			path->slots[level] = slot;
42633f157a2fSChris Mason 			btrfs_item_key_to_cpu(cur, &found_key, slot);
42643f157a2fSChris Mason 			goto out;
42653f157a2fSChris Mason 		}
42669652480bSYan 		if (sret && slot > 0)
42679652480bSYan 			slot--;
42683f157a2fSChris Mason 		/*
4269de78b51aSEric Sandeen 		 * check this node pointer against the min_trans parameters.
4270260db43cSRandy Dunlap 		 * If it is too old, skip to the next one.
42713f157a2fSChris Mason 		 */
42723f157a2fSChris Mason 		while (slot < nritems) {
42733f157a2fSChris Mason 			u64 gen;
4274e02119d5SChris Mason 
42753f157a2fSChris Mason 			gen = btrfs_node_ptr_generation(cur, slot);
42763f157a2fSChris Mason 			if (gen < min_trans) {
42773f157a2fSChris Mason 				slot++;
42783f157a2fSChris Mason 				continue;
42793f157a2fSChris Mason 			}
42803f157a2fSChris Mason 			break;
42813f157a2fSChris Mason 		}
4282e02119d5SChris Mason find_next_key:
42833f157a2fSChris Mason 		/*
42843f157a2fSChris Mason 		 * we didn't find a candidate key in this node, walk forward
42853f157a2fSChris Mason 		 * and find another one
42863f157a2fSChris Mason 		 */
42873f157a2fSChris Mason 		if (slot >= nritems) {
4288e02119d5SChris Mason 			path->slots[level] = slot;
4289e02119d5SChris Mason 			sret = btrfs_find_next_key(root, path, min_key, level,
4290de78b51aSEric Sandeen 						  min_trans);
4291e02119d5SChris Mason 			if (sret == 0) {
4292b3b4aa74SDavid Sterba 				btrfs_release_path(path);
42933f157a2fSChris Mason 				goto again;
42943f157a2fSChris Mason 			} else {
42953f157a2fSChris Mason 				goto out;
42963f157a2fSChris Mason 			}
42973f157a2fSChris Mason 		}
42983f157a2fSChris Mason 		/* save our key for returning back */
42993f157a2fSChris Mason 		btrfs_node_key_to_cpu(cur, &found_key, slot);
43003f157a2fSChris Mason 		path->slots[level] = slot;
43013f157a2fSChris Mason 		if (level == path->lowest_level) {
43023f157a2fSChris Mason 			ret = 0;
43033f157a2fSChris Mason 			goto out;
43043f157a2fSChris Mason 		}
43054b231ae4SDavid Sterba 		cur = btrfs_read_node_slot(cur, slot);
4306fb770ae4SLiu Bo 		if (IS_ERR(cur)) {
4307fb770ae4SLiu Bo 			ret = PTR_ERR(cur);
4308fb770ae4SLiu Bo 			goto out;
4309fb770ae4SLiu Bo 		}
43103f157a2fSChris Mason 
4311bd681513SChris Mason 		btrfs_tree_read_lock(cur);
4312b4ce94deSChris Mason 
4313bd681513SChris Mason 		path->locks[level - 1] = BTRFS_READ_LOCK;
43143f157a2fSChris Mason 		path->nodes[level - 1] = cur;
4315f7c79f30SChris Mason 		unlock_up(path, level, 1, 0, NULL);
43163f157a2fSChris Mason 	}
43173f157a2fSChris Mason out:
4318f98de9b9SFilipe Manana 	path->keep_locks = keep_locks;
4319f98de9b9SFilipe Manana 	if (ret == 0) {
4320f98de9b9SFilipe Manana 		btrfs_unlock_up_safe(path, path->lowest_level + 1);
4321f98de9b9SFilipe Manana 		memcpy(min_key, &found_key, sizeof(found_key));
4322f98de9b9SFilipe Manana 	}
43233f157a2fSChris Mason 	return ret;
43243f157a2fSChris Mason }
43253f157a2fSChris Mason 
43263f157a2fSChris Mason /*
43273f157a2fSChris Mason  * this is similar to btrfs_next_leaf, but does not try to preserve
43283f157a2fSChris Mason  * and fixup the path.  It looks for and returns the next key in the
4329de78b51aSEric Sandeen  * tree based on the current path and the min_trans parameters.
43303f157a2fSChris Mason  *
43313f157a2fSChris Mason  * 0 is returned if another key is found, < 0 if there are any errors
43323f157a2fSChris Mason  * and 1 is returned if there are no higher keys in the tree
43333f157a2fSChris Mason  *
43343f157a2fSChris Mason  * path->keep_locks should be set to 1 on the search made before
43353f157a2fSChris Mason  * calling this function.
43363f157a2fSChris Mason  */
4337e7a84565SChris Mason int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
4338de78b51aSEric Sandeen 			struct btrfs_key *key, int level, u64 min_trans)
4339e7a84565SChris Mason {
4340e7a84565SChris Mason 	int slot;
4341e7a84565SChris Mason 	struct extent_buffer *c;
4342e7a84565SChris Mason 
43436a9fb468SJosef Bacik 	WARN_ON(!path->keep_locks && !path->skip_locking);
4344e7a84565SChris Mason 	while (level < BTRFS_MAX_LEVEL) {
4345e7a84565SChris Mason 		if (!path->nodes[level])
4346e7a84565SChris Mason 			return 1;
4347e7a84565SChris Mason 
4348e7a84565SChris Mason 		slot = path->slots[level] + 1;
4349e7a84565SChris Mason 		c = path->nodes[level];
43503f157a2fSChris Mason next:
4351e7a84565SChris Mason 		if (slot >= btrfs_header_nritems(c)) {
435233c66f43SYan Zheng 			int ret;
435333c66f43SYan Zheng 			int orig_lowest;
435433c66f43SYan Zheng 			struct btrfs_key cur_key;
435533c66f43SYan Zheng 			if (level + 1 >= BTRFS_MAX_LEVEL ||
435633c66f43SYan Zheng 			    !path->nodes[level + 1])
4357e7a84565SChris Mason 				return 1;
435833c66f43SYan Zheng 
43596a9fb468SJosef Bacik 			if (path->locks[level + 1] || path->skip_locking) {
436033c66f43SYan Zheng 				level++;
4361e7a84565SChris Mason 				continue;
4362e7a84565SChris Mason 			}
436333c66f43SYan Zheng 
436433c66f43SYan Zheng 			slot = btrfs_header_nritems(c) - 1;
436533c66f43SYan Zheng 			if (level == 0)
436633c66f43SYan Zheng 				btrfs_item_key_to_cpu(c, &cur_key, slot);
436733c66f43SYan Zheng 			else
436833c66f43SYan Zheng 				btrfs_node_key_to_cpu(c, &cur_key, slot);
436933c66f43SYan Zheng 
437033c66f43SYan Zheng 			orig_lowest = path->lowest_level;
4371b3b4aa74SDavid Sterba 			btrfs_release_path(path);
437233c66f43SYan Zheng 			path->lowest_level = level;
437333c66f43SYan Zheng 			ret = btrfs_search_slot(NULL, root, &cur_key, path,
437433c66f43SYan Zheng 						0, 0);
437533c66f43SYan Zheng 			path->lowest_level = orig_lowest;
437633c66f43SYan Zheng 			if (ret < 0)
437733c66f43SYan Zheng 				return ret;
437833c66f43SYan Zheng 
437933c66f43SYan Zheng 			c = path->nodes[level];
438033c66f43SYan Zheng 			slot = path->slots[level];
438133c66f43SYan Zheng 			if (ret == 0)
438233c66f43SYan Zheng 				slot++;
438333c66f43SYan Zheng 			goto next;
438433c66f43SYan Zheng 		}
438533c66f43SYan Zheng 
4386e7a84565SChris Mason 		if (level == 0)
4387e7a84565SChris Mason 			btrfs_item_key_to_cpu(c, key, slot);
43883f157a2fSChris Mason 		else {
43893f157a2fSChris Mason 			u64 gen = btrfs_node_ptr_generation(c, slot);
43903f157a2fSChris Mason 
43913f157a2fSChris Mason 			if (gen < min_trans) {
43923f157a2fSChris Mason 				slot++;
43933f157a2fSChris Mason 				goto next;
43943f157a2fSChris Mason 			}
4395e7a84565SChris Mason 			btrfs_node_key_to_cpu(c, key, slot);
43963f157a2fSChris Mason 		}
4397e7a84565SChris Mason 		return 0;
4398e7a84565SChris Mason 	}
4399e7a84565SChris Mason 	return 1;
4400e7a84565SChris Mason }
4401e7a84565SChris Mason 
44027bb86316SChris Mason /*
4403925baeddSChris Mason  * search the tree again to find a leaf with greater keys
44040f70abe2SChris Mason  * returns 0 if it found something or 1 if there are no greater leaves.
44050f70abe2SChris Mason  * returns < 0 on io errors.
440697571fd0SChris Mason  */
4407234b63a0SChris Mason int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
4408d97e63b6SChris Mason {
44093d7806ecSJan Schmidt 	return btrfs_next_old_leaf(root, path, 0);
44103d7806ecSJan Schmidt }
44113d7806ecSJan Schmidt 
44123d7806ecSJan Schmidt int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
44133d7806ecSJan Schmidt 			u64 time_seq)
44143d7806ecSJan Schmidt {
4415d97e63b6SChris Mason 	int slot;
44168e73f275SChris Mason 	int level;
44175f39d397SChris Mason 	struct extent_buffer *c;
44188e73f275SChris Mason 	struct extent_buffer *next;
4419925baeddSChris Mason 	struct btrfs_key key;
4420925baeddSChris Mason 	u32 nritems;
4421925baeddSChris Mason 	int ret;
44220e46318dSJosef Bacik 	int i;
4423925baeddSChris Mason 
4424925baeddSChris Mason 	nritems = btrfs_header_nritems(path->nodes[0]);
4425d397712bSChris Mason 	if (nritems == 0)
4426925baeddSChris Mason 		return 1;
4427925baeddSChris Mason 
44288e73f275SChris Mason 	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
44298e73f275SChris Mason again:
44308e73f275SChris Mason 	level = 1;
44318e73f275SChris Mason 	next = NULL;
4432b3b4aa74SDavid Sterba 	btrfs_release_path(path);
44338e73f275SChris Mason 
4434a2135011SChris Mason 	path->keep_locks = 1;
44358e73f275SChris Mason 
44363d7806ecSJan Schmidt 	if (time_seq)
44373d7806ecSJan Schmidt 		ret = btrfs_search_old_slot(root, &key, path, time_seq);
44383d7806ecSJan Schmidt 	else
4439925baeddSChris Mason 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4440925baeddSChris Mason 	path->keep_locks = 0;
4441925baeddSChris Mason 
4442925baeddSChris Mason 	if (ret < 0)
4443925baeddSChris Mason 		return ret;
4444925baeddSChris Mason 
4445a2135011SChris Mason 	nritems = btrfs_header_nritems(path->nodes[0]);
4446168fd7d2SChris Mason 	/*
4447168fd7d2SChris Mason 	 * by releasing the path above we dropped all our locks.  A balance
4448168fd7d2SChris Mason 	 * could have added more items next to the key that used to be
4449168fd7d2SChris Mason 	 * at the very end of the block.  So, check again here and
4450168fd7d2SChris Mason 	 * advance the path if there are now more items available.
4451168fd7d2SChris Mason 	 */
4452a2135011SChris Mason 	if (nritems > 0 && path->slots[0] < nritems - 1) {
4453e457afecSYan Zheng 		if (ret == 0)
4454168fd7d2SChris Mason 			path->slots[0]++;
44558e73f275SChris Mason 		ret = 0;
4456925baeddSChris Mason 		goto done;
4457925baeddSChris Mason 	}
44580b43e04fSLiu Bo 	/*
44590b43e04fSLiu Bo 	 * So the above check misses one case:
44600b43e04fSLiu Bo 	 * - after releasing the path above, someone has removed the item that
44610b43e04fSLiu Bo 	 *   used to be at the very end of the block, and balance between leafs
44620b43e04fSLiu Bo 	 *   gets another one with bigger key.offset to replace it.
44630b43e04fSLiu Bo 	 *
44640b43e04fSLiu Bo 	 * This one should be returned as well, or we can get leaf corruption
44650b43e04fSLiu Bo 	 * later(esp. in __btrfs_drop_extents()).
44660b43e04fSLiu Bo 	 *
44670b43e04fSLiu Bo 	 * And a bit more explanation about this check,
44680b43e04fSLiu Bo 	 * with ret > 0, the key isn't found, the path points to the slot
44690b43e04fSLiu Bo 	 * where it should be inserted, so the path->slots[0] item must be the
44700b43e04fSLiu Bo 	 * bigger one.
44710b43e04fSLiu Bo 	 */
44720b43e04fSLiu Bo 	if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
44730b43e04fSLiu Bo 		ret = 0;
44740b43e04fSLiu Bo 		goto done;
44750b43e04fSLiu Bo 	}
4476d97e63b6SChris Mason 
4477234b63a0SChris Mason 	while (level < BTRFS_MAX_LEVEL) {
44788e73f275SChris Mason 		if (!path->nodes[level]) {
44798e73f275SChris Mason 			ret = 1;
44808e73f275SChris Mason 			goto done;
44818e73f275SChris Mason 		}
44825f39d397SChris Mason 
4483d97e63b6SChris Mason 		slot = path->slots[level] + 1;
4484d97e63b6SChris Mason 		c = path->nodes[level];
44855f39d397SChris Mason 		if (slot >= btrfs_header_nritems(c)) {
4486d97e63b6SChris Mason 			level++;
44878e73f275SChris Mason 			if (level == BTRFS_MAX_LEVEL) {
44888e73f275SChris Mason 				ret = 1;
44898e73f275SChris Mason 				goto done;
44908e73f275SChris Mason 			}
4491d97e63b6SChris Mason 			continue;
4492d97e63b6SChris Mason 		}
44935f39d397SChris Mason 
44940e46318dSJosef Bacik 
44950e46318dSJosef Bacik 		/*
44960e46318dSJosef Bacik 		 * Our current level is where we're going to start from, and to
44970e46318dSJosef Bacik 		 * make sure lockdep doesn't complain we need to drop our locks
44980e46318dSJosef Bacik 		 * and nodes from 0 to our current level.
44990e46318dSJosef Bacik 		 */
45000e46318dSJosef Bacik 		for (i = 0; i < level; i++) {
45010e46318dSJosef Bacik 			if (path->locks[level]) {
45020e46318dSJosef Bacik 				btrfs_tree_read_unlock(path->nodes[i]);
45030e46318dSJosef Bacik 				path->locks[i] = 0;
45040e46318dSJosef Bacik 			}
45050e46318dSJosef Bacik 			free_extent_buffer(path->nodes[i]);
45060e46318dSJosef Bacik 			path->nodes[i] = NULL;
4507925baeddSChris Mason 		}
45085f39d397SChris Mason 
45098e73f275SChris Mason 		next = c;
4510d07b8528SLiu Bo 		ret = read_block_for_search(root, path, &next, level,
4511cda79c54SDavid Sterba 					    slot, &key);
45128e73f275SChris Mason 		if (ret == -EAGAIN)
45138e73f275SChris Mason 			goto again;
45145f39d397SChris Mason 
451576a05b35SChris Mason 		if (ret < 0) {
4516b3b4aa74SDavid Sterba 			btrfs_release_path(path);
451776a05b35SChris Mason 			goto done;
451876a05b35SChris Mason 		}
451976a05b35SChris Mason 
45205cd57b2cSChris Mason 		if (!path->skip_locking) {
4521bd681513SChris Mason 			ret = btrfs_try_tree_read_lock(next);
4522d42244a0SJan Schmidt 			if (!ret && time_seq) {
4523d42244a0SJan Schmidt 				/*
4524d42244a0SJan Schmidt 				 * If we don't get the lock, we may be racing
4525d42244a0SJan Schmidt 				 * with push_leaf_left, holding that lock while
4526d42244a0SJan Schmidt 				 * itself waiting for the leaf we've currently
4527d42244a0SJan Schmidt 				 * locked. To solve this situation, we give up
4528d42244a0SJan Schmidt 				 * on our lock and cycle.
4529d42244a0SJan Schmidt 				 */
4530cf538830SJan Schmidt 				free_extent_buffer(next);
4531d42244a0SJan Schmidt 				btrfs_release_path(path);
4532d42244a0SJan Schmidt 				cond_resched();
4533d42244a0SJan Schmidt 				goto again;
4534d42244a0SJan Schmidt 			}
45350e46318dSJosef Bacik 			if (!ret)
45360e46318dSJosef Bacik 				btrfs_tree_read_lock(next);
4537bd681513SChris Mason 		}
4538d97e63b6SChris Mason 		break;
4539d97e63b6SChris Mason 	}
4540d97e63b6SChris Mason 	path->slots[level] = slot;
4541d97e63b6SChris Mason 	while (1) {
4542d97e63b6SChris Mason 		level--;
4543d97e63b6SChris Mason 		path->nodes[level] = next;
4544d97e63b6SChris Mason 		path->slots[level] = 0;
4545a74a4b97SChris Mason 		if (!path->skip_locking)
4546ffeb03cfSJosef Bacik 			path->locks[level] = BTRFS_READ_LOCK;
4547d97e63b6SChris Mason 		if (!level)
4548d97e63b6SChris Mason 			break;
4549b4ce94deSChris Mason 
4550d07b8528SLiu Bo 		ret = read_block_for_search(root, path, &next, level,
4551cda79c54SDavid Sterba 					    0, &key);
45528e73f275SChris Mason 		if (ret == -EAGAIN)
45538e73f275SChris Mason 			goto again;
45548e73f275SChris Mason 
455576a05b35SChris Mason 		if (ret < 0) {
4556b3b4aa74SDavid Sterba 			btrfs_release_path(path);
455776a05b35SChris Mason 			goto done;
455876a05b35SChris Mason 		}
455976a05b35SChris Mason 
4560ffeb03cfSJosef Bacik 		if (!path->skip_locking)
45610e46318dSJosef Bacik 			btrfs_tree_read_lock(next);
4562d97e63b6SChris Mason 	}
45638e73f275SChris Mason 	ret = 0;
4564925baeddSChris Mason done:
4565f7c79f30SChris Mason 	unlock_up(path, 0, 1, 0, NULL);
45668e73f275SChris Mason 
45678e73f275SChris Mason 	return ret;
4568d97e63b6SChris Mason }
45690b86a832SChris Mason 
45703f157a2fSChris Mason /*
45713f157a2fSChris Mason  * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
45723f157a2fSChris Mason  * searching until it gets past min_objectid or finds an item of 'type'
45733f157a2fSChris Mason  *
45743f157a2fSChris Mason  * returns 0 if something is found, 1 if nothing was found and < 0 on error
45753f157a2fSChris Mason  */
45760b86a832SChris Mason int btrfs_previous_item(struct btrfs_root *root,
45770b86a832SChris Mason 			struct btrfs_path *path, u64 min_objectid,
45780b86a832SChris Mason 			int type)
45790b86a832SChris Mason {
45800b86a832SChris Mason 	struct btrfs_key found_key;
45810b86a832SChris Mason 	struct extent_buffer *leaf;
4582e02119d5SChris Mason 	u32 nritems;
45830b86a832SChris Mason 	int ret;
45840b86a832SChris Mason 
45850b86a832SChris Mason 	while (1) {
45860b86a832SChris Mason 		if (path->slots[0] == 0) {
45870b86a832SChris Mason 			ret = btrfs_prev_leaf(root, path);
45880b86a832SChris Mason 			if (ret != 0)
45890b86a832SChris Mason 				return ret;
45900b86a832SChris Mason 		} else {
45910b86a832SChris Mason 			path->slots[0]--;
45920b86a832SChris Mason 		}
45930b86a832SChris Mason 		leaf = path->nodes[0];
4594e02119d5SChris Mason 		nritems = btrfs_header_nritems(leaf);
4595e02119d5SChris Mason 		if (nritems == 0)
4596e02119d5SChris Mason 			return 1;
4597e02119d5SChris Mason 		if (path->slots[0] == nritems)
4598e02119d5SChris Mason 			path->slots[0]--;
4599e02119d5SChris Mason 
46000b86a832SChris Mason 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4601e02119d5SChris Mason 		if (found_key.objectid < min_objectid)
4602e02119d5SChris Mason 			break;
46030a4eefbbSYan Zheng 		if (found_key.type == type)
46040a4eefbbSYan Zheng 			return 0;
4605e02119d5SChris Mason 		if (found_key.objectid == min_objectid &&
4606e02119d5SChris Mason 		    found_key.type < type)
4607e02119d5SChris Mason 			break;
46080b86a832SChris Mason 	}
46090b86a832SChris Mason 	return 1;
46100b86a832SChris Mason }
4611ade2e0b3SWang Shilong 
4612ade2e0b3SWang Shilong /*
4613ade2e0b3SWang Shilong  * search in extent tree to find a previous Metadata/Data extent item with
4614ade2e0b3SWang Shilong  * min objecitd.
4615ade2e0b3SWang Shilong  *
4616ade2e0b3SWang Shilong  * returns 0 if something is found, 1 if nothing was found and < 0 on error
4617ade2e0b3SWang Shilong  */
4618ade2e0b3SWang Shilong int btrfs_previous_extent_item(struct btrfs_root *root,
4619ade2e0b3SWang Shilong 			struct btrfs_path *path, u64 min_objectid)
4620ade2e0b3SWang Shilong {
4621ade2e0b3SWang Shilong 	struct btrfs_key found_key;
4622ade2e0b3SWang Shilong 	struct extent_buffer *leaf;
4623ade2e0b3SWang Shilong 	u32 nritems;
4624ade2e0b3SWang Shilong 	int ret;
4625ade2e0b3SWang Shilong 
4626ade2e0b3SWang Shilong 	while (1) {
4627ade2e0b3SWang Shilong 		if (path->slots[0] == 0) {
4628ade2e0b3SWang Shilong 			ret = btrfs_prev_leaf(root, path);
4629ade2e0b3SWang Shilong 			if (ret != 0)
4630ade2e0b3SWang Shilong 				return ret;
4631ade2e0b3SWang Shilong 		} else {
4632ade2e0b3SWang Shilong 			path->slots[0]--;
4633ade2e0b3SWang Shilong 		}
4634ade2e0b3SWang Shilong 		leaf = path->nodes[0];
4635ade2e0b3SWang Shilong 		nritems = btrfs_header_nritems(leaf);
4636ade2e0b3SWang Shilong 		if (nritems == 0)
4637ade2e0b3SWang Shilong 			return 1;
4638ade2e0b3SWang Shilong 		if (path->slots[0] == nritems)
4639ade2e0b3SWang Shilong 			path->slots[0]--;
4640ade2e0b3SWang Shilong 
4641ade2e0b3SWang Shilong 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4642ade2e0b3SWang Shilong 		if (found_key.objectid < min_objectid)
4643ade2e0b3SWang Shilong 			break;
4644ade2e0b3SWang Shilong 		if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4645ade2e0b3SWang Shilong 		    found_key.type == BTRFS_METADATA_ITEM_KEY)
4646ade2e0b3SWang Shilong 			return 0;
4647ade2e0b3SWang Shilong 		if (found_key.objectid == min_objectid &&
4648ade2e0b3SWang Shilong 		    found_key.type < BTRFS_EXTENT_ITEM_KEY)
4649ade2e0b3SWang Shilong 			break;
4650ade2e0b3SWang Shilong 	}
4651ade2e0b3SWang Shilong 	return 1;
4652ade2e0b3SWang Shilong }
4653