xref: /openbmc/linux/fs/btrfs/extent-tree.c (revision 46ac4e1f)
1c1d7c514SDavid Sterba // SPDX-License-Identifier: GPL-2.0
26cbd5570SChris Mason /*
36cbd5570SChris Mason  * Copyright (C) 2007 Oracle.  All rights reserved.
46cbd5570SChris Mason  */
5c1d7c514SDavid Sterba 
6ec6b910fSZach Brown #include <linux/sched.h>
7f361bf4aSIngo Molnar #include <linux/sched/signal.h>
8edbd8d4eSChris Mason #include <linux/pagemap.h>
9ec44a35cSChris Mason #include <linux/writeback.h>
1021af804cSDavid Woodhouse #include <linux/blkdev.h>
11b7a9f29fSChris Mason #include <linux/sort.h>
124184ea7fSChris Mason #include <linux/rcupdate.h>
13817d52f8SJosef Bacik #include <linux/kthread.h>
145a0e3ad6STejun Heo #include <linux/slab.h>
15dff51cd1SDavid Sterba #include <linux/ratelimit.h>
16b150a4f1SJosef Bacik #include <linux/percpu_counter.h>
1769fe2d75SJosef Bacik #include <linux/lockdep.h>
189678c543SNikolay Borisov #include <linux/crc32c.h>
19cfc2de0fSBoris Burkov #include "ctree.h"
20cfc2de0fSBoris Burkov #include "extent-tree.h"
21995946ddSMiao Xie #include "tree-log.h"
22fec577fbSChris Mason #include "disk-io.h"
23fec577fbSChris Mason #include "print-tree.h"
240b86a832SChris Mason #include "volumes.h"
2553b381b3SDavid Woodhouse #include "raid56.h"
26925baeddSChris Mason #include "locking.h"
27fa9c0d79SChris Mason #include "free-space-cache.h"
281e144fb8SOmar Sandoval #include "free-space-tree.h"
296ab0a202SJeff Mahoney #include "sysfs.h"
30fcebe456SJosef Bacik #include "qgroup.h"
31fd708b81SJosef Bacik #include "ref-verify.h"
328719aaaeSJosef Bacik #include "space-info.h"
33d12ffdd1SJosef Bacik #include "block-rsv.h"
3486736342SJosef Bacik #include "delalloc-space.h"
35b0643e59SDennis Zhou #include "discard.h"
36c57dd1f2SQu Wenruo #include "rcu-string.h"
37169e0da9SNaohiro Aota #include "zoned.h"
386143c23cSNaohiro Aota #include "dev-replace.h"
39c7f13d42SJosef Bacik #include "fs.h"
4007e81dc9SJosef Bacik #include "accessors.h"
4145c40c8fSJosef Bacik #include "root-tree.h"
427c8ede16SJosef Bacik #include "file-item.h"
43aa5d3003SJosef Bacik #include "orphan.h"
44103c1972SChristoph Hellwig #include "tree-checker.h"
45fec577fbSChris Mason 
46709c0486SArne Jansen #undef SCRAMBLE_DELAYED_REFS
47709c0486SArne Jansen 
489f9b8e8dSQu Wenruo 
495d4f98a2SYan Zheng static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
50c682f9b3SQu Wenruo 			       struct btrfs_delayed_ref_node *node, u64 parent,
515d4f98a2SYan Zheng 			       u64 root_objectid, u64 owner_objectid,
525d4f98a2SYan Zheng 			       u64 owner_offset, int refs_to_drop,
53c682f9b3SQu Wenruo 			       struct btrfs_delayed_extent_op *extra_op);
545d4f98a2SYan Zheng static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
555d4f98a2SYan Zheng 				    struct extent_buffer *leaf,
565d4f98a2SYan Zheng 				    struct btrfs_extent_item *ei);
575d4f98a2SYan Zheng static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
585d4f98a2SYan Zheng 				      u64 parent, u64 root_objectid,
595d4f98a2SYan Zheng 				      u64 flags, u64 owner, u64 offset,
605d4f98a2SYan Zheng 				      struct btrfs_key *ins, int ref_mod);
615d4f98a2SYan Zheng static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
624e6bd4e0SNikolay Borisov 				     struct btrfs_delayed_ref_node *node,
6321ebfbe7SNikolay Borisov 				     struct btrfs_delayed_extent_op *extent_op);
6411833d66SYan Zheng static int find_next_key(struct btrfs_path *path, int level,
6511833d66SYan Zheng 			 struct btrfs_key *key);
666a63209fSJosef Bacik 
/*
 * Test whether every bit set in @bits is also set in @cache->flags.
 *
 * Returns 1 if all of @bits are present, 0 otherwise.
 */
static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
{
	if ((cache->flags & bits) != bits)
		return 0;

	return 1;
}
710f9dd46cSJosef Bacik 
721a4ed8fdSFilipe Manana /* simple helper to search for an existing data extent at a given offset */
btrfs_lookup_data_extent(struct btrfs_fs_info * fs_info,u64 start,u64 len)732ff7e61eSJeff Mahoney int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
74e02119d5SChris Mason {
7529cbcf40SJosef Bacik 	struct btrfs_root *root = btrfs_extent_root(fs_info, start);
76e02119d5SChris Mason 	int ret;
77e02119d5SChris Mason 	struct btrfs_key key;
7831840ae1SZheng Yan 	struct btrfs_path *path;
79e02119d5SChris Mason 
8031840ae1SZheng Yan 	path = btrfs_alloc_path();
81d8926bb3SMark Fasheh 	if (!path)
82d8926bb3SMark Fasheh 		return -ENOMEM;
83d8926bb3SMark Fasheh 
84e02119d5SChris Mason 	key.objectid = start;
85e02119d5SChris Mason 	key.offset = len;
863173a18fSJosef Bacik 	key.type = BTRFS_EXTENT_ITEM_KEY;
8729cbcf40SJosef Bacik 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8831840ae1SZheng Yan 	btrfs_free_path(path);
897bb86316SChris Mason 	return ret;
907bb86316SChris Mason }
917bb86316SChris Mason 
92d8d5f3e1SChris Mason /*
933173a18fSJosef Bacik  * helper function to lookup reference count and flags of a tree block.
94a22285a6SYan, Zheng  *
95a22285a6SYan, Zheng  * the head node for delayed ref is used to store the sum of all the
96a22285a6SYan, Zheng  * reference count modifications queued up in the rbtree. the head
97a22285a6SYan, Zheng  * node may also store the extent flags to set. This way you can check
98a22285a6SYan, Zheng  * to see what the reference count and extent flags would be if all of
99a22285a6SYan, Zheng  * the delayed refs are not processed.
100a22285a6SYan, Zheng  */
btrfs_lookup_extent_info(struct btrfs_trans_handle * trans,struct btrfs_fs_info * fs_info,u64 bytenr,u64 offset,int metadata,u64 * refs,u64 * flags)101a22285a6SYan, Zheng int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
1022ff7e61eSJeff Mahoney 			     struct btrfs_fs_info *fs_info, u64 bytenr,
1033173a18fSJosef Bacik 			     u64 offset, int metadata, u64 *refs, u64 *flags)
104a22285a6SYan, Zheng {
10529cbcf40SJosef Bacik 	struct btrfs_root *extent_root;
106a22285a6SYan, Zheng 	struct btrfs_delayed_ref_head *head;
107a22285a6SYan, Zheng 	struct btrfs_delayed_ref_root *delayed_refs;
108a22285a6SYan, Zheng 	struct btrfs_path *path;
109a22285a6SYan, Zheng 	struct btrfs_extent_item *ei;
110a22285a6SYan, Zheng 	struct extent_buffer *leaf;
111a22285a6SYan, Zheng 	struct btrfs_key key;
112a22285a6SYan, Zheng 	u32 item_size;
113a22285a6SYan, Zheng 	u64 num_refs;
114a22285a6SYan, Zheng 	u64 extent_flags;
115a22285a6SYan, Zheng 	int ret;
116a22285a6SYan, Zheng 
1173173a18fSJosef Bacik 	/*
1183173a18fSJosef Bacik 	 * If we don't have skinny metadata, don't bother doing anything
1193173a18fSJosef Bacik 	 * different
1203173a18fSJosef Bacik 	 */
1210b246afaSJeff Mahoney 	if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
1220b246afaSJeff Mahoney 		offset = fs_info->nodesize;
1233173a18fSJosef Bacik 		metadata = 0;
1243173a18fSJosef Bacik 	}
1253173a18fSJosef Bacik 
126a22285a6SYan, Zheng 	path = btrfs_alloc_path();
127a22285a6SYan, Zheng 	if (!path)
128a22285a6SYan, Zheng 		return -ENOMEM;
129a22285a6SYan, Zheng 
130a22285a6SYan, Zheng 	if (!trans) {
131a22285a6SYan, Zheng 		path->skip_locking = 1;
132a22285a6SYan, Zheng 		path->search_commit_root = 1;
133a22285a6SYan, Zheng 	}
134639eefc8SFilipe David Borba Manana 
135639eefc8SFilipe David Borba Manana search_again:
136639eefc8SFilipe David Borba Manana 	key.objectid = bytenr;
137639eefc8SFilipe David Borba Manana 	key.offset = offset;
138639eefc8SFilipe David Borba Manana 	if (metadata)
139639eefc8SFilipe David Borba Manana 		key.type = BTRFS_METADATA_ITEM_KEY;
140639eefc8SFilipe David Borba Manana 	else
141639eefc8SFilipe David Borba Manana 		key.type = BTRFS_EXTENT_ITEM_KEY;
142639eefc8SFilipe David Borba Manana 
14329cbcf40SJosef Bacik 	extent_root = btrfs_extent_root(fs_info, bytenr);
14429cbcf40SJosef Bacik 	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
145a22285a6SYan, Zheng 	if (ret < 0)
146a22285a6SYan, Zheng 		goto out_free;
147a22285a6SYan, Zheng 
1483173a18fSJosef Bacik 	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
14974be9510SFilipe David Borba Manana 		if (path->slots[0]) {
15074be9510SFilipe David Borba Manana 			path->slots[0]--;
15174be9510SFilipe David Borba Manana 			btrfs_item_key_to_cpu(path->nodes[0], &key,
15274be9510SFilipe David Borba Manana 					      path->slots[0]);
15374be9510SFilipe David Borba Manana 			if (key.objectid == bytenr &&
15474be9510SFilipe David Borba Manana 			    key.type == BTRFS_EXTENT_ITEM_KEY &&
1550b246afaSJeff Mahoney 			    key.offset == fs_info->nodesize)
15674be9510SFilipe David Borba Manana 				ret = 0;
15774be9510SFilipe David Borba Manana 		}
15874be9510SFilipe David Borba Manana 	}
1593173a18fSJosef Bacik 
160a22285a6SYan, Zheng 	if (ret == 0) {
161a22285a6SYan, Zheng 		leaf = path->nodes[0];
1623212fa14SJosef Bacik 		item_size = btrfs_item_size(leaf, path->slots[0]);
163a22285a6SYan, Zheng 		if (item_size >= sizeof(*ei)) {
164a22285a6SYan, Zheng 			ei = btrfs_item_ptr(leaf, path->slots[0],
165a22285a6SYan, Zheng 					    struct btrfs_extent_item);
166a22285a6SYan, Zheng 			num_refs = btrfs_extent_refs(leaf, ei);
167a22285a6SYan, Zheng 			extent_flags = btrfs_extent_flags(leaf, ei);
168a22285a6SYan, Zheng 		} else {
169182741d2SQu Wenruo 			ret = -EUCLEAN;
170182741d2SQu Wenruo 			btrfs_err(fs_info,
171182741d2SQu Wenruo 			"unexpected extent item size, has %u expect >= %zu",
172182741d2SQu Wenruo 				  item_size, sizeof(*ei));
173ba3c2b19SNikolay Borisov 			if (trans)
174ba3c2b19SNikolay Borisov 				btrfs_abort_transaction(trans, ret);
175ba3c2b19SNikolay Borisov 			else
176ba3c2b19SNikolay Borisov 				btrfs_handle_fs_error(fs_info, ret, NULL);
177ba3c2b19SNikolay Borisov 
178ba3c2b19SNikolay Borisov 			goto out_free;
179a22285a6SYan, Zheng 		}
180ba3c2b19SNikolay Borisov 
181a22285a6SYan, Zheng 		BUG_ON(num_refs == 0);
182a22285a6SYan, Zheng 	} else {
183a22285a6SYan, Zheng 		num_refs = 0;
184a22285a6SYan, Zheng 		extent_flags = 0;
185a22285a6SYan, Zheng 		ret = 0;
186a22285a6SYan, Zheng 	}
187a22285a6SYan, Zheng 
188a22285a6SYan, Zheng 	if (!trans)
189a22285a6SYan, Zheng 		goto out;
190a22285a6SYan, Zheng 
191a22285a6SYan, Zheng 	delayed_refs = &trans->transaction->delayed_refs;
192a22285a6SYan, Zheng 	spin_lock(&delayed_refs->lock);
193f72ad18eSLiu Bo 	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
194a22285a6SYan, Zheng 	if (head) {
195a22285a6SYan, Zheng 		if (!mutex_trylock(&head->mutex)) {
196d278850eSJosef Bacik 			refcount_inc(&head->refs);
197a22285a6SYan, Zheng 			spin_unlock(&delayed_refs->lock);
198a22285a6SYan, Zheng 
199b3b4aa74SDavid Sterba 			btrfs_release_path(path);
200a22285a6SYan, Zheng 
2018cc33e5cSDavid Sterba 			/*
2028cc33e5cSDavid Sterba 			 * Mutex was contended, block until it's released and try
2038cc33e5cSDavid Sterba 			 * again
2048cc33e5cSDavid Sterba 			 */
205a22285a6SYan, Zheng 			mutex_lock(&head->mutex);
206a22285a6SYan, Zheng 			mutex_unlock(&head->mutex);
207d278850eSJosef Bacik 			btrfs_put_delayed_ref_head(head);
208639eefc8SFilipe David Borba Manana 			goto search_again;
209a22285a6SYan, Zheng 		}
210d7df2c79SJosef Bacik 		spin_lock(&head->lock);
211a22285a6SYan, Zheng 		if (head->extent_op && head->extent_op->update_flags)
212a22285a6SYan, Zheng 			extent_flags |= head->extent_op->flags_to_set;
213a22285a6SYan, Zheng 		else
214a22285a6SYan, Zheng 			BUG_ON(num_refs == 0);
215a22285a6SYan, Zheng 
216d278850eSJosef Bacik 		num_refs += head->ref_mod;
217d7df2c79SJosef Bacik 		spin_unlock(&head->lock);
218a22285a6SYan, Zheng 		mutex_unlock(&head->mutex);
219a22285a6SYan, Zheng 	}
220a22285a6SYan, Zheng 	spin_unlock(&delayed_refs->lock);
221a22285a6SYan, Zheng out:
222a22285a6SYan, Zheng 	WARN_ON(num_refs == 0);
223a22285a6SYan, Zheng 	if (refs)
224a22285a6SYan, Zheng 		*refs = num_refs;
225a22285a6SYan, Zheng 	if (flags)
226a22285a6SYan, Zheng 		*flags = extent_flags;
227a22285a6SYan, Zheng out_free:
228a22285a6SYan, Zheng 	btrfs_free_path(path);
229a22285a6SYan, Zheng 	return ret;
230a22285a6SYan, Zheng }
231a22285a6SYan, Zheng 
232a22285a6SYan, Zheng /*
233d8d5f3e1SChris Mason  * Back reference rules.  Back refs have three main goals:
234d8d5f3e1SChris Mason  *
235d8d5f3e1SChris Mason  * 1) differentiate between all holders of references to an extent so that
236d8d5f3e1SChris Mason  *    when a reference is dropped we can make sure it was a valid reference
237d8d5f3e1SChris Mason  *    before freeing the extent.
238d8d5f3e1SChris Mason  *
239d8d5f3e1SChris Mason  * 2) Provide enough information to quickly find the holders of an extent
240d8d5f3e1SChris Mason  *    if we notice a given block is corrupted or bad.
241d8d5f3e1SChris Mason  *
242d8d5f3e1SChris Mason  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
243d8d5f3e1SChris Mason  *    maintenance.  This is actually the same as #2, but with a slightly
244d8d5f3e1SChris Mason  *    different use case.
245d8d5f3e1SChris Mason  *
2465d4f98a2SYan Zheng  * There are two kinds of back refs. The implicit back refs is optimized
2475d4f98a2SYan Zheng  * for pointers in non-shared tree blocks. For a given pointer in a block,
2485d4f98a2SYan Zheng  * back refs of this kind provide information about the block's owner tree
2495d4f98a2SYan Zheng  * and the pointer's key. These information allow us to find the block by
2505d4f98a2SYan Zheng  * b-tree searching. The full back refs is for pointers in tree blocks not
2515d4f98a2SYan Zheng  * referenced by their owner trees. The location of tree block is recorded
2525d4f98a2SYan Zheng  * in the back refs. Actually the full back refs is generic, and can be
2535d4f98a2SYan Zheng  * used in all cases the implicit back refs is used. The major shortcoming
2545d4f98a2SYan Zheng  * of the full back refs is its overhead. Every time a tree block gets
2555d4f98a2SYan Zheng  * COWed, we have to update back refs entry for all pointers in it.
2565d4f98a2SYan Zheng  *
2575d4f98a2SYan Zheng  * For a newly allocated tree block, we use implicit back refs for
2585d4f98a2SYan Zheng  * pointers in it. This means most tree related operations only involve
2595d4f98a2SYan Zheng  * implicit back refs. For a tree block created in old transaction, the
2605d4f98a2SYan Zheng  * only way to drop a reference to it is COW it. So we can detect the
2615d4f98a2SYan Zheng  * event that tree block loses its owner tree's reference and do the
2625d4f98a2SYan Zheng  * back refs conversion.
2635d4f98a2SYan Zheng  *
26401327610SNicholas D Steeves  * When a tree block is COWed through a tree, there are four cases:
2655d4f98a2SYan Zheng  *
2665d4f98a2SYan Zheng  * The reference count of the block is one and the tree is the block's
2675d4f98a2SYan Zheng  * owner tree. Nothing to do in this case.
2685d4f98a2SYan Zheng  *
2695d4f98a2SYan Zheng  * The reference count of the block is one and the tree is not the
2705d4f98a2SYan Zheng  * block's owner tree. In this case, full back refs is used for pointers
2715d4f98a2SYan Zheng  * in the block. Remove these full back refs, add implicit back refs for
2725d4f98a2SYan Zheng  * every pointers in the new block.
2735d4f98a2SYan Zheng  *
2745d4f98a2SYan Zheng  * The reference count of the block is greater than one and the tree is
2755d4f98a2SYan Zheng  * the block's owner tree. In this case, implicit back refs is used for
2765d4f98a2SYan Zheng  * pointers in the block. Add full back refs for every pointers in the
2775d4f98a2SYan Zheng  * block, increase lower level extents' reference counts. The original
2785d4f98a2SYan Zheng  * implicit back refs are entailed to the new block.
2795d4f98a2SYan Zheng  *
2805d4f98a2SYan Zheng  * The reference count of the block is greater than one and the tree is
2815d4f98a2SYan Zheng  * not the block's owner tree. Add implicit back refs for every pointer in
2825d4f98a2SYan Zheng  * the new block, increase lower level extents' reference count.
2835d4f98a2SYan Zheng  *
2845d4f98a2SYan Zheng  * Back Reference Key composing:
2855d4f98a2SYan Zheng  *
2865d4f98a2SYan Zheng  * The key objectid corresponds to the first byte in the extent,
2875d4f98a2SYan Zheng  * The key type is used to differentiate between types of back refs.
2885d4f98a2SYan Zheng  * There are different meanings of the key offset for different types
2895d4f98a2SYan Zheng  * of back refs.
2905d4f98a2SYan Zheng  *
291d8d5f3e1SChris Mason  * File extents can be referenced by:
292d8d5f3e1SChris Mason  *
293d8d5f3e1SChris Mason  * - multiple snapshots, subvolumes, or different generations in one subvol
29431840ae1SZheng Yan  * - different files inside a single subvolume
295d8d5f3e1SChris Mason  * - different offsets inside a file (bookend extents in file.c)
296d8d5f3e1SChris Mason  *
2975d4f98a2SYan Zheng  * The extent ref structure for the implicit back refs has fields for:
298d8d5f3e1SChris Mason  *
299d8d5f3e1SChris Mason  * - Objectid of the subvolume root
300d8d5f3e1SChris Mason  * - objectid of the file holding the reference
3015d4f98a2SYan Zheng  * - original offset in the file
3025d4f98a2SYan Zheng  * - how many bookend extents
30331840ae1SZheng Yan  *
3045d4f98a2SYan Zheng  * The key offset for the implicit back refs is hash of the first
3055d4f98a2SYan Zheng  * three fields.
306d8d5f3e1SChris Mason  *
3075d4f98a2SYan Zheng  * The extent ref structure for the full back refs has field for:
308d8d5f3e1SChris Mason  *
3095d4f98a2SYan Zheng  * - number of pointers in the tree leaf
310d8d5f3e1SChris Mason  *
 * The key offset for the full back refs is the first byte of
 * the tree leaf
313d8d5f3e1SChris Mason  *
3145d4f98a2SYan Zheng  * When a file extent is allocated, The implicit back refs is used.
3155d4f98a2SYan Zheng  * the fields are filled in:
316d8d5f3e1SChris Mason  *
3175d4f98a2SYan Zheng  *     (root_key.objectid, inode objectid, offset in file, 1)
3185d4f98a2SYan Zheng  *
 * When a file extent is removed by file truncation, we find the
 * corresponding implicit back refs and check the following fields:
3215d4f98a2SYan Zheng  *
3225d4f98a2SYan Zheng  *     (btrfs_header_owner(leaf), inode objectid, offset in file)
323d8d5f3e1SChris Mason  *
324d8d5f3e1SChris Mason  * Btree extents can be referenced by:
325d8d5f3e1SChris Mason  *
326d8d5f3e1SChris Mason  * - Different subvolumes
327d8d5f3e1SChris Mason  *
3285d4f98a2SYan Zheng  * Both the implicit back refs and the full back refs for tree blocks
3295d4f98a2SYan Zheng  * only consist of key. The key offset for the implicit back refs is
3305d4f98a2SYan Zheng  * objectid of block's owner tree. The key offset for the full back refs
3315d4f98a2SYan Zheng  * is the first byte of parent block.
332d8d5f3e1SChris Mason  *
3335d4f98a2SYan Zheng  * When implicit back refs is used, information about the lowest key and
3345d4f98a2SYan Zheng  * level of the tree block are required. These information are stored in
3355d4f98a2SYan Zheng  * tree block info structure.
336d8d5f3e1SChris Mason  */
33731840ae1SZheng Yan 
338167ce953SLiu Bo /*
339167ce953SLiu Bo  * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
34052042d8eSAndrea Gelmini  * is_data == BTRFS_REF_TYPE_DATA, data type is requiried,
341167ce953SLiu Bo  * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
342167ce953SLiu Bo  */
btrfs_get_extent_inline_ref_type(const struct extent_buffer * eb,struct btrfs_extent_inline_ref * iref,enum btrfs_inline_ref_type is_data)343167ce953SLiu Bo int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
344167ce953SLiu Bo 				     struct btrfs_extent_inline_ref *iref,
345167ce953SLiu Bo 				     enum btrfs_inline_ref_type is_data)
346167ce953SLiu Bo {
347167ce953SLiu Bo 	int type = btrfs_extent_inline_ref_type(eb, iref);
34864ecdb64SLiu Bo 	u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
349167ce953SLiu Bo 
350167ce953SLiu Bo 	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
351167ce953SLiu Bo 	    type == BTRFS_SHARED_BLOCK_REF_KEY ||
352167ce953SLiu Bo 	    type == BTRFS_SHARED_DATA_REF_KEY ||
353167ce953SLiu Bo 	    type == BTRFS_EXTENT_DATA_REF_KEY) {
354167ce953SLiu Bo 		if (is_data == BTRFS_REF_TYPE_BLOCK) {
35564ecdb64SLiu Bo 			if (type == BTRFS_TREE_BLOCK_REF_KEY)
356167ce953SLiu Bo 				return type;
35764ecdb64SLiu Bo 			if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
35864ecdb64SLiu Bo 				ASSERT(eb->fs_info);
35964ecdb64SLiu Bo 				/*
360ea57788eSQu Wenruo 				 * Every shared one has parent tree block,
361ea57788eSQu Wenruo 				 * which must be aligned to sector size.
36264ecdb64SLiu Bo 				 */
36364ecdb64SLiu Bo 				if (offset &&
364ea57788eSQu Wenruo 				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
36564ecdb64SLiu Bo 					return type;
36664ecdb64SLiu Bo 			}
367167ce953SLiu Bo 		} else if (is_data == BTRFS_REF_TYPE_DATA) {
36864ecdb64SLiu Bo 			if (type == BTRFS_EXTENT_DATA_REF_KEY)
369167ce953SLiu Bo 				return type;
37064ecdb64SLiu Bo 			if (type == BTRFS_SHARED_DATA_REF_KEY) {
37164ecdb64SLiu Bo 				ASSERT(eb->fs_info);
37264ecdb64SLiu Bo 				/*
373ea57788eSQu Wenruo 				 * Every shared one has parent tree block,
374ea57788eSQu Wenruo 				 * which must be aligned to sector size.
37564ecdb64SLiu Bo 				 */
37664ecdb64SLiu Bo 				if (offset &&
377ea57788eSQu Wenruo 				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
37864ecdb64SLiu Bo 					return type;
37964ecdb64SLiu Bo 			}
380167ce953SLiu Bo 		} else {
381167ce953SLiu Bo 			ASSERT(is_data == BTRFS_REF_TYPE_ANY);
382167ce953SLiu Bo 			return type;
383167ce953SLiu Bo 		}
384167ce953SLiu Bo 	}
385167ce953SLiu Bo 
38625761430SQu Wenruo 	WARN_ON(1);
3876c75a589SQu Wenruo 	btrfs_print_leaf(eb);
388ea57788eSQu Wenruo 	btrfs_err(eb->fs_info,
389ea57788eSQu Wenruo 		  "eb %llu iref 0x%lx invalid extent inline ref type %d",
390ea57788eSQu Wenruo 		  eb->start, (unsigned long)iref, type);
391167ce953SLiu Bo 
392167ce953SLiu Bo 	return BTRFS_REF_TYPE_INVALID;
393167ce953SLiu Bo }
394167ce953SLiu Bo 
/*
 * Hash (root objectid, owner objectid, file offset) into a 64-bit value.
 *
 * The high part is the crc32c of the little-endian @root_objectid; the low
 * part is the crc32c of the little-endian @owner continued over the
 * little-endian @offset.  Both CRCs start from an all-ones seed.
 *
 * The high part is shifted by 31 bits, not 32.  Other functions in this
 * file use the result as the key offset of EXTENT_DATA_REF items, so the
 * formula must stay exactly as it is.
 */
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	__le64 le_root = cpu_to_le64(root_objectid);
	__le64 le_owner = cpu_to_le64(owner);
	__le64 le_offset = cpu_to_le64(offset);
	u32 high_crc;
	u32 low_crc;

	high_crc = btrfs_crc32c(~(u32)0, &le_root, sizeof(le_root));

	low_crc = btrfs_crc32c(~(u32)0, &le_owner, sizeof(le_owner));
	low_crc = btrfs_crc32c(low_crc, &le_offset, sizeof(le_offset));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}
4105d4f98a2SYan Zheng 
hash_extent_data_ref_item(struct extent_buffer * leaf,struct btrfs_extent_data_ref * ref)4115d4f98a2SYan Zheng static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
4125d4f98a2SYan Zheng 				     struct btrfs_extent_data_ref *ref)
4135d4f98a2SYan Zheng {
4145d4f98a2SYan Zheng 	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
4155d4f98a2SYan Zheng 				    btrfs_extent_data_ref_objectid(leaf, ref),
4165d4f98a2SYan Zheng 				    btrfs_extent_data_ref_offset(leaf, ref));
4175d4f98a2SYan Zheng }
4185d4f98a2SYan Zheng 
/*
 * Compare the extent data ref @ref stored in @leaf against the given
 * (root objectid, owner, offset) triple.
 *
 * Returns 1 when all three fields are equal, 0 otherwise.
 */
static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	return btrfs_extent_data_ref_root(leaf, ref) == root_objectid &&
	       btrfs_extent_data_ref_objectid(leaf, ref) == owner &&
	       btrfs_extent_data_ref_offset(leaf, ref) == offset;
}
4295d4f98a2SYan Zheng 
lookup_extent_data_ref(struct btrfs_trans_handle * trans,struct btrfs_path * path,u64 bytenr,u64 parent,u64 root_objectid,u64 owner,u64 offset)4305d4f98a2SYan Zheng static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
4313bb1a1bcSYan Zheng 					   struct btrfs_path *path,
4323bb1a1bcSYan Zheng 					   u64 bytenr, u64 parent,
4335d4f98a2SYan Zheng 					   u64 root_objectid,
4345d4f98a2SYan Zheng 					   u64 owner, u64 offset)
43574493f7aSChris Mason {
43629cbcf40SJosef Bacik 	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
43774493f7aSChris Mason 	struct btrfs_key key;
4385d4f98a2SYan Zheng 	struct btrfs_extent_data_ref *ref;
43931840ae1SZheng Yan 	struct extent_buffer *leaf;
4405d4f98a2SYan Zheng 	u32 nritems;
44174493f7aSChris Mason 	int ret;
4425d4f98a2SYan Zheng 	int recow;
4435d4f98a2SYan Zheng 	int err = -ENOENT;
44474493f7aSChris Mason 
44574493f7aSChris Mason 	key.objectid = bytenr;
4465d4f98a2SYan Zheng 	if (parent) {
4475d4f98a2SYan Zheng 		key.type = BTRFS_SHARED_DATA_REF_KEY;
44831840ae1SZheng Yan 		key.offset = parent;
4495d4f98a2SYan Zheng 	} else {
4505d4f98a2SYan Zheng 		key.type = BTRFS_EXTENT_DATA_REF_KEY;
4515d4f98a2SYan Zheng 		key.offset = hash_extent_data_ref(root_objectid,
4525d4f98a2SYan Zheng 						  owner, offset);
4535d4f98a2SYan Zheng 	}
4545d4f98a2SYan Zheng again:
4555d4f98a2SYan Zheng 	recow = 0;
4565d4f98a2SYan Zheng 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
4575d4f98a2SYan Zheng 	if (ret < 0) {
4585d4f98a2SYan Zheng 		err = ret;
4595d4f98a2SYan Zheng 		goto fail;
4605d4f98a2SYan Zheng 	}
46174493f7aSChris Mason 
4625d4f98a2SYan Zheng 	if (parent) {
4635d4f98a2SYan Zheng 		if (!ret)
4645d4f98a2SYan Zheng 			return 0;
4655d4f98a2SYan Zheng 		goto fail;
46674493f7aSChris Mason 	}
46731840ae1SZheng Yan 
46831840ae1SZheng Yan 	leaf = path->nodes[0];
4695d4f98a2SYan Zheng 	nritems = btrfs_header_nritems(leaf);
4705d4f98a2SYan Zheng 	while (1) {
4715d4f98a2SYan Zheng 		if (path->slots[0] >= nritems) {
4725d4f98a2SYan Zheng 			ret = btrfs_next_leaf(root, path);
4735d4f98a2SYan Zheng 			if (ret < 0)
4745d4f98a2SYan Zheng 				err = ret;
4755d4f98a2SYan Zheng 			if (ret)
4765d4f98a2SYan Zheng 				goto fail;
4775d4f98a2SYan Zheng 
4785d4f98a2SYan Zheng 			leaf = path->nodes[0];
4795d4f98a2SYan Zheng 			nritems = btrfs_header_nritems(leaf);
4805d4f98a2SYan Zheng 			recow = 1;
48131840ae1SZheng Yan 		}
48231840ae1SZheng Yan 
4835d4f98a2SYan Zheng 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4845d4f98a2SYan Zheng 		if (key.objectid != bytenr ||
4855d4f98a2SYan Zheng 		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
4865d4f98a2SYan Zheng 			goto fail;
4875d4f98a2SYan Zheng 
4885d4f98a2SYan Zheng 		ref = btrfs_item_ptr(leaf, path->slots[0],
4895d4f98a2SYan Zheng 				     struct btrfs_extent_data_ref);
4905d4f98a2SYan Zheng 
4915d4f98a2SYan Zheng 		if (match_extent_data_ref(leaf, ref, root_objectid,
4925d4f98a2SYan Zheng 					  owner, offset)) {
4935d4f98a2SYan Zheng 			if (recow) {
494b3b4aa74SDavid Sterba 				btrfs_release_path(path);
4955d4f98a2SYan Zheng 				goto again;
4965d4f98a2SYan Zheng 			}
4975d4f98a2SYan Zheng 			err = 0;
4985d4f98a2SYan Zheng 			break;
4995d4f98a2SYan Zheng 		}
5005d4f98a2SYan Zheng 		path->slots[0]++;
5015d4f98a2SYan Zheng 	}
5025d4f98a2SYan Zheng fail:
5035d4f98a2SYan Zheng 	return err;
5045d4f98a2SYan Zheng }
5055d4f98a2SYan Zheng 
insert_extent_data_ref(struct btrfs_trans_handle * trans,struct btrfs_path * path,u64 bytenr,u64 parent,u64 root_objectid,u64 owner,u64 offset,int refs_to_add)5065d4f98a2SYan Zheng static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
50731840ae1SZheng Yan 					   struct btrfs_path *path,
50831840ae1SZheng Yan 					   u64 bytenr, u64 parent,
5095d4f98a2SYan Zheng 					   u64 root_objectid, u64 owner,
5105d4f98a2SYan Zheng 					   u64 offset, int refs_to_add)
51131840ae1SZheng Yan {
51229cbcf40SJosef Bacik 	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
51331840ae1SZheng Yan 	struct btrfs_key key;
51431840ae1SZheng Yan 	struct extent_buffer *leaf;
5155d4f98a2SYan Zheng 	u32 size;
51631840ae1SZheng Yan 	u32 num_refs;
51731840ae1SZheng Yan 	int ret;
51831840ae1SZheng Yan 
51931840ae1SZheng Yan 	key.objectid = bytenr;
5205d4f98a2SYan Zheng 	if (parent) {
5215d4f98a2SYan Zheng 		key.type = BTRFS_SHARED_DATA_REF_KEY;
52231840ae1SZheng Yan 		key.offset = parent;
5235d4f98a2SYan Zheng 		size = sizeof(struct btrfs_shared_data_ref);
52431840ae1SZheng Yan 	} else {
5255d4f98a2SYan Zheng 		key.type = BTRFS_EXTENT_DATA_REF_KEY;
5265d4f98a2SYan Zheng 		key.offset = hash_extent_data_ref(root_objectid,
5275d4f98a2SYan Zheng 						  owner, offset);
5285d4f98a2SYan Zheng 		size = sizeof(struct btrfs_extent_data_ref);
52931840ae1SZheng Yan 	}
5305d4f98a2SYan Zheng 
5315d4f98a2SYan Zheng 	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
5325d4f98a2SYan Zheng 	if (ret && ret != -EEXIST)
5335d4f98a2SYan Zheng 		goto fail;
5345d4f98a2SYan Zheng 
5355d4f98a2SYan Zheng 	leaf = path->nodes[0];
5365d4f98a2SYan Zheng 	if (parent) {
5375d4f98a2SYan Zheng 		struct btrfs_shared_data_ref *ref;
5385d4f98a2SYan Zheng 		ref = btrfs_item_ptr(leaf, path->slots[0],
5395d4f98a2SYan Zheng 				     struct btrfs_shared_data_ref);
5405d4f98a2SYan Zheng 		if (ret == 0) {
5415d4f98a2SYan Zheng 			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
5425d4f98a2SYan Zheng 		} else {
5435d4f98a2SYan Zheng 			num_refs = btrfs_shared_data_ref_count(leaf, ref);
5445d4f98a2SYan Zheng 			num_refs += refs_to_add;
5455d4f98a2SYan Zheng 			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
5465d4f98a2SYan Zheng 		}
5475d4f98a2SYan Zheng 	} else {
5485d4f98a2SYan Zheng 		struct btrfs_extent_data_ref *ref;
5495d4f98a2SYan Zheng 		while (ret == -EEXIST) {
5505d4f98a2SYan Zheng 			ref = btrfs_item_ptr(leaf, path->slots[0],
5515d4f98a2SYan Zheng 					     struct btrfs_extent_data_ref);
5525d4f98a2SYan Zheng 			if (match_extent_data_ref(leaf, ref, root_objectid,
5535d4f98a2SYan Zheng 						  owner, offset))
5545d4f98a2SYan Zheng 				break;
555b3b4aa74SDavid Sterba 			btrfs_release_path(path);
5565d4f98a2SYan Zheng 			key.offset++;
5575d4f98a2SYan Zheng 			ret = btrfs_insert_empty_item(trans, root, path, &key,
5585d4f98a2SYan Zheng 						      size);
5595d4f98a2SYan Zheng 			if (ret && ret != -EEXIST)
5605d4f98a2SYan Zheng 				goto fail;
5615d4f98a2SYan Zheng 
5625d4f98a2SYan Zheng 			leaf = path->nodes[0];
5635d4f98a2SYan Zheng 		}
5645d4f98a2SYan Zheng 		ref = btrfs_item_ptr(leaf, path->slots[0],
5655d4f98a2SYan Zheng 				     struct btrfs_extent_data_ref);
5665d4f98a2SYan Zheng 		if (ret == 0) {
5675d4f98a2SYan Zheng 			btrfs_set_extent_data_ref_root(leaf, ref,
5685d4f98a2SYan Zheng 						       root_objectid);
5695d4f98a2SYan Zheng 			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
5705d4f98a2SYan Zheng 			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
5715d4f98a2SYan Zheng 			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
5725d4f98a2SYan Zheng 		} else {
5735d4f98a2SYan Zheng 			num_refs = btrfs_extent_data_ref_count(leaf, ref);
5745d4f98a2SYan Zheng 			num_refs += refs_to_add;
5755d4f98a2SYan Zheng 			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
5765d4f98a2SYan Zheng 		}
5775d4f98a2SYan Zheng 	}
578d5e09e38SFilipe Manana 	btrfs_mark_buffer_dirty(trans, leaf);
5795d4f98a2SYan Zheng 	ret = 0;
5805d4f98a2SYan Zheng fail:
581b3b4aa74SDavid Sterba 	btrfs_release_path(path);
5827bb86316SChris Mason 	return ret;
58374493f7aSChris Mason }
58474493f7aSChris Mason 
remove_extent_data_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,int refs_to_drop)5855d4f98a2SYan Zheng static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
58676d76e78SJosef Bacik 					   struct btrfs_root *root,
58756bec294SChris Mason 					   struct btrfs_path *path,
5885b2a54bbSJosef Bacik 					   int refs_to_drop)
58931840ae1SZheng Yan {
5905d4f98a2SYan Zheng 	struct btrfs_key key;
5915d4f98a2SYan Zheng 	struct btrfs_extent_data_ref *ref1 = NULL;
5925d4f98a2SYan Zheng 	struct btrfs_shared_data_ref *ref2 = NULL;
59331840ae1SZheng Yan 	struct extent_buffer *leaf;
5945d4f98a2SYan Zheng 	u32 num_refs = 0;
59531840ae1SZheng Yan 	int ret = 0;
59631840ae1SZheng Yan 
59731840ae1SZheng Yan 	leaf = path->nodes[0];
5985d4f98a2SYan Zheng 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5995d4f98a2SYan Zheng 
6005d4f98a2SYan Zheng 	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6015d4f98a2SYan Zheng 		ref1 = btrfs_item_ptr(leaf, path->slots[0],
6025d4f98a2SYan Zheng 				      struct btrfs_extent_data_ref);
6035d4f98a2SYan Zheng 		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
6045d4f98a2SYan Zheng 	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6055d4f98a2SYan Zheng 		ref2 = btrfs_item_ptr(leaf, path->slots[0],
6065d4f98a2SYan Zheng 				      struct btrfs_shared_data_ref);
6075d4f98a2SYan Zheng 		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
6085d4f98a2SYan Zheng 	} else {
609182741d2SQu Wenruo 		btrfs_err(trans->fs_info,
610182741d2SQu Wenruo 			  "unrecognized backref key (%llu %u %llu)",
611182741d2SQu Wenruo 			  key.objectid, key.type, key.offset);
612182741d2SQu Wenruo 		btrfs_abort_transaction(trans, -EUCLEAN);
613182741d2SQu Wenruo 		return -EUCLEAN;
6145d4f98a2SYan Zheng 	}
6155d4f98a2SYan Zheng 
61656bec294SChris Mason 	BUG_ON(num_refs < refs_to_drop);
61756bec294SChris Mason 	num_refs -= refs_to_drop;
6185d4f98a2SYan Zheng 
61931840ae1SZheng Yan 	if (num_refs == 0) {
62076d76e78SJosef Bacik 		ret = btrfs_del_item(trans, root, path);
62131840ae1SZheng Yan 	} else {
6225d4f98a2SYan Zheng 		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
6235d4f98a2SYan Zheng 			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
6245d4f98a2SYan Zheng 		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
6255d4f98a2SYan Zheng 			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
626d5e09e38SFilipe Manana 		btrfs_mark_buffer_dirty(trans, leaf);
62731840ae1SZheng Yan 	}
6285d4f98a2SYan Zheng 	return ret;
6295d4f98a2SYan Zheng }
6305d4f98a2SYan Zheng 
extent_data_ref_count(struct btrfs_path * path,struct btrfs_extent_inline_ref * iref)6319ed0dea0SZhaolei static noinline u32 extent_data_ref_count(struct btrfs_path *path,
6325d4f98a2SYan Zheng 					  struct btrfs_extent_inline_ref *iref)
6335d4f98a2SYan Zheng {
6345d4f98a2SYan Zheng 	struct btrfs_key key;
6355d4f98a2SYan Zheng 	struct extent_buffer *leaf;
6365d4f98a2SYan Zheng 	struct btrfs_extent_data_ref *ref1;
6375d4f98a2SYan Zheng 	struct btrfs_shared_data_ref *ref2;
6385d4f98a2SYan Zheng 	u32 num_refs = 0;
6393de28d57SLiu Bo 	int type;
6405d4f98a2SYan Zheng 
6415d4f98a2SYan Zheng 	leaf = path->nodes[0];
6425d4f98a2SYan Zheng 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
643ba3c2b19SNikolay Borisov 
6445d4f98a2SYan Zheng 	if (iref) {
6453de28d57SLiu Bo 		/*
6463de28d57SLiu Bo 		 * If type is invalid, we should have bailed out earlier than
6473de28d57SLiu Bo 		 * this call.
6483de28d57SLiu Bo 		 */
6493de28d57SLiu Bo 		type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
6503de28d57SLiu Bo 		ASSERT(type != BTRFS_REF_TYPE_INVALID);
6513de28d57SLiu Bo 		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
6525d4f98a2SYan Zheng 			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
6535d4f98a2SYan Zheng 			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
6545d4f98a2SYan Zheng 		} else {
6555d4f98a2SYan Zheng 			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
6565d4f98a2SYan Zheng 			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
6575d4f98a2SYan Zheng 		}
6585d4f98a2SYan Zheng 	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6595d4f98a2SYan Zheng 		ref1 = btrfs_item_ptr(leaf, path->slots[0],
6605d4f98a2SYan Zheng 				      struct btrfs_extent_data_ref);
6615d4f98a2SYan Zheng 		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
6625d4f98a2SYan Zheng 	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6635d4f98a2SYan Zheng 		ref2 = btrfs_item_ptr(leaf, path->slots[0],
6645d4f98a2SYan Zheng 				      struct btrfs_shared_data_ref);
6655d4f98a2SYan Zheng 		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
6665d4f98a2SYan Zheng 	} else {
6675d4f98a2SYan Zheng 		WARN_ON(1);
6685d4f98a2SYan Zheng 	}
6695d4f98a2SYan Zheng 	return num_refs;
6705d4f98a2SYan Zheng }
6715d4f98a2SYan Zheng 
lookup_tree_block_ref(struct btrfs_trans_handle * trans,struct btrfs_path * path,u64 bytenr,u64 parent,u64 root_objectid)6725d4f98a2SYan Zheng static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
6735d4f98a2SYan Zheng 					  struct btrfs_path *path,
6745d4f98a2SYan Zheng 					  u64 bytenr, u64 parent,
6755d4f98a2SYan Zheng 					  u64 root_objectid)
6765d4f98a2SYan Zheng {
67729cbcf40SJosef Bacik 	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
6785d4f98a2SYan Zheng 	struct btrfs_key key;
6795d4f98a2SYan Zheng 	int ret;
6805d4f98a2SYan Zheng 
6815d4f98a2SYan Zheng 	key.objectid = bytenr;
6825d4f98a2SYan Zheng 	if (parent) {
6835d4f98a2SYan Zheng 		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
6845d4f98a2SYan Zheng 		key.offset = parent;
6855d4f98a2SYan Zheng 	} else {
6865d4f98a2SYan Zheng 		key.type = BTRFS_TREE_BLOCK_REF_KEY;
6875d4f98a2SYan Zheng 		key.offset = root_objectid;
6885d4f98a2SYan Zheng 	}
6895d4f98a2SYan Zheng 
6905d4f98a2SYan Zheng 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6915d4f98a2SYan Zheng 	if (ret > 0)
6925d4f98a2SYan Zheng 		ret = -ENOENT;
6935d4f98a2SYan Zheng 	return ret;
6945d4f98a2SYan Zheng }
6955d4f98a2SYan Zheng 
insert_tree_block_ref(struct btrfs_trans_handle * trans,struct btrfs_path * path,u64 bytenr,u64 parent,u64 root_objectid)6965d4f98a2SYan Zheng static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
6975d4f98a2SYan Zheng 					  struct btrfs_path *path,
6985d4f98a2SYan Zheng 					  u64 bytenr, u64 parent,
6995d4f98a2SYan Zheng 					  u64 root_objectid)
7005d4f98a2SYan Zheng {
70129cbcf40SJosef Bacik 	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
7025d4f98a2SYan Zheng 	struct btrfs_key key;
7035d4f98a2SYan Zheng 	int ret;
7045d4f98a2SYan Zheng 
7055d4f98a2SYan Zheng 	key.objectid = bytenr;
7065d4f98a2SYan Zheng 	if (parent) {
7075d4f98a2SYan Zheng 		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
7085d4f98a2SYan Zheng 		key.offset = parent;
7095d4f98a2SYan Zheng 	} else {
7105d4f98a2SYan Zheng 		key.type = BTRFS_TREE_BLOCK_REF_KEY;
7115d4f98a2SYan Zheng 		key.offset = root_objectid;
7125d4f98a2SYan Zheng 	}
7135d4f98a2SYan Zheng 
71429cbcf40SJosef Bacik 	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
715b3b4aa74SDavid Sterba 	btrfs_release_path(path);
7165d4f98a2SYan Zheng 	return ret;
7175d4f98a2SYan Zheng }
7185d4f98a2SYan Zheng 
/*
 * Map a (@parent, @owner) pair to the backref key type that describes it.
 *
 * An @owner below BTRFS_FIRST_FREE_OBJECTID selects the block ref types,
 * anything else selects the data ref types.  A non-zero @parent selects the
 * shared variant of either.
 */
static inline int extent_ref_type(u64 parent, u64 owner)
{
	const bool block_ref = owner < BTRFS_FIRST_FREE_OBJECTID;

	if (parent > 0)
		return block_ref ? BTRFS_SHARED_BLOCK_REF_KEY :
				   BTRFS_SHARED_DATA_REF_KEY;

	return block_ref ? BTRFS_TREE_BLOCK_REF_KEY :
			   BTRFS_EXTENT_DATA_REF_KEY;
}
7355d4f98a2SYan Zheng 
find_next_key(struct btrfs_path * path,int level,struct btrfs_key * key)7362c47e605SYan Zheng static int find_next_key(struct btrfs_path *path, int level,
7372c47e605SYan Zheng 			 struct btrfs_key *key)
7385d4f98a2SYan Zheng 
7395d4f98a2SYan Zheng {
7402c47e605SYan Zheng 	for (; level < BTRFS_MAX_LEVEL; level++) {
7415d4f98a2SYan Zheng 		if (!path->nodes[level])
7425d4f98a2SYan Zheng 			break;
7435d4f98a2SYan Zheng 		if (path->slots[level] + 1 >=
7445d4f98a2SYan Zheng 		    btrfs_header_nritems(path->nodes[level]))
7455d4f98a2SYan Zheng 			continue;
7465d4f98a2SYan Zheng 		if (level == 0)
7475d4f98a2SYan Zheng 			btrfs_item_key_to_cpu(path->nodes[level], key,
7485d4f98a2SYan Zheng 					      path->slots[level] + 1);
7495d4f98a2SYan Zheng 		else
7505d4f98a2SYan Zheng 			btrfs_node_key_to_cpu(path->nodes[level], key,
7515d4f98a2SYan Zheng 					      path->slots[level] + 1);
7525d4f98a2SYan Zheng 		return 0;
7535d4f98a2SYan Zheng 	}
7545d4f98a2SYan Zheng 	return 1;
7555d4f98a2SYan Zheng }
7565d4f98a2SYan Zheng 
7575d4f98a2SYan Zheng /*
7585d4f98a2SYan Zheng  * look for inline back ref. if back ref is found, *ref_ret is set
7595d4f98a2SYan Zheng  * to the address of inline back ref, and 0 is returned.
7605d4f98a2SYan Zheng  *
7615d4f98a2SYan Zheng  * if back ref isn't found, *ref_ret is set to the address where it
7625d4f98a2SYan Zheng  * should be inserted, and -ENOENT is returned.
7635d4f98a2SYan Zheng  *
7645d4f98a2SYan Zheng  * if insert is true and there are too many inline back refs, the path
7655d4f98a2SYan Zheng  * points to the extent item, and -EAGAIN is returned.
7665d4f98a2SYan Zheng  *
7675d4f98a2SYan Zheng  * NOTE: inline back refs are ordered in the same way that back ref
7685d4f98a2SYan Zheng  *	 items in the tree are ordered.
7695d4f98a2SYan Zheng  */
7705d4f98a2SYan Zheng static noinline_for_stack
lookup_inline_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_path * path,struct btrfs_extent_inline_ref ** ref_ret,u64 bytenr,u64 num_bytes,u64 parent,u64 root_objectid,u64 owner,u64 offset,int insert)7715d4f98a2SYan Zheng int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
7725d4f98a2SYan Zheng 				 struct btrfs_path *path,
7735d4f98a2SYan Zheng 				 struct btrfs_extent_inline_ref **ref_ret,
7745d4f98a2SYan Zheng 				 u64 bytenr, u64 num_bytes,
7755d4f98a2SYan Zheng 				 u64 parent, u64 root_objectid,
7765d4f98a2SYan Zheng 				 u64 owner, u64 offset, int insert)
7775d4f98a2SYan Zheng {
778867cc1fbSNikolay Borisov 	struct btrfs_fs_info *fs_info = trans->fs_info;
77929cbcf40SJosef Bacik 	struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr);
7805d4f98a2SYan Zheng 	struct btrfs_key key;
7815d4f98a2SYan Zheng 	struct extent_buffer *leaf;
7825d4f98a2SYan Zheng 	struct btrfs_extent_item *ei;
7835d4f98a2SYan Zheng 	struct btrfs_extent_inline_ref *iref;
7845d4f98a2SYan Zheng 	u64 flags;
7855d4f98a2SYan Zheng 	u64 item_size;
7865d4f98a2SYan Zheng 	unsigned long ptr;
7875d4f98a2SYan Zheng 	unsigned long end;
7885d4f98a2SYan Zheng 	int extra_size;
7895d4f98a2SYan Zheng 	int type;
7905d4f98a2SYan Zheng 	int want;
7915d4f98a2SYan Zheng 	int ret;
7925d4f98a2SYan Zheng 	int err = 0;
7930b246afaSJeff Mahoney 	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
7943de28d57SLiu Bo 	int needed;
7955d4f98a2SYan Zheng 
7965d4f98a2SYan Zheng 	key.objectid = bytenr;
7975d4f98a2SYan Zheng 	key.type = BTRFS_EXTENT_ITEM_KEY;
7985d4f98a2SYan Zheng 	key.offset = num_bytes;
7995d4f98a2SYan Zheng 
8005d4f98a2SYan Zheng 	want = extent_ref_type(parent, owner);
8015d4f98a2SYan Zheng 	if (insert) {
8025d4f98a2SYan Zheng 		extra_size = btrfs_extent_inline_ref_size(want);
8039a664971Sethanwu 		path->search_for_extension = 1;
8045d4f98a2SYan Zheng 		path->keep_locks = 1;
8055d4f98a2SYan Zheng 	} else
8065d4f98a2SYan Zheng 		extra_size = -1;
8073173a18fSJosef Bacik 
8083173a18fSJosef Bacik 	/*
80916d1c062SNikolay Borisov 	 * Owner is our level, so we can just add one to get the level for the
81016d1c062SNikolay Borisov 	 * block we are interested in.
8113173a18fSJosef Bacik 	 */
8123173a18fSJosef Bacik 	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
8133173a18fSJosef Bacik 		key.type = BTRFS_METADATA_ITEM_KEY;
8143173a18fSJosef Bacik 		key.offset = owner;
8153173a18fSJosef Bacik 	}
8163173a18fSJosef Bacik 
8173173a18fSJosef Bacik again:
8185d4f98a2SYan Zheng 	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
8195d4f98a2SYan Zheng 	if (ret < 0) {
8205d4f98a2SYan Zheng 		err = ret;
8215d4f98a2SYan Zheng 		goto out;
8225d4f98a2SYan Zheng 	}
8233173a18fSJosef Bacik 
8243173a18fSJosef Bacik 	/*
8253173a18fSJosef Bacik 	 * We may be a newly converted file system which still has the old fat
8263173a18fSJosef Bacik 	 * extent entries for metadata, so try and see if we have one of those.
8273173a18fSJosef Bacik 	 */
8283173a18fSJosef Bacik 	if (ret > 0 && skinny_metadata) {
8293173a18fSJosef Bacik 		skinny_metadata = false;
8303173a18fSJosef Bacik 		if (path->slots[0]) {
8313173a18fSJosef Bacik 			path->slots[0]--;
8323173a18fSJosef Bacik 			btrfs_item_key_to_cpu(path->nodes[0], &key,
8333173a18fSJosef Bacik 					      path->slots[0]);
8343173a18fSJosef Bacik 			if (key.objectid == bytenr &&
8353173a18fSJosef Bacik 			    key.type == BTRFS_EXTENT_ITEM_KEY &&
8363173a18fSJosef Bacik 			    key.offset == num_bytes)
8373173a18fSJosef Bacik 				ret = 0;
8383173a18fSJosef Bacik 		}
8393173a18fSJosef Bacik 		if (ret) {
8409ce49a0bSFilipe Manana 			key.objectid = bytenr;
8413173a18fSJosef Bacik 			key.type = BTRFS_EXTENT_ITEM_KEY;
8423173a18fSJosef Bacik 			key.offset = num_bytes;
8433173a18fSJosef Bacik 			btrfs_release_path(path);
8443173a18fSJosef Bacik 			goto again;
8453173a18fSJosef Bacik 		}
8463173a18fSJosef Bacik 	}
8473173a18fSJosef Bacik 
84879787eaaSJeff Mahoney 	if (ret && !insert) {
84979787eaaSJeff Mahoney 		err = -ENOENT;
85079787eaaSJeff Mahoney 		goto out;
851fae7f21cSDulshani Gunawardhana 	} else if (WARN_ON(ret)) {
8527f72f505SQu Wenruo 		btrfs_print_leaf(path->nodes[0]);
8537f72f505SQu Wenruo 		btrfs_err(fs_info,
8547f72f505SQu Wenruo "extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu",
8557f72f505SQu Wenruo 			  bytenr, num_bytes, parent, root_objectid, owner,
8567f72f505SQu Wenruo 			  offset);
857492104c8SJosef Bacik 		err = -EIO;
858492104c8SJosef Bacik 		goto out;
85979787eaaSJeff Mahoney 	}
8605d4f98a2SYan Zheng 
8615d4f98a2SYan Zheng 	leaf = path->nodes[0];
8623212fa14SJosef Bacik 	item_size = btrfs_item_size(leaf, path->slots[0]);
8636d8ff4e4SDavid Sterba 	if (unlikely(item_size < sizeof(*ei))) {
864182741d2SQu Wenruo 		err = -EUCLEAN;
865182741d2SQu Wenruo 		btrfs_err(fs_info,
866182741d2SQu Wenruo 			  "unexpected extent item size, has %llu expect >= %zu",
867182741d2SQu Wenruo 			  item_size, sizeof(*ei));
868ba3c2b19SNikolay Borisov 		btrfs_abort_transaction(trans, err);
869ba3c2b19SNikolay Borisov 		goto out;
870ba3c2b19SNikolay Borisov 	}
8715d4f98a2SYan Zheng 
8725d4f98a2SYan Zheng 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
8735d4f98a2SYan Zheng 	flags = btrfs_extent_flags(leaf, ei);
8745d4f98a2SYan Zheng 
8755d4f98a2SYan Zheng 	ptr = (unsigned long)(ei + 1);
8765d4f98a2SYan Zheng 	end = (unsigned long)ei + item_size;
8775d4f98a2SYan Zheng 
8783173a18fSJosef Bacik 	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
8795d4f98a2SYan Zheng 		ptr += sizeof(struct btrfs_tree_block_info);
8805d4f98a2SYan Zheng 		BUG_ON(ptr > end);
8815d4f98a2SYan Zheng 	}
8825d4f98a2SYan Zheng 
8833de28d57SLiu Bo 	if (owner >= BTRFS_FIRST_FREE_OBJECTID)
8843de28d57SLiu Bo 		needed = BTRFS_REF_TYPE_DATA;
8853de28d57SLiu Bo 	else
8863de28d57SLiu Bo 		needed = BTRFS_REF_TYPE_BLOCK;
8873de28d57SLiu Bo 
8885d4f98a2SYan Zheng 	err = -ENOENT;
8895d4f98a2SYan Zheng 	while (1) {
8905d4f98a2SYan Zheng 		if (ptr >= end) {
891cf4f03c3SNikolay Borisov 			if (ptr > end) {
892cf4f03c3SNikolay Borisov 				err = -EUCLEAN;
893cf4f03c3SNikolay Borisov 				btrfs_print_leaf(path->nodes[0]);
894cf4f03c3SNikolay Borisov 				btrfs_crit(fs_info,
895cf4f03c3SNikolay Borisov "overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu",
896cf4f03c3SNikolay Borisov 					path->slots[0], root_objectid, owner, offset, parent);
897cf4f03c3SNikolay Borisov 			}
8985d4f98a2SYan Zheng 			break;
8995d4f98a2SYan Zheng 		}
9005d4f98a2SYan Zheng 		iref = (struct btrfs_extent_inline_ref *)ptr;
9013de28d57SLiu Bo 		type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
9023de28d57SLiu Bo 		if (type == BTRFS_REF_TYPE_INVALID) {
903af431dcbSSu Yue 			err = -EUCLEAN;
9043de28d57SLiu Bo 			goto out;
9053de28d57SLiu Bo 		}
9063de28d57SLiu Bo 
9075d4f98a2SYan Zheng 		if (want < type)
9085d4f98a2SYan Zheng 			break;
9095d4f98a2SYan Zheng 		if (want > type) {
9105d4f98a2SYan Zheng 			ptr += btrfs_extent_inline_ref_size(type);
9115d4f98a2SYan Zheng 			continue;
9125d4f98a2SYan Zheng 		}
9135d4f98a2SYan Zheng 
9145d4f98a2SYan Zheng 		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9155d4f98a2SYan Zheng 			struct btrfs_extent_data_ref *dref;
9165d4f98a2SYan Zheng 			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9175d4f98a2SYan Zheng 			if (match_extent_data_ref(leaf, dref, root_objectid,
9185d4f98a2SYan Zheng 						  owner, offset)) {
9195d4f98a2SYan Zheng 				err = 0;
9205d4f98a2SYan Zheng 				break;
9215d4f98a2SYan Zheng 			}
9225d4f98a2SYan Zheng 			if (hash_extent_data_ref_item(leaf, dref) <
9235d4f98a2SYan Zheng 			    hash_extent_data_ref(root_objectid, owner, offset))
9245d4f98a2SYan Zheng 				break;
9255d4f98a2SYan Zheng 		} else {
9265d4f98a2SYan Zheng 			u64 ref_offset;
9275d4f98a2SYan Zheng 			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
9285d4f98a2SYan Zheng 			if (parent > 0) {
9295d4f98a2SYan Zheng 				if (parent == ref_offset) {
9305d4f98a2SYan Zheng 					err = 0;
9315d4f98a2SYan Zheng 					break;
9325d4f98a2SYan Zheng 				}
9335d4f98a2SYan Zheng 				if (ref_offset < parent)
9345d4f98a2SYan Zheng 					break;
9355d4f98a2SYan Zheng 			} else {
9365d4f98a2SYan Zheng 				if (root_objectid == ref_offset) {
9375d4f98a2SYan Zheng 					err = 0;
9385d4f98a2SYan Zheng 					break;
9395d4f98a2SYan Zheng 				}
9405d4f98a2SYan Zheng 				if (ref_offset < root_objectid)
9415d4f98a2SYan Zheng 					break;
9425d4f98a2SYan Zheng 			}
9435d4f98a2SYan Zheng 		}
9445d4f98a2SYan Zheng 		ptr += btrfs_extent_inline_ref_size(type);
9455d4f98a2SYan Zheng 	}
9465d4f98a2SYan Zheng 	if (err == -ENOENT && insert) {
9475d4f98a2SYan Zheng 		if (item_size + extra_size >=
9485d4f98a2SYan Zheng 		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
9495d4f98a2SYan Zheng 			err = -EAGAIN;
9505d4f98a2SYan Zheng 			goto out;
9515d4f98a2SYan Zheng 		}
9525d4f98a2SYan Zheng 		/*
9535d4f98a2SYan Zheng 		 * To add new inline back ref, we have to make sure
9545d4f98a2SYan Zheng 		 * there is no corresponding back ref item.
9555d4f98a2SYan Zheng 		 * For simplicity, we just do not add new inline back
9565d4f98a2SYan Zheng 		 * ref if there is any kind of item for this block
9575d4f98a2SYan Zheng 		 */
9582c47e605SYan Zheng 		if (find_next_key(path, 0, &key) == 0 &&
9592c47e605SYan Zheng 		    key.objectid == bytenr &&
96085d4198eSYan Zheng 		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
9615d4f98a2SYan Zheng 			err = -EAGAIN;
9625d4f98a2SYan Zheng 			goto out;
9635d4f98a2SYan Zheng 		}
9645d4f98a2SYan Zheng 	}
9655d4f98a2SYan Zheng 	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
9665d4f98a2SYan Zheng out:
96785d4198eSYan Zheng 	if (insert) {
9685d4f98a2SYan Zheng 		path->keep_locks = 0;
9699a664971Sethanwu 		path->search_for_extension = 0;
9705d4f98a2SYan Zheng 		btrfs_unlock_up_safe(path, 1);
9715d4f98a2SYan Zheng 	}
9725d4f98a2SYan Zheng 	return err;
9735d4f98a2SYan Zheng }
9745d4f98a2SYan Zheng 
9755d4f98a2SYan Zheng /*
9765d4f98a2SYan Zheng  * helper to add new inline back ref
9775d4f98a2SYan Zheng  */
9785d4f98a2SYan Zheng static noinline_for_stack
setup_inline_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_path * path,struct btrfs_extent_inline_ref * iref,u64 parent,u64 root_objectid,u64 owner,u64 offset,int refs_to_add,struct btrfs_delayed_extent_op * extent_op)979d5e09e38SFilipe Manana void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
9805d4f98a2SYan Zheng 				 struct btrfs_path *path,
9815d4f98a2SYan Zheng 				 struct btrfs_extent_inline_ref *iref,
9825d4f98a2SYan Zheng 				 u64 parent, u64 root_objectid,
9835d4f98a2SYan Zheng 				 u64 owner, u64 offset, int refs_to_add,
9845d4f98a2SYan Zheng 				 struct btrfs_delayed_extent_op *extent_op)
9855d4f98a2SYan Zheng {
9865d4f98a2SYan Zheng 	struct extent_buffer *leaf;
9875d4f98a2SYan Zheng 	struct btrfs_extent_item *ei;
9885d4f98a2SYan Zheng 	unsigned long ptr;
9895d4f98a2SYan Zheng 	unsigned long end;
9905d4f98a2SYan Zheng 	unsigned long item_offset;
9915d4f98a2SYan Zheng 	u64 refs;
9925d4f98a2SYan Zheng 	int size;
9935d4f98a2SYan Zheng 	int type;
9945d4f98a2SYan Zheng 
9955d4f98a2SYan Zheng 	leaf = path->nodes[0];
9965d4f98a2SYan Zheng 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
9975d4f98a2SYan Zheng 	item_offset = (unsigned long)iref - (unsigned long)ei;
9985d4f98a2SYan Zheng 
9995d4f98a2SYan Zheng 	type = extent_ref_type(parent, owner);
10005d4f98a2SYan Zheng 	size = btrfs_extent_inline_ref_size(type);
10015d4f98a2SYan Zheng 
1002d5e09e38SFilipe Manana 	btrfs_extend_item(trans, path, size);
10035d4f98a2SYan Zheng 
10045d4f98a2SYan Zheng 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
10055d4f98a2SYan Zheng 	refs = btrfs_extent_refs(leaf, ei);
10065d4f98a2SYan Zheng 	refs += refs_to_add;
10075d4f98a2SYan Zheng 	btrfs_set_extent_refs(leaf, ei, refs);
10085d4f98a2SYan Zheng 	if (extent_op)
10095d4f98a2SYan Zheng 		__run_delayed_extent_op(extent_op, leaf, ei);
10105d4f98a2SYan Zheng 
10115d4f98a2SYan Zheng 	ptr = (unsigned long)ei + item_offset;
10123212fa14SJosef Bacik 	end = (unsigned long)ei + btrfs_item_size(leaf, path->slots[0]);
10135d4f98a2SYan Zheng 	if (ptr < end - size)
10145d4f98a2SYan Zheng 		memmove_extent_buffer(leaf, ptr + size, ptr,
10155d4f98a2SYan Zheng 				      end - size - ptr);
10165d4f98a2SYan Zheng 
10175d4f98a2SYan Zheng 	iref = (struct btrfs_extent_inline_ref *)ptr;
10185d4f98a2SYan Zheng 	btrfs_set_extent_inline_ref_type(leaf, iref, type);
10195d4f98a2SYan Zheng 	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10205d4f98a2SYan Zheng 		struct btrfs_extent_data_ref *dref;
10215d4f98a2SYan Zheng 		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10225d4f98a2SYan Zheng 		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
10235d4f98a2SYan Zheng 		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
10245d4f98a2SYan Zheng 		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
10255d4f98a2SYan Zheng 		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
10265d4f98a2SYan Zheng 	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10275d4f98a2SYan Zheng 		struct btrfs_shared_data_ref *sref;
10285d4f98a2SYan Zheng 		sref = (struct btrfs_shared_data_ref *)(iref + 1);
10295d4f98a2SYan Zheng 		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
10305d4f98a2SYan Zheng 		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
10315d4f98a2SYan Zheng 	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10325d4f98a2SYan Zheng 		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
10335d4f98a2SYan Zheng 	} else {
10345d4f98a2SYan Zheng 		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
10355d4f98a2SYan Zheng 	}
1036d5e09e38SFilipe Manana 	btrfs_mark_buffer_dirty(trans, leaf);
10375d4f98a2SYan Zheng }
10385d4f98a2SYan Zheng 
/*
 * Find the back ref for an extent, trying the inline refs first.
 *
 * If the inline lookup answers anything other than -ENOENT, that result is
 * returned as is (on 0, *ref_ret points at the inline ref).  Otherwise the
 * path is released, *ref_ret is set to NULL and the keyed back ref item is
 * looked up instead: a tree block ref when @owner is below
 * BTRFS_FIRST_FREE_OBJECTID, an extent data ref otherwise.
 */
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
					       num_bytes, parent, root_objectid,
					       owner, offset, 0);

	if (ret == -ENOENT) {
		/* No inline ref, fall back to the keyed back ref items. */
		btrfs_release_path(path);
		*ref_ret = NULL;

		if (owner < BTRFS_FIRST_FREE_OBJECTID)
			ret = lookup_tree_block_ref(trans, path, bytenr,
						    parent, root_objectid);
		else
			ret = lookup_extent_data_ref(trans, path, bytenr,
						     parent, root_objectid,
						     owner, offset);
	}

	return ret;
}
10655d4f98a2SYan Zheng 
10665d4f98a2SYan Zheng /*
10675d4f98a2SYan Zheng  * helper to update/remove inline back ref
10685d4f98a2SYan Zheng  */
update_inline_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_path * path,struct btrfs_extent_inline_ref * iref,int refs_to_mod,struct btrfs_delayed_extent_op * extent_op)1069d5e09e38SFilipe Manana static noinline_for_stack int update_inline_extent_backref(
1070d5e09e38SFilipe Manana 				  struct btrfs_trans_handle *trans,
1071d5e09e38SFilipe Manana 				  struct btrfs_path *path,
10725d4f98a2SYan Zheng 				  struct btrfs_extent_inline_ref *iref,
10735d4f98a2SYan Zheng 				  int refs_to_mod,
10745b2a54bbSJosef Bacik 				  struct btrfs_delayed_extent_op *extent_op)
10755d4f98a2SYan Zheng {
107661a18f1cSNikolay Borisov 	struct extent_buffer *leaf = path->nodes[0];
107725761430SQu Wenruo 	struct btrfs_fs_info *fs_info = leaf->fs_info;
10785d4f98a2SYan Zheng 	struct btrfs_extent_item *ei;
10795d4f98a2SYan Zheng 	struct btrfs_extent_data_ref *dref = NULL;
10805d4f98a2SYan Zheng 	struct btrfs_shared_data_ref *sref = NULL;
10815d4f98a2SYan Zheng 	unsigned long ptr;
10825d4f98a2SYan Zheng 	unsigned long end;
10835d4f98a2SYan Zheng 	u32 item_size;
10845d4f98a2SYan Zheng 	int size;
10855d4f98a2SYan Zheng 	int type;
10865d4f98a2SYan Zheng 	u64 refs;
10875d4f98a2SYan Zheng 
10885d4f98a2SYan Zheng 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
10895d4f98a2SYan Zheng 	refs = btrfs_extent_refs(leaf, ei);
109025761430SQu Wenruo 	if (unlikely(refs_to_mod < 0 && refs + refs_to_mod <= 0)) {
109125761430SQu Wenruo 		struct btrfs_key key;
109225761430SQu Wenruo 		u32 extent_size;
109325761430SQu Wenruo 
109425761430SQu Wenruo 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
109525761430SQu Wenruo 		if (key.type == BTRFS_METADATA_ITEM_KEY)
109625761430SQu Wenruo 			extent_size = fs_info->nodesize;
109725761430SQu Wenruo 		else
109825761430SQu Wenruo 			extent_size = key.offset;
109925761430SQu Wenruo 		btrfs_print_leaf(leaf);
110025761430SQu Wenruo 		btrfs_err(fs_info,
110125761430SQu Wenruo 	"invalid refs_to_mod for extent %llu num_bytes %u, has %d expect >= -%llu",
110225761430SQu Wenruo 			  key.objectid, extent_size, refs_to_mod, refs);
110325761430SQu Wenruo 		return -EUCLEAN;
110425761430SQu Wenruo 	}
11055d4f98a2SYan Zheng 	refs += refs_to_mod;
11065d4f98a2SYan Zheng 	btrfs_set_extent_refs(leaf, ei, refs);
11075d4f98a2SYan Zheng 	if (extent_op)
11085d4f98a2SYan Zheng 		__run_delayed_extent_op(extent_op, leaf, ei);
11095d4f98a2SYan Zheng 
11103de28d57SLiu Bo 	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
111125761430SQu Wenruo 	/*
111225761430SQu Wenruo 	 * Function btrfs_get_extent_inline_ref_type() has already printed
111325761430SQu Wenruo 	 * error messages.
111425761430SQu Wenruo 	 */
111525761430SQu Wenruo 	if (unlikely(type == BTRFS_REF_TYPE_INVALID))
111625761430SQu Wenruo 		return -EUCLEAN;
11175d4f98a2SYan Zheng 
11185d4f98a2SYan Zheng 	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11195d4f98a2SYan Zheng 		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11205d4f98a2SYan Zheng 		refs = btrfs_extent_data_ref_count(leaf, dref);
11215d4f98a2SYan Zheng 	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11225d4f98a2SYan Zheng 		sref = (struct btrfs_shared_data_ref *)(iref + 1);
11235d4f98a2SYan Zheng 		refs = btrfs_shared_data_ref_count(leaf, sref);
11245d4f98a2SYan Zheng 	} else {
11255d4f98a2SYan Zheng 		refs = 1;
112625761430SQu Wenruo 		/*
112725761430SQu Wenruo 		 * For tree blocks we can only drop one ref for it, and tree
112825761430SQu Wenruo 		 * blocks should not have refs > 1.
112925761430SQu Wenruo 		 *
113025761430SQu Wenruo 		 * Furthermore if we're inserting a new inline backref, we
113125761430SQu Wenruo 		 * won't reach this path either. That would be
113225761430SQu Wenruo 		 * setup_inline_extent_backref().
113325761430SQu Wenruo 		 */
113425761430SQu Wenruo 		if (unlikely(refs_to_mod != -1)) {
113525761430SQu Wenruo 			struct btrfs_key key;
113625761430SQu Wenruo 
113725761430SQu Wenruo 			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
113825761430SQu Wenruo 
113925761430SQu Wenruo 			btrfs_print_leaf(leaf);
114025761430SQu Wenruo 			btrfs_err(fs_info,
114125761430SQu Wenruo 			"invalid refs_to_mod for tree block %llu, has %d expect -1",
114225761430SQu Wenruo 				  key.objectid, refs_to_mod);
114325761430SQu Wenruo 			return -EUCLEAN;
114425761430SQu Wenruo 		}
11455d4f98a2SYan Zheng 	}
11465d4f98a2SYan Zheng 
114725761430SQu Wenruo 	if (unlikely(refs_to_mod < 0 && refs < -refs_to_mod)) {
114825761430SQu Wenruo 		struct btrfs_key key;
114925761430SQu Wenruo 		u32 extent_size;
115025761430SQu Wenruo 
115125761430SQu Wenruo 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
115225761430SQu Wenruo 		if (key.type == BTRFS_METADATA_ITEM_KEY)
115325761430SQu Wenruo 			extent_size = fs_info->nodesize;
115425761430SQu Wenruo 		else
115525761430SQu Wenruo 			extent_size = key.offset;
115625761430SQu Wenruo 		btrfs_print_leaf(leaf);
115725761430SQu Wenruo 		btrfs_err(fs_info,
115825761430SQu Wenruo "invalid refs_to_mod for backref entry, iref %lu extent %llu num_bytes %u, has %d expect >= -%llu",
115925761430SQu Wenruo 			  (unsigned long)iref, key.objectid, extent_size,
116025761430SQu Wenruo 			  refs_to_mod, refs);
116125761430SQu Wenruo 		return -EUCLEAN;
116225761430SQu Wenruo 	}
11635d4f98a2SYan Zheng 	refs += refs_to_mod;
11645d4f98a2SYan Zheng 
11655d4f98a2SYan Zheng 	if (refs > 0) {
11665d4f98a2SYan Zheng 		if (type == BTRFS_EXTENT_DATA_REF_KEY)
11675d4f98a2SYan Zheng 			btrfs_set_extent_data_ref_count(leaf, dref, refs);
11685d4f98a2SYan Zheng 		else
11695d4f98a2SYan Zheng 			btrfs_set_shared_data_ref_count(leaf, sref, refs);
11705d4f98a2SYan Zheng 	} else {
11715d4f98a2SYan Zheng 		size =  btrfs_extent_inline_ref_size(type);
11723212fa14SJosef Bacik 		item_size = btrfs_item_size(leaf, path->slots[0]);
11735d4f98a2SYan Zheng 		ptr = (unsigned long)iref;
11745d4f98a2SYan Zheng 		end = (unsigned long)ei + item_size;
11755d4f98a2SYan Zheng 		if (ptr + size < end)
11765d4f98a2SYan Zheng 			memmove_extent_buffer(leaf, ptr, ptr + size,
11775d4f98a2SYan Zheng 					      end - ptr - size);
11785d4f98a2SYan Zheng 		item_size -= size;
1179d5e09e38SFilipe Manana 		btrfs_truncate_item(trans, path, item_size, 1);
11805d4f98a2SYan Zheng 	}
1181d5e09e38SFilipe Manana 	btrfs_mark_buffer_dirty(trans, leaf);
118225761430SQu Wenruo 	return 0;
11835d4f98a2SYan Zheng }
11845d4f98a2SYan Zheng 
11855d4f98a2SYan Zheng static noinline_for_stack
insert_inline_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_path * path,u64 bytenr,u64 num_bytes,u64 parent,u64 root_objectid,u64 owner,u64 offset,int refs_to_add,struct btrfs_delayed_extent_op * extent_op)11865d4f98a2SYan Zheng int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
11875d4f98a2SYan Zheng 				 struct btrfs_path *path,
11885d4f98a2SYan Zheng 				 u64 bytenr, u64 num_bytes, u64 parent,
11895d4f98a2SYan Zheng 				 u64 root_objectid, u64 owner,
11905d4f98a2SYan Zheng 				 u64 offset, int refs_to_add,
11915d4f98a2SYan Zheng 				 struct btrfs_delayed_extent_op *extent_op)
11925d4f98a2SYan Zheng {
11935d4f98a2SYan Zheng 	struct btrfs_extent_inline_ref *iref;
11945d4f98a2SYan Zheng 	int ret;
11955d4f98a2SYan Zheng 
1196867cc1fbSNikolay Borisov 	ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
1197867cc1fbSNikolay Borisov 					   num_bytes, parent, root_objectid,
1198867cc1fbSNikolay Borisov 					   owner, offset, 1);
11995d4f98a2SYan Zheng 	if (ret == 0) {
120007cce5cfSQu Wenruo 		/*
120107cce5cfSQu Wenruo 		 * We're adding refs to a tree block we already own, this
120207cce5cfSQu Wenruo 		 * should not happen at all.
120307cce5cfSQu Wenruo 		 */
120407cce5cfSQu Wenruo 		if (owner < BTRFS_FIRST_FREE_OBJECTID) {
120507cce5cfSQu Wenruo 			btrfs_print_leaf(path->nodes[0]);
1206eee3b811SQu Wenruo 			btrfs_crit(trans->fs_info,
1207eee3b811SQu Wenruo "adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu slot %u",
1208eee3b811SQu Wenruo 				   bytenr, num_bytes, root_objectid, path->slots[0]);
120907cce5cfSQu Wenruo 			return -EUCLEAN;
121007cce5cfSQu Wenruo 		}
1211d5e09e38SFilipe Manana 		ret = update_inline_extent_backref(trans, path, iref,
1212d5e09e38SFilipe Manana 						   refs_to_add, extent_op);
12135d4f98a2SYan Zheng 	} else if (ret == -ENOENT) {
1214d5e09e38SFilipe Manana 		setup_inline_extent_backref(trans, path, iref, parent,
1215143bede5SJeff Mahoney 					    root_objectid, owner, offset,
1216143bede5SJeff Mahoney 					    refs_to_add, extent_op);
1217143bede5SJeff Mahoney 		ret = 0;
12185d4f98a2SYan Zheng 	}
12195d4f98a2SYan Zheng 	return ret;
12205d4f98a2SYan Zheng }
12215d4f98a2SYan Zheng 
remove_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_extent_inline_ref * iref,int refs_to_drop,int is_data)12225d4f98a2SYan Zheng static int remove_extent_backref(struct btrfs_trans_handle *trans,
122376d76e78SJosef Bacik 				 struct btrfs_root *root,
12245d4f98a2SYan Zheng 				 struct btrfs_path *path,
12255d4f98a2SYan Zheng 				 struct btrfs_extent_inline_ref *iref,
12265b2a54bbSJosef Bacik 				 int refs_to_drop, int is_data)
12275d4f98a2SYan Zheng {
1228143bede5SJeff Mahoney 	int ret = 0;
12295d4f98a2SYan Zheng 
12305d4f98a2SYan Zheng 	BUG_ON(!is_data && refs_to_drop != 1);
12315b2a54bbSJosef Bacik 	if (iref)
1232d5e09e38SFilipe Manana 		ret = update_inline_extent_backref(trans, path, iref,
1233d5e09e38SFilipe Manana 						   -refs_to_drop, NULL);
12345b2a54bbSJosef Bacik 	else if (is_data)
12355b2a54bbSJosef Bacik 		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
12365b2a54bbSJosef Bacik 	else
123776d76e78SJosef Bacik 		ret = btrfs_del_item(trans, root, path);
123831840ae1SZheng Yan 	return ret;
123931840ae1SZheng Yan }
124031840ae1SZheng Yan 
/*
 * Discard the byte range [@start, @start + @len) on @bdev while skipping any
 * superblock mirror location that overlaps the range.
 *
 * @bdev:            block device to discard on
 * @start:           start of the range in bytes; rounded up to a sector
 *                   boundary if unaligned
 * @len:             length of the range in bytes
 * @discarded_bytes: set to the number of bytes successfully discarded
 *
 * An -EOPNOTSUPP from a discard issued between superblocks does not stop
 * processing; any other error from such a discard is returned immediately.
 * Otherwise the return value is the result of the last
 * blkdev_issue_discard() call, or 0 if no discard was issued.
 */
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
			       u64 *discarded_bytes)
{
	int j, ret = 0;
	u64 bytes_left, end;
	u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);

	*discarded_bytes = 0;

	/* Adjust the range to be aligned to 512B sectors if necessary. */
	if (start != aligned_start) {
		u64 head = aligned_start - start;

		/*
		 * The range ends at or before the next sector boundary, so
		 * there is nothing to discard.  Without this check the
		 * unsigned subtraction below would wrap and the function
		 * would discard a huge, bogus range.
		 */
		if (len <= head)
			return 0;

		len -= head;
		len = round_down(len, 1 << SECTOR_SHIFT);
		start = aligned_start;
	}

	if (!len)
		return 0;

	end = start + len;
	bytes_left = len;

	/* Skip any superblocks on this device. */
	for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
		u64 sb_start = btrfs_sb_offset(j);
		u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
		u64 size = sb_start - start;

		/* This mirror does not overlap the remaining range. */
		if (!in_range(sb_start, start, bytes_left) &&
		    !in_range(sb_end, start, bytes_left) &&
		    !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
			continue;

		/*
		 * Superblock spans beginning of range.  Adjust start and
		 * try again.
		 */
		if (sb_start <= start) {
			start += sb_end - start;
			if (start > end) {
				bytes_left = 0;
				break;
			}
			bytes_left = end - start;
			continue;
		}

		/* Discard the part of the range in front of the superblock. */
		if (size) {
			ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
						   size >> SECTOR_SHIFT,
						   GFP_NOFS);
			if (!ret)
				*discarded_bytes += size;
			else if (ret != -EOPNOTSUPP)
				return ret;
		}

		/* Continue after the superblock. */
		start = sb_end;
		if (start > end) {
			bytes_left = 0;
			break;
		}
		bytes_left = end - start;
	}

	/* Whatever remains after the last overlapping superblock. */
	if (bytes_left) {
		ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
					   bytes_left >> SECTOR_SHIFT,
					   GFP_NOFS);
		if (!ret)
			*discarded_bytes += bytes_left;
	}
	return ret;
}
131515916de8SChris Mason 
/*
 * Discard (or zone-reset) the physical range described by @stripe.
 *
 * If btrfs_can_zone_reset() accepts the range, the zone is reset on the
 * stripe's device and, while a device replace is ongoing with that device as
 * the source, on the replace target as well.  Otherwise a regular discard is
 * issued when the block device reports a non-zero maximum discard size.  If
 * neither applies nothing is done and 0 is returned.
 *
 * @bytes always receives the accumulated byte count reported by the helpers
 * that were called (0 if none was called).
 */
static int do_discard_extent(struct btrfs_discard_stripe *stripe, u64 *bytes)
{
	struct btrfs_device *dev = stripe->dev;
	struct btrfs_dev_replace *dev_replace = &dev->fs_info->dev_replace;
	const u64 phys = stripe->physical;
	const u64 len = stripe->length;
	u64 discarded = 0;
	int ret = 0;

	if (!btrfs_can_zone_reset(dev, phys, len)) {
		/* Regular discard, only if the device supports it. */
		if (bdev_max_discard_sectors(dev->bdev))
			ret = btrfs_issue_discard(dev->bdev, phys, len,
						  &discarded);
		goto out;
	}

	/* Zone reset on a zoned filesystem */
	ret = btrfs_reset_device_zone(dev, phys, len, &discarded);
	if (ret)
		goto out;

	if (btrfs_dev_replace_is_ongoing(dev_replace) &&
	    dev == dev_replace->srcdev) {
		u64 src_disc = discarded;

		/* Send to replace target as well */
		ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
					      &discarded);
		discarded += src_disc;
	}

out:
	*bytes = discarded;
	return ret;
}
13556143c23cSNaohiro Aota 
/*
 * Discard the logical range [@bytenr, @bytenr + @num_bytes).
 *
 * The range is walked in chunks as returned by btrfs_map_discard(); each
 * chunk is discarded on every stripe whose device has a block device and is
 * writeable.  -EOPNOTSUPP from the mapping or from a single stripe is not
 * treated as an error.  Any other error stops the walk and is returned.
 *
 * @actual_bytes: optional; receives the sum of bytes the stripes reported as
 *                discarded, including on error.
 */
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes)
{
	const u64 range_end = bytenr + num_bytes;
	u64 total_discarded = 0;
	u64 pos;
	int ret = 0;

	/*
	 * Avoid races with device replace and make sure the devices in the
	 * stripes don't go away while we are discarding.
	 */
	btrfs_bio_counter_inc_blocked(fs_info);

	/* num_bytes is updated by btrfs_map_discard() on every pass. */
	for (pos = bytenr; pos < range_end; pos += num_bytes) {
		struct btrfs_discard_stripe *stripes;
		unsigned int num_stripes;
		int i;

		num_bytes = range_end - pos;
		stripes = btrfs_map_discard(fs_info, pos, &num_bytes,
					    &num_stripes);
		if (IS_ERR(stripes)) {
			ret = PTR_ERR(stripes);
			if (ret == -EOPNOTSUPP)
				ret = 0;
			break;
		}

		for (i = 0; i < num_stripes; i++) {
			struct btrfs_discard_stripe *stripe = &stripes[i];
			u64 bytes;

			if (!stripe->dev->bdev) {
				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
				continue;
			}

			if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
				      &stripe->dev->dev_state))
				continue;

			ret = do_discard_extent(stripe, &bytes);
			if (!ret) {
				total_discarded += bytes;
				continue;
			}

			/*
			 * Keep going if discard is not supported by the
			 * device.
			 */
			if (ret != -EOPNOTSUPP)
				break;
			ret = 0;
		}

		kfree(stripes);
		if (ret)
			break;
	}

	btrfs_bio_counter_dec(fs_info);

	if (actual_bytes)
		*actual_bytes = total_discarded;
	return ret;
}
14191f3c79a2SLiu Hui 
142079787eaaSJeff Mahoney /* Can return -ENOMEM */
btrfs_inc_extent_ref(struct btrfs_trans_handle * trans,struct btrfs_ref * generic_ref)14215d4f98a2SYan Zheng int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
142282fa113fSQu Wenruo 			 struct btrfs_ref *generic_ref)
142331840ae1SZheng Yan {
142482fa113fSQu Wenruo 	struct btrfs_fs_info *fs_info = trans->fs_info;
142531840ae1SZheng Yan 	int ret;
142666d7e7f0SArne Jansen 
142782fa113fSQu Wenruo 	ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
142882fa113fSQu Wenruo 	       generic_ref->action);
142982fa113fSQu Wenruo 	BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
1430113479d5SNikolay Borisov 	       generic_ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID);
143131840ae1SZheng Yan 
143282fa113fSQu Wenruo 	if (generic_ref->type == BTRFS_REF_METADATA)
14332187374fSJosef Bacik 		ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
143482fa113fSQu Wenruo 	else
14352187374fSJosef Bacik 		ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0);
1436d7eae340SOmar Sandoval 
143782fa113fSQu Wenruo 	btrfs_ref_tree_mod(fs_info, generic_ref);
14388a5040f7SQu Wenruo 
143931840ae1SZheng Yan 	return ret;
144031840ae1SZheng Yan }
144131840ae1SZheng Yan 
1442bd3c685eSNikolay Borisov /*
1443bd3c685eSNikolay Borisov  * __btrfs_inc_extent_ref - insert backreference for a given extent
1444bd3c685eSNikolay Borisov  *
144507cce5cfSQu Wenruo  * The counterpart is in __btrfs_free_extent(), with examples and more details
144607cce5cfSQu Wenruo  * how it works.
144707cce5cfSQu Wenruo  *
1448bd3c685eSNikolay Borisov  * @trans:	    Handle of transaction
1449bd3c685eSNikolay Borisov  *
1450bd3c685eSNikolay Borisov  * @node:	    The delayed ref node used to get the bytenr/length for
1451bd3c685eSNikolay Borisov  *		    extent whose references are incremented.
1452bd3c685eSNikolay Borisov  *
1453bd3c685eSNikolay Borisov  * @parent:	    If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
1454bd3c685eSNikolay Borisov  *		    BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
1455bd3c685eSNikolay Borisov  *		    bytenr of the parent block. Since new extents are always
1456bd3c685eSNikolay Borisov  *		    created with indirect references, this will only be the case
1457bd3c685eSNikolay Borisov  *		    when relocating a shared extent. In that case, root_objectid
14581a9fd417SDavid Sterba  *		    will be BTRFS_TREE_RELOC_OBJECTID. Otherwise, parent must
1459bd3c685eSNikolay Borisov  *		    be 0
1460bd3c685eSNikolay Borisov  *
1461bd3c685eSNikolay Borisov  * @root_objectid:  The id of the root where this modification has originated,
1462bd3c685eSNikolay Borisov  *		    this can be either one of the well-known metadata trees or
1463bd3c685eSNikolay Borisov  *		    the subvolume id which references this extent.
1464bd3c685eSNikolay Borisov  *
1465bd3c685eSNikolay Borisov  * @owner:	    For data extents it is the inode number of the owning file.
1466bd3c685eSNikolay Borisov  *		    For metadata extents this parameter holds the level in the
1467bd3c685eSNikolay Borisov  *		    tree of the extent.
1468bd3c685eSNikolay Borisov  *
1469bd3c685eSNikolay Borisov  * @offset:	    For metadata extents the offset is ignored and is currently
1470bd3c685eSNikolay Borisov  *		    always passed as 0. For data extents it is the fileoffset
1471bd3c685eSNikolay Borisov  *		    this extent belongs to.
1472bd3c685eSNikolay Borisov  *
1473bd3c685eSNikolay Borisov  * @refs_to_add     Number of references to add
1474bd3c685eSNikolay Borisov  *
1475bd3c685eSNikolay Borisov  * @extent_op       Pointer to a structure, holding information necessary when
1476bd3c685eSNikolay Borisov  *                  updating a tree block's flags
1477bd3c685eSNikolay Borisov  *
1478bd3c685eSNikolay Borisov  */
__btrfs_inc_extent_ref(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_node * node,u64 parent,u64 root_objectid,u64 owner,u64 offset,int refs_to_add,struct btrfs_delayed_extent_op * extent_op)147931840ae1SZheng Yan static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1480c682f9b3SQu Wenruo 				  struct btrfs_delayed_ref_node *node,
14815d4f98a2SYan Zheng 				  u64 parent, u64 root_objectid,
14825d4f98a2SYan Zheng 				  u64 owner, u64 offset, int refs_to_add,
14835d4f98a2SYan Zheng 				  struct btrfs_delayed_extent_op *extent_op)
148456bec294SChris Mason {
14855caf2a00SChris Mason 	struct btrfs_path *path;
14865d4f98a2SYan Zheng 	struct extent_buffer *leaf;
1487234b63a0SChris Mason 	struct btrfs_extent_item *item;
1488fcebe456SJosef Bacik 	struct btrfs_key key;
1489c682f9b3SQu Wenruo 	u64 bytenr = node->bytenr;
1490c682f9b3SQu Wenruo 	u64 num_bytes = node->num_bytes;
14915d4f98a2SYan Zheng 	u64 refs;
14925d4f98a2SYan Zheng 	int ret;
1493037e6390SChris Mason 
14945caf2a00SChris Mason 	path = btrfs_alloc_path();
149554aa1f4dSChris Mason 	if (!path)
149654aa1f4dSChris Mason 		return -ENOMEM;
149726b8003fSChris Mason 
14985d4f98a2SYan Zheng 	/* this will setup the path even if it fails to insert the back ref */
1499a639cdebSNikolay Borisov 	ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
1500a639cdebSNikolay Borisov 					   parent, root_objectid, owner,
1501a639cdebSNikolay Borisov 					   offset, refs_to_add, extent_op);
15020ed4792aSQu Wenruo 	if ((ret < 0 && ret != -EAGAIN) || !ret)
15035d4f98a2SYan Zheng 		goto out;
1504fcebe456SJosef Bacik 
1505fcebe456SJosef Bacik 	/*
1506fcebe456SJosef Bacik 	 * Ok we had -EAGAIN which means we didn't have space to insert and
1507fcebe456SJosef Bacik 	 * inline extent ref, so just update the reference count and add a
1508fcebe456SJosef Bacik 	 * normal backref.
1509fcebe456SJosef Bacik 	 */
1510fcebe456SJosef Bacik 	leaf = path->nodes[0];
1511fcebe456SJosef Bacik 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
15125d4f98a2SYan Zheng 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
15135d4f98a2SYan Zheng 	refs = btrfs_extent_refs(leaf, item);
15145d4f98a2SYan Zheng 	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
15155d4f98a2SYan Zheng 	if (extent_op)
15165d4f98a2SYan Zheng 		__run_delayed_extent_op(extent_op, leaf, item);
151731840ae1SZheng Yan 
1518d5e09e38SFilipe Manana 	btrfs_mark_buffer_dirty(trans, leaf);
1519b3b4aa74SDavid Sterba 	btrfs_release_path(path);
15207bb86316SChris Mason 
152156bec294SChris Mason 	/* now insert the actual backref */
1522d2f79e63SFilipe Manana 	if (owner < BTRFS_FIRST_FREE_OBJECTID)
152365cd6d9eSNikolay Borisov 		ret = insert_tree_block_ref(trans, path, bytenr, parent,
152465cd6d9eSNikolay Borisov 					    root_objectid);
1525d2f79e63SFilipe Manana 	else
152665cd6d9eSNikolay Borisov 		ret = insert_extent_data_ref(trans, path, bytenr, parent,
152765cd6d9eSNikolay Borisov 					     root_objectid, owner, offset,
152865cd6d9eSNikolay Borisov 					     refs_to_add);
1529d2f79e63SFilipe Manana 
153079787eaaSJeff Mahoney 	if (ret)
153166642832SJeff Mahoney 		btrfs_abort_transaction(trans, ret);
15325d4f98a2SYan Zheng out:
153374493f7aSChris Mason 	btrfs_free_path(path);
153430d133fcSLiu Bo 	return ret;
153502217ed2SChris Mason }
153602217ed2SChris Mason 
/*
 * Run one delayed data reference.
 *
 * Depending on @node->action this either
 *   - allocates the extent item for a reserved extent (add + @insert_reserved),
 *   - adds @node->ref_mod references to an existing extent (add), or
 *   - drops @node->ref_mod references (drop).
 * Any other action is a bug.
 *
 * The parent bytenr is only taken from the ref for shared data refs,
 * otherwise 0 is passed down.
 */
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				bool insert_reserved)
{
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent;
	u64 ref_root;
	int ret = 0;

	ins.objectid = node->bytenr;
	ins.type = BTRFS_EXTENT_ITEM_KEY;
	ins.offset = node->num_bytes;

	ref = btrfs_delayed_node_to_data_ref(node);
	trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);

	parent = (node->type == BTRFS_SHARED_DATA_REF_KEY) ? ref->parent : 0;
	ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF) {
		if (insert_reserved) {
			/* Flags for the new extent item come from extent_op. */
			u64 flags = extent_op ? extent_op->flags_to_set : 0;

			ret = alloc_reserved_file_extent(trans, parent,
							 ref_root, flags,
							 ref->objectid,
							 ref->offset, &ins,
							 node->ref_mod);
		} else {
			ret = __btrfs_inc_extent_ref(trans, node, parent,
						     ref_root, ref->objectid,
						     ref->offset,
						     node->ref_mod, extent_op);
		}
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, node, parent, ref_root,
					  ref->objectid, ref->offset,
					  node->ref_mod, extent_op);
	} else {
		BUG();
	}

	return ret;
}
15815d4f98a2SYan Zheng 
__run_delayed_extent_op(struct btrfs_delayed_extent_op * extent_op,struct extent_buffer * leaf,struct btrfs_extent_item * ei)15825d4f98a2SYan Zheng static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
15835d4f98a2SYan Zheng 				    struct extent_buffer *leaf,
15845d4f98a2SYan Zheng 				    struct btrfs_extent_item *ei)
15855d4f98a2SYan Zheng {
15865d4f98a2SYan Zheng 	u64 flags = btrfs_extent_flags(leaf, ei);
15875d4f98a2SYan Zheng 	if (extent_op->update_flags) {
15885d4f98a2SYan Zheng 		flags |= extent_op->flags_to_set;
15895d4f98a2SYan Zheng 		btrfs_set_extent_flags(leaf, ei, flags);
15905d4f98a2SYan Zheng 	}
15915d4f98a2SYan Zheng 
15925d4f98a2SYan Zheng 	if (extent_op->update_key) {
15935d4f98a2SYan Zheng 		struct btrfs_tree_block_info *bi;
15945d4f98a2SYan Zheng 		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
15955d4f98a2SYan Zheng 		bi = (struct btrfs_tree_block_info *)(ei + 1);
15965d4f98a2SYan Zheng 		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
15975d4f98a2SYan Zheng 	}
15985d4f98a2SYan Zheng }
15995d4f98a2SYan Zheng 
run_delayed_extent_op(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_head * head,struct btrfs_delayed_extent_op * extent_op)16005d4f98a2SYan Zheng static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1601d278850eSJosef Bacik 				 struct btrfs_delayed_ref_head *head,
16025d4f98a2SYan Zheng 				 struct btrfs_delayed_extent_op *extent_op)
16035d4f98a2SYan Zheng {
160420b9a2d6SNikolay Borisov 	struct btrfs_fs_info *fs_info = trans->fs_info;
160529cbcf40SJosef Bacik 	struct btrfs_root *root;
16065d4f98a2SYan Zheng 	struct btrfs_key key;
16075d4f98a2SYan Zheng 	struct btrfs_path *path;
16085d4f98a2SYan Zheng 	struct btrfs_extent_item *ei;
16095d4f98a2SYan Zheng 	struct extent_buffer *leaf;
16105d4f98a2SYan Zheng 	u32 item_size;
16115d4f98a2SYan Zheng 	int ret;
16125d4f98a2SYan Zheng 	int err = 0;
16130e3696f8SDavid Sterba 	int metadata = 1;
16145d4f98a2SYan Zheng 
1615bf31f87fSDavid Sterba 	if (TRANS_ABORTED(trans))
161679787eaaSJeff Mahoney 		return 0;
161779787eaaSJeff Mahoney 
16180e3696f8SDavid Sterba 	if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
16193173a18fSJosef Bacik 		metadata = 0;
16203173a18fSJosef Bacik 
16215d4f98a2SYan Zheng 	path = btrfs_alloc_path();
16225d4f98a2SYan Zheng 	if (!path)
16235d4f98a2SYan Zheng 		return -ENOMEM;
16245d4f98a2SYan Zheng 
1625d278850eSJosef Bacik 	key.objectid = head->bytenr;
16263173a18fSJosef Bacik 
16273173a18fSJosef Bacik 	if (metadata) {
16283173a18fSJosef Bacik 		key.type = BTRFS_METADATA_ITEM_KEY;
1629b1c79e09SJosef Bacik 		key.offset = extent_op->level;
16303173a18fSJosef Bacik 	} else {
16315d4f98a2SYan Zheng 		key.type = BTRFS_EXTENT_ITEM_KEY;
1632d278850eSJosef Bacik 		key.offset = head->num_bytes;
16333173a18fSJosef Bacik 	}
16345d4f98a2SYan Zheng 
163529cbcf40SJosef Bacik 	root = btrfs_extent_root(fs_info, key.objectid);
16363173a18fSJosef Bacik again:
163729cbcf40SJosef Bacik 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
16385d4f98a2SYan Zheng 	if (ret < 0) {
16395d4f98a2SYan Zheng 		err = ret;
16405d4f98a2SYan Zheng 		goto out;
16415d4f98a2SYan Zheng 	}
16425d4f98a2SYan Zheng 	if (ret > 0) {
16433173a18fSJosef Bacik 		if (metadata) {
164455994887SFilipe David Borba Manana 			if (path->slots[0] > 0) {
164555994887SFilipe David Borba Manana 				path->slots[0]--;
164655994887SFilipe David Borba Manana 				btrfs_item_key_to_cpu(path->nodes[0], &key,
164755994887SFilipe David Borba Manana 						      path->slots[0]);
1648d278850eSJosef Bacik 				if (key.objectid == head->bytenr &&
164955994887SFilipe David Borba Manana 				    key.type == BTRFS_EXTENT_ITEM_KEY &&
1650d278850eSJosef Bacik 				    key.offset == head->num_bytes)
165155994887SFilipe David Borba Manana 					ret = 0;
165255994887SFilipe David Borba Manana 			}
165355994887SFilipe David Borba Manana 			if (ret > 0) {
16543173a18fSJosef Bacik 				btrfs_release_path(path);
16553173a18fSJosef Bacik 				metadata = 0;
16563173a18fSJosef Bacik 
1657d278850eSJosef Bacik 				key.objectid = head->bytenr;
1658d278850eSJosef Bacik 				key.offset = head->num_bytes;
16593173a18fSJosef Bacik 				key.type = BTRFS_EXTENT_ITEM_KEY;
16603173a18fSJosef Bacik 				goto again;
16613173a18fSJosef Bacik 			}
166255994887SFilipe David Borba Manana 		} else {
16638ec0a4a5SFilipe Manana 			err = -EUCLEAN;
16648ec0a4a5SFilipe Manana 			btrfs_err(fs_info,
16658ec0a4a5SFilipe Manana 		  "missing extent item for extent %llu num_bytes %llu level %d",
16668ec0a4a5SFilipe Manana 				  head->bytenr, head->num_bytes, extent_op->level);
16675d4f98a2SYan Zheng 			goto out;
16685d4f98a2SYan Zheng 		}
166955994887SFilipe David Borba Manana 	}
16705d4f98a2SYan Zheng 
16715d4f98a2SYan Zheng 	leaf = path->nodes[0];
16723212fa14SJosef Bacik 	item_size = btrfs_item_size(leaf, path->slots[0]);
1673ba3c2b19SNikolay Borisov 
16746d8ff4e4SDavid Sterba 	if (unlikely(item_size < sizeof(*ei))) {
1675182741d2SQu Wenruo 		err = -EUCLEAN;
1676182741d2SQu Wenruo 		btrfs_err(fs_info,
1677182741d2SQu Wenruo 			  "unexpected extent item size, has %u expect >= %zu",
1678182741d2SQu Wenruo 			  item_size, sizeof(*ei));
1679ba3c2b19SNikolay Borisov 		btrfs_abort_transaction(trans, err);
1680ba3c2b19SNikolay Borisov 		goto out;
1681ba3c2b19SNikolay Borisov 	}
1682ba3c2b19SNikolay Borisov 
16835d4f98a2SYan Zheng 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
16845d4f98a2SYan Zheng 	__run_delayed_extent_op(extent_op, leaf, ei);
16855d4f98a2SYan Zheng 
1686d5e09e38SFilipe Manana 	btrfs_mark_buffer_dirty(trans, leaf);
16875d4f98a2SYan Zheng out:
16885d4f98a2SYan Zheng 	btrfs_free_path(path);
16895d4f98a2SYan Zheng 	return err;
16905d4f98a2SYan Zheng }
16915d4f98a2SYan Zheng 
/*
 * Run one delayed tree block reference.
 *
 * A tree block ref must carry a ref_mod of exactly 1; anything else is
 * reported and rejected with -EUCLEAN.  Depending on @node->action this then
 * either allocates the reserved tree block (add + @insert_reserved), adds one
 * reference (add) or drops one reference (drop).  Any other action is a bug.
 *
 * The parent bytenr is only taken from the ref for shared block refs,
 * otherwise 0 is passed down.
 */
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				bool insert_reserved)
{
	struct btrfs_delayed_tree_ref *ref;
	u64 parent;
	u64 ref_root;
	int ret = 0;

	ref = btrfs_delayed_node_to_tree_ref(node);
	trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);

	parent = (node->type == BTRFS_SHARED_BLOCK_REF_KEY) ? ref->parent : 0;
	ref_root = ref->root;

	if (unlikely(node->ref_mod != 1)) {
		btrfs_err(trans->fs_info,
	"btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu",
			  node->bytenr, node->ref_mod, node->action, ref_root,
			  parent);
		return -EUCLEAN;
	}

	if (node->action == BTRFS_ADD_DELAYED_REF) {
		if (insert_reserved) {
			/* A new tree block needs an extent op with flags. */
			BUG_ON(!extent_op || !extent_op->update_flags);
			ret = alloc_reserved_tree_block(trans, node, extent_op);
		} else {
			ret = __btrfs_inc_extent_ref(trans, node, parent,
						     ref_root, ref->level, 0,
						     1, extent_op);
		}
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, node, parent, ref_root,
					  ref->level, 0, 1, extent_op);
	} else {
		BUG();
	}

	return ret;
}
17305d4f98a2SYan Zheng 
17315d4f98a2SYan Zheng /* helper function to actually process a single delayed ref entry */
run_one_delayed_ref(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_node * node,struct btrfs_delayed_extent_op * extent_op,bool insert_reserved)17325d4f98a2SYan Zheng static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
17335d4f98a2SYan Zheng 			       struct btrfs_delayed_ref_node *node,
17345d4f98a2SYan Zheng 			       struct btrfs_delayed_extent_op *extent_op,
173561c681feSFilipe Manana 			       bool insert_reserved)
173656bec294SChris Mason {
173779787eaaSJeff Mahoney 	int ret = 0;
173879787eaaSJeff Mahoney 
1739bf31f87fSDavid Sterba 	if (TRANS_ABORTED(trans)) {
1740857cc2fcSJosef Bacik 		if (insert_reserved)
1741b25c36f8SNikolay Borisov 			btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
174279787eaaSJeff Mahoney 		return 0;
1743857cc2fcSJosef Bacik 	}
174479787eaaSJeff Mahoney 
17455d4f98a2SYan Zheng 	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
17465d4f98a2SYan Zheng 	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1747f97806f2SNikolay Borisov 		ret = run_delayed_tree_ref(trans, node, extent_op,
17485d4f98a2SYan Zheng 					   insert_reserved);
17495d4f98a2SYan Zheng 	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
17505d4f98a2SYan Zheng 		 node->type == BTRFS_SHARED_DATA_REF_KEY)
17512bf98ef3SNikolay Borisov 		ret = run_delayed_data_ref(trans, node, extent_op,
17525d4f98a2SYan Zheng 					   insert_reserved);
17535d4f98a2SYan Zheng 	else
17545d4f98a2SYan Zheng 		BUG();
175580ee54bfSJosef Bacik 	if (ret && insert_reserved)
1756b25c36f8SNikolay Borisov 		btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
175739f501d6SQu Wenruo 	if (ret < 0)
175839f501d6SQu Wenruo 		btrfs_err(trans->fs_info,
175939f501d6SQu Wenruo "failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d",
176039f501d6SQu Wenruo 			  node->bytenr, node->num_bytes, node->type,
176139f501d6SQu Wenruo 			  node->action, node->ref_mod, ret);
17625d4f98a2SYan Zheng 	return ret;
1763e9d0b13bSChris Mason }
1764e9d0b13bSChris Mason 
1765c6fc2454SQu Wenruo static inline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head * head)176656bec294SChris Mason select_delayed_ref(struct btrfs_delayed_ref_head *head)
1767a28ec197SChris Mason {
1768cffc3374SFilipe Manana 	struct btrfs_delayed_ref_node *ref;
1769cffc3374SFilipe Manana 
1770e3d03965SLiu Bo 	if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
1771c6fc2454SQu Wenruo 		return NULL;
1772d7df2c79SJosef Bacik 
1773cffc3374SFilipe Manana 	/*
1774cffc3374SFilipe Manana 	 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
1775cffc3374SFilipe Manana 	 * This is to prevent a ref count from going down to zero, which deletes
1776cffc3374SFilipe Manana 	 * the extent item from the extent tree, when there still are references
1777cffc3374SFilipe Manana 	 * to add, which would fail because they would not find the extent item.
1778cffc3374SFilipe Manana 	 */
17791d57ee94SWang Xiaoguang 	if (!list_empty(&head->ref_add_list))
17801d57ee94SWang Xiaoguang 		return list_first_entry(&head->ref_add_list,
17811d57ee94SWang Xiaoguang 				struct btrfs_delayed_ref_node, add_list);
1782cffc3374SFilipe Manana 
1783e3d03965SLiu Bo 	ref = rb_entry(rb_first_cached(&head->ref_tree),
17840e0adbcfSJosef Bacik 		       struct btrfs_delayed_ref_node, ref_node);
17851d57ee94SWang Xiaoguang 	ASSERT(list_empty(&ref->add_list));
17861d57ee94SWang Xiaoguang 	return ref;
178756bec294SChris Mason }
178856bec294SChris Mason 
unselect_delayed_ref_head(struct btrfs_delayed_ref_root * delayed_refs,struct btrfs_delayed_ref_head * head)17892eadaa22SJosef Bacik static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
17902eadaa22SJosef Bacik 				      struct btrfs_delayed_ref_head *head)
17912eadaa22SJosef Bacik {
17922eadaa22SJosef Bacik 	spin_lock(&delayed_refs->lock);
179361c681feSFilipe Manana 	head->processing = false;
17942eadaa22SJosef Bacik 	delayed_refs->num_heads_ready++;
17952eadaa22SJosef Bacik 	spin_unlock(&delayed_refs->lock);
17962eadaa22SJosef Bacik 	btrfs_delayed_ref_unlock(head);
17972eadaa22SJosef Bacik }
17982eadaa22SJosef Bacik 
cleanup_extent_op(struct btrfs_delayed_ref_head * head)1799bedc6617SJosef Bacik static struct btrfs_delayed_extent_op *cleanup_extent_op(
1800b00e6250SJosef Bacik 				struct btrfs_delayed_ref_head *head)
1801b00e6250SJosef Bacik {
1802b00e6250SJosef Bacik 	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
1803bedc6617SJosef Bacik 
1804bedc6617SJosef Bacik 	if (!extent_op)
1805bedc6617SJosef Bacik 		return NULL;
1806bedc6617SJosef Bacik 
1807bedc6617SJosef Bacik 	if (head->must_insert_reserved) {
1808bedc6617SJosef Bacik 		head->extent_op = NULL;
1809bedc6617SJosef Bacik 		btrfs_free_delayed_extent_op(extent_op);
1810bedc6617SJosef Bacik 		return NULL;
1811bedc6617SJosef Bacik 	}
1812bedc6617SJosef Bacik 	return extent_op;
1813bedc6617SJosef Bacik }
1814bedc6617SJosef Bacik 
run_and_cleanup_extent_op(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_head * head)1815bedc6617SJosef Bacik static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
1816bedc6617SJosef Bacik 				     struct btrfs_delayed_ref_head *head)
1817bedc6617SJosef Bacik {
1818bedc6617SJosef Bacik 	struct btrfs_delayed_extent_op *extent_op;
1819b00e6250SJosef Bacik 	int ret;
1820b00e6250SJosef Bacik 
1821bedc6617SJosef Bacik 	extent_op = cleanup_extent_op(head);
1822b00e6250SJosef Bacik 	if (!extent_op)
1823b00e6250SJosef Bacik 		return 0;
1824b00e6250SJosef Bacik 	head->extent_op = NULL;
1825b00e6250SJosef Bacik 	spin_unlock(&head->lock);
182620b9a2d6SNikolay Borisov 	ret = run_delayed_extent_op(trans, head, extent_op);
1827b00e6250SJosef Bacik 	btrfs_free_delayed_extent_op(extent_op);
1828b00e6250SJosef Bacik 	return ret ? ret : 1;
1829b00e6250SJosef Bacik }
1830b00e6250SJosef Bacik 
btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info * fs_info,struct btrfs_delayed_ref_root * delayed_refs,struct btrfs_delayed_ref_head * head)183131890da0SJosef Bacik void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
183231890da0SJosef Bacik 				  struct btrfs_delayed_ref_root *delayed_refs,
183307c47775SJosef Bacik 				  struct btrfs_delayed_ref_head *head)
183407c47775SJosef Bacik {
1835ba2c4d4eSJosef Bacik 	int nr_items = 1;	/* Dropping this ref head update. */
183607c47775SJosef Bacik 
1837ba2c4d4eSJosef Bacik 	/*
183881e75ac7SJosef Bacik 	 * We had csum deletions accounted for in our delayed refs rsv, we need
183981e75ac7SJosef Bacik 	 * to drop the csum leaves for this update from our delayed_refs_rsv.
1840ba2c4d4eSJosef Bacik 	 */
184181e75ac7SJosef Bacik 	if (head->total_ref_mod < 0 && head->is_data) {
184207c47775SJosef Bacik 		spin_lock(&delayed_refs->lock);
184307c47775SJosef Bacik 		delayed_refs->pending_csums -= head->num_bytes;
184407c47775SJosef Bacik 		spin_unlock(&delayed_refs->lock);
184581e75ac7SJosef Bacik 		nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
184607c47775SJosef Bacik 	}
184781e75ac7SJosef Bacik 
1848ba2c4d4eSJosef Bacik 	btrfs_delayed_refs_rsv_release(fs_info, nr_items);
184907c47775SJosef Bacik }
185007c47775SJosef Bacik 
cleanup_ref_head(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_head * head)1851194ab0bcSJosef Bacik static int cleanup_ref_head(struct btrfs_trans_handle *trans,
1852194ab0bcSJosef Bacik 			    struct btrfs_delayed_ref_head *head)
1853194ab0bcSJosef Bacik {
1854f9871eddSNikolay Borisov 
1855f9871eddSNikolay Borisov 	struct btrfs_fs_info *fs_info = trans->fs_info;
1856194ab0bcSJosef Bacik 	struct btrfs_delayed_ref_root *delayed_refs;
1857194ab0bcSJosef Bacik 	int ret;
1858194ab0bcSJosef Bacik 
1859194ab0bcSJosef Bacik 	delayed_refs = &trans->transaction->delayed_refs;
1860194ab0bcSJosef Bacik 
1861bedc6617SJosef Bacik 	ret = run_and_cleanup_extent_op(trans, head);
1862194ab0bcSJosef Bacik 	if (ret < 0) {
1863194ab0bcSJosef Bacik 		unselect_delayed_ref_head(delayed_refs, head);
1864194ab0bcSJosef Bacik 		btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
1865194ab0bcSJosef Bacik 		return ret;
1866194ab0bcSJosef Bacik 	} else if (ret) {
1867194ab0bcSJosef Bacik 		return ret;
1868194ab0bcSJosef Bacik 	}
1869194ab0bcSJosef Bacik 
1870194ab0bcSJosef Bacik 	/*
1871194ab0bcSJosef Bacik 	 * Need to drop our head ref lock and re-acquire the delayed ref lock
1872194ab0bcSJosef Bacik 	 * and then re-check to make sure nobody got added.
1873194ab0bcSJosef Bacik 	 */
1874194ab0bcSJosef Bacik 	spin_unlock(&head->lock);
1875194ab0bcSJosef Bacik 	spin_lock(&delayed_refs->lock);
1876194ab0bcSJosef Bacik 	spin_lock(&head->lock);
1877e3d03965SLiu Bo 	if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
1878194ab0bcSJosef Bacik 		spin_unlock(&head->lock);
1879194ab0bcSJosef Bacik 		spin_unlock(&delayed_refs->lock);
1880194ab0bcSJosef Bacik 		return 1;
1881194ab0bcSJosef Bacik 	}
1882d7baffdaSJosef Bacik 	btrfs_delete_ref_head(delayed_refs, head);
1883c1103f7aSJosef Bacik 	spin_unlock(&head->lock);
18841e7a1421SNikolay Borisov 	spin_unlock(&delayed_refs->lock);
1885c1103f7aSJosef Bacik 
1886c1103f7aSJosef Bacik 	if (head->must_insert_reserved) {
1887b25c36f8SNikolay Borisov 		btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
1888c1103f7aSJosef Bacik 		if (head->is_data) {
1889fc28b25eSJosef Bacik 			struct btrfs_root *csum_root;
1890fc28b25eSJosef Bacik 
1891fc28b25eSJosef Bacik 			csum_root = btrfs_csum_root(fs_info, head->bytenr);
1892fc28b25eSJosef Bacik 			ret = btrfs_del_csums(trans, csum_root, head->bytenr,
1893fc28b25eSJosef Bacik 					      head->num_bytes);
1894c1103f7aSJosef Bacik 		}
1895c1103f7aSJosef Bacik 	}
1896c1103f7aSJosef Bacik 
189731890da0SJosef Bacik 	btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
189807c47775SJosef Bacik 
189907c47775SJosef Bacik 	trace_run_delayed_ref_head(fs_info, head, 0);
1900c1103f7aSJosef Bacik 	btrfs_delayed_ref_unlock(head);
1901d278850eSJosef Bacik 	btrfs_put_delayed_ref_head(head);
1902856bd270SJosef Bacik 	return ret;
1903194ab0bcSJosef Bacik }
1904194ab0bcSJosef Bacik 
btrfs_obtain_ref_head(struct btrfs_trans_handle * trans)1905b1cdbcb5SNikolay Borisov static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
1906b1cdbcb5SNikolay Borisov 					struct btrfs_trans_handle *trans)
1907b1cdbcb5SNikolay Borisov {
1908b1cdbcb5SNikolay Borisov 	struct btrfs_delayed_ref_root *delayed_refs =
1909b1cdbcb5SNikolay Borisov 		&trans->transaction->delayed_refs;
1910b1cdbcb5SNikolay Borisov 	struct btrfs_delayed_ref_head *head = NULL;
1911b1cdbcb5SNikolay Borisov 	int ret;
1912b1cdbcb5SNikolay Borisov 
1913b1cdbcb5SNikolay Borisov 	spin_lock(&delayed_refs->lock);
19145637c74bSLu Fengqi 	head = btrfs_select_ref_head(delayed_refs);
1915b1cdbcb5SNikolay Borisov 	if (!head) {
1916b1cdbcb5SNikolay Borisov 		spin_unlock(&delayed_refs->lock);
1917b1cdbcb5SNikolay Borisov 		return head;
1918b1cdbcb5SNikolay Borisov 	}
1919b1cdbcb5SNikolay Borisov 
1920b1cdbcb5SNikolay Borisov 	/*
1921b1cdbcb5SNikolay Borisov 	 * Grab the lock that says we are going to process all the refs for
1922b1cdbcb5SNikolay Borisov 	 * this head
1923b1cdbcb5SNikolay Borisov 	 */
19249e920a6fSLu Fengqi 	ret = btrfs_delayed_ref_lock(delayed_refs, head);
1925b1cdbcb5SNikolay Borisov 	spin_unlock(&delayed_refs->lock);
1926b1cdbcb5SNikolay Borisov 
1927b1cdbcb5SNikolay Borisov 	/*
1928b1cdbcb5SNikolay Borisov 	 * We may have dropped the spin lock to get the head mutex lock, and
1929b1cdbcb5SNikolay Borisov 	 * that might have given someone else time to free the head.  If that's
1930b1cdbcb5SNikolay Borisov 	 * true, it has been removed from our list and we can move on.
1931b1cdbcb5SNikolay Borisov 	 */
1932b1cdbcb5SNikolay Borisov 	if (ret == -EAGAIN)
1933b1cdbcb5SNikolay Borisov 		head = ERR_PTR(-EAGAIN);
1934b1cdbcb5SNikolay Borisov 
1935b1cdbcb5SNikolay Borisov 	return head;
1936b1cdbcb5SNikolay Borisov }
1937b1cdbcb5SNikolay Borisov 
btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_head * locked_ref)1938e7261386SNikolay Borisov static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
1939a8fdc051SFilipe Manana 					   struct btrfs_delayed_ref_head *locked_ref)
1940e7261386SNikolay Borisov {
1941e7261386SNikolay Borisov 	struct btrfs_fs_info *fs_info = trans->fs_info;
1942e7261386SNikolay Borisov 	struct btrfs_delayed_ref_root *delayed_refs;
1943e7261386SNikolay Borisov 	struct btrfs_delayed_extent_op *extent_op;
1944e7261386SNikolay Borisov 	struct btrfs_delayed_ref_node *ref;
194561c681feSFilipe Manana 	bool must_insert_reserved;
1946e7261386SNikolay Borisov 	int ret;
1947e7261386SNikolay Borisov 
1948e7261386SNikolay Borisov 	delayed_refs = &trans->transaction->delayed_refs;
1949e7261386SNikolay Borisov 
19500110a4c4SNikolay Borisov 	lockdep_assert_held(&locked_ref->mutex);
19510110a4c4SNikolay Borisov 	lockdep_assert_held(&locked_ref->lock);
19520110a4c4SNikolay Borisov 
1953e7261386SNikolay Borisov 	while ((ref = select_delayed_ref(locked_ref))) {
1954e7261386SNikolay Borisov 		if (ref->seq &&
1955e7261386SNikolay Borisov 		    btrfs_check_delayed_seq(fs_info, ref->seq)) {
1956e7261386SNikolay Borisov 			spin_unlock(&locked_ref->lock);
1957e7261386SNikolay Borisov 			unselect_delayed_ref_head(delayed_refs, locked_ref);
1958e7261386SNikolay Borisov 			return -EAGAIN;
1959e7261386SNikolay Borisov 		}
1960e7261386SNikolay Borisov 
1961e7261386SNikolay Borisov 		rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
1962e7261386SNikolay Borisov 		RB_CLEAR_NODE(&ref->ref_node);
1963e7261386SNikolay Borisov 		if (!list_empty(&ref->add_list))
1964e7261386SNikolay Borisov 			list_del(&ref->add_list);
1965e7261386SNikolay Borisov 		/*
1966e7261386SNikolay Borisov 		 * When we play the delayed ref, also correct the ref_mod on
1967e7261386SNikolay Borisov 		 * head
1968e7261386SNikolay Borisov 		 */
1969e7261386SNikolay Borisov 		switch (ref->action) {
1970e7261386SNikolay Borisov 		case BTRFS_ADD_DELAYED_REF:
1971e7261386SNikolay Borisov 		case BTRFS_ADD_DELAYED_EXTENT:
1972e7261386SNikolay Borisov 			locked_ref->ref_mod -= ref->ref_mod;
1973e7261386SNikolay Borisov 			break;
1974e7261386SNikolay Borisov 		case BTRFS_DROP_DELAYED_REF:
1975e7261386SNikolay Borisov 			locked_ref->ref_mod += ref->ref_mod;
1976e7261386SNikolay Borisov 			break;
1977e7261386SNikolay Borisov 		default:
1978e7261386SNikolay Borisov 			WARN_ON(1);
1979e7261386SNikolay Borisov 		}
1980e7261386SNikolay Borisov 		atomic_dec(&delayed_refs->num_entries);
1981e7261386SNikolay Borisov 
1982e7261386SNikolay Borisov 		/*
1983e7261386SNikolay Borisov 		 * Record the must_insert_reserved flag before we drop the
1984e7261386SNikolay Borisov 		 * spin lock.
1985e7261386SNikolay Borisov 		 */
1986e7261386SNikolay Borisov 		must_insert_reserved = locked_ref->must_insert_reserved;
198761c681feSFilipe Manana 		locked_ref->must_insert_reserved = false;
1988e7261386SNikolay Borisov 
1989e7261386SNikolay Borisov 		extent_op = locked_ref->extent_op;
1990e7261386SNikolay Borisov 		locked_ref->extent_op = NULL;
1991e7261386SNikolay Borisov 		spin_unlock(&locked_ref->lock);
1992e7261386SNikolay Borisov 
1993e7261386SNikolay Borisov 		ret = run_one_delayed_ref(trans, ref, extent_op,
1994e7261386SNikolay Borisov 					  must_insert_reserved);
1995e7261386SNikolay Borisov 
1996e7261386SNikolay Borisov 		btrfs_free_delayed_extent_op(extent_op);
1997e7261386SNikolay Borisov 		if (ret) {
1998e7261386SNikolay Borisov 			unselect_delayed_ref_head(delayed_refs, locked_ref);
1999e7261386SNikolay Borisov 			btrfs_put_delayed_ref(ref);
2000e7261386SNikolay Borisov 			return ret;
2001e7261386SNikolay Borisov 		}
2002e7261386SNikolay Borisov 
2003e7261386SNikolay Borisov 		btrfs_put_delayed_ref(ref);
2004e7261386SNikolay Borisov 		cond_resched();
2005e7261386SNikolay Borisov 
2006e7261386SNikolay Borisov 		spin_lock(&locked_ref->lock);
20070c555c97SJohannes Thumshirn 		btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
2008e7261386SNikolay Borisov 	}
2009e7261386SNikolay Borisov 
2010e7261386SNikolay Borisov 	return 0;
2011e7261386SNikolay Borisov }
2012e7261386SNikolay Borisov 
201379787eaaSJeff Mahoney /*
201479787eaaSJeff Mahoney  * Returns 0 on success or if called with an already aborted transaction.
201579787eaaSJeff Mahoney  * Returns -ENOMEM or -EIO on failure and will abort the transaction.
201679787eaaSJeff Mahoney  */
__btrfs_run_delayed_refs(struct btrfs_trans_handle * trans,unsigned long nr)2017d7df2c79SJosef Bacik static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2018d7df2c79SJosef Bacik 					     unsigned long nr)
201956bec294SChris Mason {
20200a1e458aSNikolay Borisov 	struct btrfs_fs_info *fs_info = trans->fs_info;
202156bec294SChris Mason 	struct btrfs_delayed_ref_root *delayed_refs;
202256bec294SChris Mason 	struct btrfs_delayed_ref_head *locked_ref = NULL;
202356bec294SChris Mason 	int ret;
2024d7df2c79SJosef Bacik 	unsigned long count = 0;
202556bec294SChris Mason 
202656bec294SChris Mason 	delayed_refs = &trans->transaction->delayed_refs;
20270110a4c4SNikolay Borisov 	do {
202856bec294SChris Mason 		if (!locked_ref) {
2029b1cdbcb5SNikolay Borisov 			locked_ref = btrfs_obtain_ref_head(trans);
20300110a4c4SNikolay Borisov 			if (IS_ERR_OR_NULL(locked_ref)) {
20310110a4c4SNikolay Borisov 				if (PTR_ERR(locked_ref) == -EAGAIN) {
2032c3e69d58SChris Mason 					continue;
20330110a4c4SNikolay Borisov 				} else {
20340110a4c4SNikolay Borisov 					break;
203556bec294SChris Mason 				}
203656bec294SChris Mason 			}
20370110a4c4SNikolay Borisov 			count++;
20380110a4c4SNikolay Borisov 		}
20392c3cf7d5SFilipe Manana 		/*
20402c3cf7d5SFilipe Manana 		 * We need to try and merge add/drops of the same ref since we
20412c3cf7d5SFilipe Manana 		 * can run into issues with relocate dropping the implicit ref
20422c3cf7d5SFilipe Manana 		 * and then it being added back again before the drop can
20432c3cf7d5SFilipe Manana 		 * finish.  If we merged anything we need to re-loop so we can
20442c3cf7d5SFilipe Manana 		 * get a good ref.
20452c3cf7d5SFilipe Manana 		 * Or we can get node references of the same type that weren't
20462c3cf7d5SFilipe Manana 		 * merged when created due to bumps in the tree mod seq, and
20472c3cf7d5SFilipe Manana 		 * we need to merge them to prevent adding an inline extent
20482c3cf7d5SFilipe Manana 		 * backref before dropping it (triggering a BUG_ON at
20492c3cf7d5SFilipe Manana 		 * insert_inline_extent_backref()).
20502c3cf7d5SFilipe Manana 		 */
2051d7df2c79SJosef Bacik 		spin_lock(&locked_ref->lock);
20520c555c97SJohannes Thumshirn 		btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
2053ae1e206bSJosef Bacik 
2054a8fdc051SFilipe Manana 		ret = btrfs_run_delayed_refs_for_head(trans, locked_ref);
20550110a4c4SNikolay Borisov 		if (ret < 0 && ret != -EAGAIN) {
2056c1103f7aSJosef Bacik 			/*
20570110a4c4SNikolay Borisov 			 * Error, btrfs_run_delayed_refs_for_head already
20580110a4c4SNikolay Borisov 			 * unlocked everything so just bail out
2059c1103f7aSJosef Bacik 			 */
20600110a4c4SNikolay Borisov 			return ret;
20610110a4c4SNikolay Borisov 		} else if (!ret) {
20620110a4c4SNikolay Borisov 			/*
20630110a4c4SNikolay Borisov 			 * Success, perform the usual cleanup of a processed
20640110a4c4SNikolay Borisov 			 * head
20650110a4c4SNikolay Borisov 			 */
2066f9871eddSNikolay Borisov 			ret = cleanup_ref_head(trans, locked_ref);
2067194ab0bcSJosef Bacik 			if (ret > 0 ) {
2068b00e6250SJosef Bacik 				/* We dropped our lock, we need to loop. */
2069b00e6250SJosef Bacik 				ret = 0;
2070d7df2c79SJosef Bacik 				continue;
2071194ab0bcSJosef Bacik 			} else if (ret) {
2072194ab0bcSJosef Bacik 				return ret;
207356bec294SChris Mason 			}
20740110a4c4SNikolay Borisov 		}
20750110a4c4SNikolay Borisov 
20760110a4c4SNikolay Borisov 		/*
20770110a4c4SNikolay Borisov 		 * Either success case or btrfs_run_delayed_refs_for_head
20780110a4c4SNikolay Borisov 		 * returned -EAGAIN, meaning we need to select another head
20790110a4c4SNikolay Borisov 		 */
20800110a4c4SNikolay Borisov 
2081c1103f7aSJosef Bacik 		locked_ref = NULL;
20821887be66SChris Mason 		cond_resched();
20830110a4c4SNikolay Borisov 	} while ((nr != -1 && count < nr) || locked_ref);
20840a2b2a84SJosef Bacik 
2085d7df2c79SJosef Bacik 	return 0;
208656bec294SChris Mason }
208756bec294SChris Mason 
#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order. This
 * correlates in most cases to the order added. To expose dependencies on this
 * order, we start to process the tree in the middle instead of the beginning.
 *
 * Walks down from the root of @root, alternating between the left and the
 * right child, and returns the bytenr of the last entry visited.  Returns 0
 * for an empty tree.
 */
static u64 find_middle(struct rb_root *root)
{
	struct rb_node *n = root->rb_node;
	struct btrfs_delayed_ref_node *entry;
	int alt = 1;
	/* Defined result for an empty tree instead of an indeterminate value. */
	u64 middle = 0;

	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		WARN_ON(!entry->in_tree);

		middle = entry->bytenr;

		if (alt)
			n = n->rb_left;
		else
			n = n->rb_right;

		alt = 1 - alt;
	}
	return middle;
}
#endif
2130709c0486SArne Jansen 
21311262133bSJosef Bacik /*
2132c3e69d58SChris Mason  * this starts processing the delayed reference count updates and
2133c3e69d58SChris Mason  * extent insertions we have queued up so far.  count can be
2134c3e69d58SChris Mason  * 0, which means to process everything in the tree at the start
2135c3e69d58SChris Mason  * of the run (but not newly added entries), or it can be some target
2136c3e69d58SChris Mason  * number you'd like to process.
213779787eaaSJeff Mahoney  *
213879787eaaSJeff Mahoney  * Returns 0 on success or if called with an aborted transaction
213979787eaaSJeff Mahoney  * Returns <0 on error and aborts the transaction
2140c3e69d58SChris Mason  */
btrfs_run_delayed_refs(struct btrfs_trans_handle * trans,unsigned long count)2141c3e69d58SChris Mason int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2142c79a70b1SNikolay Borisov 			   unsigned long count)
2143c3e69d58SChris Mason {
2144c79a70b1SNikolay Borisov 	struct btrfs_fs_info *fs_info = trans->fs_info;
2145c3e69d58SChris Mason 	struct rb_node *node;
2146c3e69d58SChris Mason 	struct btrfs_delayed_ref_root *delayed_refs;
2147c46effa6SLiu Bo 	struct btrfs_delayed_ref_head *head;
2148c3e69d58SChris Mason 	int ret;
2149c3e69d58SChris Mason 	int run_all = count == (unsigned long)-1;
2150c3e69d58SChris Mason 
215179787eaaSJeff Mahoney 	/* We'll clean this up in btrfs_cleanup_transaction */
2152bf31f87fSDavid Sterba 	if (TRANS_ABORTED(trans))
215379787eaaSJeff Mahoney 		return 0;
215479787eaaSJeff Mahoney 
21550b246afaSJeff Mahoney 	if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
2156511711afSChris Mason 		return 0;
2157511711afSChris Mason 
2158c3e69d58SChris Mason 	delayed_refs = &trans->transaction->delayed_refs;
215926455d33SLiu Bo 	if (count == 0)
216061a56a99SJosef Bacik 		count = delayed_refs->num_heads_ready;
2161bb721703SChris Mason 
2162c3e69d58SChris Mason again:
2163709c0486SArne Jansen #ifdef SCRAMBLE_DELAYED_REFS
2164709c0486SArne Jansen 	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2165709c0486SArne Jansen #endif
21660a1e458aSNikolay Borisov 	ret = __btrfs_run_delayed_refs(trans, count);
216779787eaaSJeff Mahoney 	if (ret < 0) {
216866642832SJeff Mahoney 		btrfs_abort_transaction(trans, ret);
216979787eaaSJeff Mahoney 		return ret;
217079787eaaSJeff Mahoney 	}
2171c3e69d58SChris Mason 
2172c3e69d58SChris Mason 	if (run_all) {
21736c686b35SNikolay Borisov 		btrfs_create_pending_block_groups(trans);
2174ea658badSJosef Bacik 
2175d7df2c79SJosef Bacik 		spin_lock(&delayed_refs->lock);
21765c9d028bSLiu Bo 		node = rb_first_cached(&delayed_refs->href_root);
2177d7df2c79SJosef Bacik 		if (!node) {
2178d7df2c79SJosef Bacik 			spin_unlock(&delayed_refs->lock);
2179c3e69d58SChris Mason 			goto out;
2180d7df2c79SJosef Bacik 		}
2181c46effa6SLiu Bo 		head = rb_entry(node, struct btrfs_delayed_ref_head,
2182c46effa6SLiu Bo 				href_node);
2183d278850eSJosef Bacik 		refcount_inc(&head->refs);
218456bec294SChris Mason 		spin_unlock(&delayed_refs->lock);
2185d278850eSJosef Bacik 
2186d278850eSJosef Bacik 		/* Mutex was contended, block until it's released and retry. */
218756bec294SChris Mason 		mutex_lock(&head->mutex);
218856bec294SChris Mason 		mutex_unlock(&head->mutex);
218956bec294SChris Mason 
2190d278850eSJosef Bacik 		btrfs_put_delayed_ref_head(head);
2191d7df2c79SJosef Bacik 		cond_resched();
219256bec294SChris Mason 		goto again;
219356bec294SChris Mason 	}
219454aa1f4dSChris Mason out:
2195a28ec197SChris Mason 	return 0;
2196a28ec197SChris Mason }
2197a28ec197SChris Mason 
/*
 * Queue a delayed extent op that sets @flags on the extent backing @eb.
 *
 * Returns -ENOMEM when the extent op cannot be allocated, otherwise the
 * result of btrfs_add_delayed_extent_op().  The op is freed here when
 * queueing it fails.
 */
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
				struct extent_buffer *eb, u64 flags)
{
	struct btrfs_delayed_extent_op *op;
	int err;

	op = btrfs_alloc_delayed_extent_op();
	if (op == NULL)
		return -ENOMEM;

	/* A flags-only update: the key is left alone. */
	op->flags_to_set = flags;
	op->update_flags = true;
	op->update_key = false;
	op->level = btrfs_header_level(eb);

	err = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, op);
	if (err)
		btrfs_free_delayed_extent_op(op);

	return err;
}
22195d4f98a2SYan Zheng 
check_delayed_ref(struct btrfs_root * root,struct btrfs_path * path,u64 objectid,u64 offset,u64 bytenr)2220e4c3b2dcSLiu Bo static noinline int check_delayed_ref(struct btrfs_root *root,
22215d4f98a2SYan Zheng 				      struct btrfs_path *path,
22225d4f98a2SYan Zheng 				      u64 objectid, u64 offset, u64 bytenr)
22235d4f98a2SYan Zheng {
22245d4f98a2SYan Zheng 	struct btrfs_delayed_ref_head *head;
22255d4f98a2SYan Zheng 	struct btrfs_delayed_ref_node *ref;
22265d4f98a2SYan Zheng 	struct btrfs_delayed_data_ref *data_ref;
22275d4f98a2SYan Zheng 	struct btrfs_delayed_ref_root *delayed_refs;
2228e4c3b2dcSLiu Bo 	struct btrfs_transaction *cur_trans;
22290e0adbcfSJosef Bacik 	struct rb_node *node;
22305d4f98a2SYan Zheng 	int ret = 0;
22315d4f98a2SYan Zheng 
2232998ac6d2Sethanwu 	spin_lock(&root->fs_info->trans_lock);
2233e4c3b2dcSLiu Bo 	cur_trans = root->fs_info->running_transaction;
2234998ac6d2Sethanwu 	if (cur_trans)
2235998ac6d2Sethanwu 		refcount_inc(&cur_trans->use_count);
2236998ac6d2Sethanwu 	spin_unlock(&root->fs_info->trans_lock);
2237e4c3b2dcSLiu Bo 	if (!cur_trans)
2238e4c3b2dcSLiu Bo 		return 0;
2239e4c3b2dcSLiu Bo 
2240e4c3b2dcSLiu Bo 	delayed_refs = &cur_trans->delayed_refs;
22415d4f98a2SYan Zheng 	spin_lock(&delayed_refs->lock);
2242f72ad18eSLiu Bo 	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
2243d7df2c79SJosef Bacik 	if (!head) {
2244d7df2c79SJosef Bacik 		spin_unlock(&delayed_refs->lock);
2245998ac6d2Sethanwu 		btrfs_put_transaction(cur_trans);
2246d7df2c79SJosef Bacik 		return 0;
2247d7df2c79SJosef Bacik 	}
22485d4f98a2SYan Zheng 
22495d4f98a2SYan Zheng 	if (!mutex_trylock(&head->mutex)) {
225026ce9114SJosef Bacik 		if (path->nowait) {
225126ce9114SJosef Bacik 			spin_unlock(&delayed_refs->lock);
225226ce9114SJosef Bacik 			btrfs_put_transaction(cur_trans);
225326ce9114SJosef Bacik 			return -EAGAIN;
225426ce9114SJosef Bacik 		}
225526ce9114SJosef Bacik 
2256d278850eSJosef Bacik 		refcount_inc(&head->refs);
22575d4f98a2SYan Zheng 		spin_unlock(&delayed_refs->lock);
22585d4f98a2SYan Zheng 
2259b3b4aa74SDavid Sterba 		btrfs_release_path(path);
22605d4f98a2SYan Zheng 
22618cc33e5cSDavid Sterba 		/*
22628cc33e5cSDavid Sterba 		 * Mutex was contended, block until it's released and let
22638cc33e5cSDavid Sterba 		 * caller try again
22648cc33e5cSDavid Sterba 		 */
22655d4f98a2SYan Zheng 		mutex_lock(&head->mutex);
22665d4f98a2SYan Zheng 		mutex_unlock(&head->mutex);
2267d278850eSJosef Bacik 		btrfs_put_delayed_ref_head(head);
2268998ac6d2Sethanwu 		btrfs_put_transaction(cur_trans);
22695d4f98a2SYan Zheng 		return -EAGAIN;
22705d4f98a2SYan Zheng 	}
2271d7df2c79SJosef Bacik 	spin_unlock(&delayed_refs->lock);
22725d4f98a2SYan Zheng 
2273d7df2c79SJosef Bacik 	spin_lock(&head->lock);
22740e0adbcfSJosef Bacik 	/*
22750e0adbcfSJosef Bacik 	 * XXX: We should replace this with a proper search function in the
22760e0adbcfSJosef Bacik 	 * future.
22770e0adbcfSJosef Bacik 	 */
2278e3d03965SLiu Bo 	for (node = rb_first_cached(&head->ref_tree); node;
2279e3d03965SLiu Bo 	     node = rb_next(node)) {
22800e0adbcfSJosef Bacik 		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
2281d7df2c79SJosef Bacik 		/* If it's a shared ref we know a cross reference exists */
2282d7df2c79SJosef Bacik 		if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
22835d4f98a2SYan Zheng 			ret = 1;
2284d7df2c79SJosef Bacik 			break;
2285d7df2c79SJosef Bacik 		}
22865d4f98a2SYan Zheng 
22875d4f98a2SYan Zheng 		data_ref = btrfs_delayed_node_to_data_ref(ref);
22885d4f98a2SYan Zheng 
2289d7df2c79SJosef Bacik 		/*
2290d7df2c79SJosef Bacik 		 * If our ref doesn't match the one we're currently looking at
2291d7df2c79SJosef Bacik 		 * then we have a cross reference.
2292d7df2c79SJosef Bacik 		 */
22935d4f98a2SYan Zheng 		if (data_ref->root != root->root_key.objectid ||
2294d7df2c79SJosef Bacik 		    data_ref->objectid != objectid ||
2295d7df2c79SJosef Bacik 		    data_ref->offset != offset) {
2296d7df2c79SJosef Bacik 			ret = 1;
2297d7df2c79SJosef Bacik 			break;
2298d7df2c79SJosef Bacik 		}
2299d7df2c79SJosef Bacik 	}
2300d7df2c79SJosef Bacik 	spin_unlock(&head->lock);
23015d4f98a2SYan Zheng 	mutex_unlock(&head->mutex);
2302998ac6d2Sethanwu 	btrfs_put_transaction(cur_trans);
23035d4f98a2SYan Zheng 	return ret;
23045d4f98a2SYan Zheng }
23055d4f98a2SYan Zheng 
check_committed_ref(struct btrfs_root * root,struct btrfs_path * path,u64 objectid,u64 offset,u64 bytenr,bool strict)2306e4c3b2dcSLiu Bo static noinline int check_committed_ref(struct btrfs_root *root,
23075d4f98a2SYan Zheng 					struct btrfs_path *path,
2308a84d5d42SBoris Burkov 					u64 objectid, u64 offset, u64 bytenr,
2309a84d5d42SBoris Burkov 					bool strict)
2310be20aa9dSChris Mason {
23110b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
231229cbcf40SJosef Bacik 	struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
2313f321e491SYan Zheng 	struct extent_buffer *leaf;
23145d4f98a2SYan Zheng 	struct btrfs_extent_data_ref *ref;
23155d4f98a2SYan Zheng 	struct btrfs_extent_inline_ref *iref;
23165d4f98a2SYan Zheng 	struct btrfs_extent_item *ei;
2317be20aa9dSChris Mason 	struct btrfs_key key;
23185d4f98a2SYan Zheng 	u32 item_size;
23193de28d57SLiu Bo 	int type;
2320f321e491SYan Zheng 	int ret;
2321be20aa9dSChris Mason 
2322be20aa9dSChris Mason 	key.objectid = bytenr;
232331840ae1SZheng Yan 	key.offset = (u64)-1;
2324f321e491SYan Zheng 	key.type = BTRFS_EXTENT_ITEM_KEY;
2325be20aa9dSChris Mason 
2326be20aa9dSChris Mason 	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2327be20aa9dSChris Mason 	if (ret < 0)
2328be20aa9dSChris Mason 		goto out;
232979787eaaSJeff Mahoney 	BUG_ON(ret == 0); /* Corruption */
233080ff3856SYan Zheng 
233180ff3856SYan Zheng 	ret = -ENOENT;
233280ff3856SYan Zheng 	if (path->slots[0] == 0)
233331840ae1SZheng Yan 		goto out;
2334be20aa9dSChris Mason 
233531840ae1SZheng Yan 	path->slots[0]--;
2336f321e491SYan Zheng 	leaf = path->nodes[0];
23375d4f98a2SYan Zheng 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2338be20aa9dSChris Mason 
23395d4f98a2SYan Zheng 	if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2340be20aa9dSChris Mason 		goto out;
2341be20aa9dSChris Mason 
234280ff3856SYan Zheng 	ret = 1;
23433212fa14SJosef Bacik 	item_size = btrfs_item_size(leaf, path->slots[0]);
23445d4f98a2SYan Zheng 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
23455d4f98a2SYan Zheng 
2346a6bd9cd1SNikolay Borisov 	/* If extent item has more than 1 inline ref then it's shared */
23475d4f98a2SYan Zheng 	if (item_size != sizeof(*ei) +
23485d4f98a2SYan Zheng 	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
23495d4f98a2SYan Zheng 		goto out;
23505d4f98a2SYan Zheng 
2351a84d5d42SBoris Burkov 	/*
2352a84d5d42SBoris Burkov 	 * If extent created before last snapshot => it's shared unless the
2353a84d5d42SBoris Burkov 	 * snapshot has been deleted. Use the heuristic if strict is false.
2354a84d5d42SBoris Burkov 	 */
2355a84d5d42SBoris Burkov 	if (!strict &&
2356a84d5d42SBoris Burkov 	    (btrfs_extent_generation(leaf, ei) <=
2357a84d5d42SBoris Burkov 	     btrfs_root_last_snapshot(&root->root_item)))
23585d4f98a2SYan Zheng 		goto out;
23595d4f98a2SYan Zheng 
23605d4f98a2SYan Zheng 	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
23613de28d57SLiu Bo 
2362a6bd9cd1SNikolay Borisov 	/* If this extent has SHARED_DATA_REF then it's shared */
23633de28d57SLiu Bo 	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
23643de28d57SLiu Bo 	if (type != BTRFS_EXTENT_DATA_REF_KEY)
23655d4f98a2SYan Zheng 		goto out;
23665d4f98a2SYan Zheng 
23675d4f98a2SYan Zheng 	ref = (struct btrfs_extent_data_ref *)(&iref->offset);
23685d4f98a2SYan Zheng 	if (btrfs_extent_refs(leaf, ei) !=
23695d4f98a2SYan Zheng 	    btrfs_extent_data_ref_count(leaf, ref) ||
23705d4f98a2SYan Zheng 	    btrfs_extent_data_ref_root(leaf, ref) !=
23715d4f98a2SYan Zheng 	    root->root_key.objectid ||
23725d4f98a2SYan Zheng 	    btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
23735d4f98a2SYan Zheng 	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
23745d4f98a2SYan Zheng 		goto out;
23755d4f98a2SYan Zheng 
23765d4f98a2SYan Zheng 	ret = 0;
23775d4f98a2SYan Zheng out:
23785d4f98a2SYan Zheng 	return ret;
23795d4f98a2SYan Zheng }
23805d4f98a2SYan Zheng 
/*
 * Check whether the data extent at @bytenr has any reference besides the file
 * extent identified by (@root, @objectid, @offset).
 *
 * The committed extent tree is checked first, then the delayed refs.  Both
 * checks are repeated for as long as the delayed ref check returns -EAGAIN.
 * A committed-ref result other than 0 or -ENOENT ends the search immediately.
 *
 * @path is released before returning.  Returns the result of the last check
 * that ran (0, a positive value, or a negative errno).
 */
int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
			  u64 bytenr, bool strict, struct btrfs_path *path)
{
	int ret;

	for (;;) {
		ret = check_committed_ref(root, path, objectid, offset, bytenr,
					  strict);
		/* Only "no cross ref" (0) and -ENOENT fall through to delayed refs. */
		if (ret != 0 && ret != -ENOENT)
			break;

		ret = check_delayed_ref(root, path, objectid, offset, bytenr);
		if (ret != -EAGAIN)
			break;
	}

	btrfs_release_path(path);
	/* A positive result is not expected for the data relocation root. */
	if (btrfs_is_data_reloc_root(root))
		WARN_ON(ret > 0);
	return ret;
}
2401f321e491SYan Zheng 
__btrfs_mod_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * buf,int full_backref,int inc)24025d4f98a2SYan Zheng static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2403b7a9f29fSChris Mason 			   struct btrfs_root *root,
24045d4f98a2SYan Zheng 			   struct extent_buffer *buf,
2405e339a6b0SJosef Bacik 			   int full_backref, int inc)
240631840ae1SZheng Yan {
24070b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
240831840ae1SZheng Yan 	u64 bytenr;
24095d4f98a2SYan Zheng 	u64 num_bytes;
24105d4f98a2SYan Zheng 	u64 parent;
241131840ae1SZheng Yan 	u64 ref_root;
241231840ae1SZheng Yan 	u32 nritems;
241331840ae1SZheng Yan 	struct btrfs_key key;
241431840ae1SZheng Yan 	struct btrfs_file_extent_item *fi;
241582fa113fSQu Wenruo 	struct btrfs_ref generic_ref = { 0 };
241682fa113fSQu Wenruo 	bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
241731840ae1SZheng Yan 	int i;
241882fa113fSQu Wenruo 	int action;
241931840ae1SZheng Yan 	int level;
242031840ae1SZheng Yan 	int ret = 0;
2421fccb84c9SDavid Sterba 
24220b246afaSJeff Mahoney 	if (btrfs_is_testing(fs_info))
2423faa2dbf0SJosef Bacik 		return 0;
2424fccb84c9SDavid Sterba 
242531840ae1SZheng Yan 	ref_root = btrfs_header_owner(buf);
242631840ae1SZheng Yan 	nritems = btrfs_header_nritems(buf);
242731840ae1SZheng Yan 	level = btrfs_header_level(buf);
242831840ae1SZheng Yan 
242992a7cc42SQu Wenruo 	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
24305d4f98a2SYan Zheng 		return 0;
2431b7a9f29fSChris Mason 
24325d4f98a2SYan Zheng 	if (full_backref)
24335d4f98a2SYan Zheng 		parent = buf->start;
24345d4f98a2SYan Zheng 	else
24355d4f98a2SYan Zheng 		parent = 0;
243682fa113fSQu Wenruo 	if (inc)
243782fa113fSQu Wenruo 		action = BTRFS_ADD_DELAYED_REF;
243882fa113fSQu Wenruo 	else
243982fa113fSQu Wenruo 		action = BTRFS_DROP_DELAYED_REF;
24405d4f98a2SYan Zheng 
244131840ae1SZheng Yan 	for (i = 0; i < nritems; i++) {
2442db94535dSChris Mason 		if (level == 0) {
24435f39d397SChris Mason 			btrfs_item_key_to_cpu(buf, &key, i);
2444962a298fSDavid Sterba 			if (key.type != BTRFS_EXTENT_DATA_KEY)
244554aa1f4dSChris Mason 				continue;
24465f39d397SChris Mason 			fi = btrfs_item_ptr(buf, i,
244754aa1f4dSChris Mason 					    struct btrfs_file_extent_item);
24485f39d397SChris Mason 			if (btrfs_file_extent_type(buf, fi) ==
244954aa1f4dSChris Mason 			    BTRFS_FILE_EXTENT_INLINE)
245054aa1f4dSChris Mason 				continue;
245131840ae1SZheng Yan 			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
245231840ae1SZheng Yan 			if (bytenr == 0)
245354aa1f4dSChris Mason 				continue;
245431840ae1SZheng Yan 
24555d4f98a2SYan Zheng 			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
24565d4f98a2SYan Zheng 			key.offset -= btrfs_file_extent_offset(buf, fi);
245782fa113fSQu Wenruo 			btrfs_init_generic_ref(&generic_ref, action, bytenr,
245882fa113fSQu Wenruo 					       num_bytes, parent);
245982fa113fSQu Wenruo 			btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
2460f42c5da6SNikolay Borisov 					    key.offset, root->root_key.objectid,
2461f42c5da6SNikolay Borisov 					    for_reloc);
2462dd28b6a5SQu Wenruo 			if (inc)
246382fa113fSQu Wenruo 				ret = btrfs_inc_extent_ref(trans, &generic_ref);
2464dd28b6a5SQu Wenruo 			else
2465ffd4bb2aSQu Wenruo 				ret = btrfs_free_extent(trans, &generic_ref);
24665d4f98a2SYan Zheng 			if (ret)
24675d4f98a2SYan Zheng 				goto fail;
2468b7a9f29fSChris Mason 		} else {
2469b7a9f29fSChris Mason 			bytenr = btrfs_node_blockptr(buf, i);
24700b246afaSJeff Mahoney 			num_bytes = fs_info->nodesize;
247182fa113fSQu Wenruo 			btrfs_init_generic_ref(&generic_ref, action, bytenr,
247282fa113fSQu Wenruo 					       num_bytes, parent);
2473f42c5da6SNikolay Borisov 			btrfs_init_tree_ref(&generic_ref, level - 1, ref_root,
2474f42c5da6SNikolay Borisov 					    root->root_key.objectid, for_reloc);
2475dd28b6a5SQu Wenruo 			if (inc)
247682fa113fSQu Wenruo 				ret = btrfs_inc_extent_ref(trans, &generic_ref);
2477dd28b6a5SQu Wenruo 			else
2478ffd4bb2aSQu Wenruo 				ret = btrfs_free_extent(trans, &generic_ref);
24795d4f98a2SYan Zheng 			if (ret)
248031840ae1SZheng Yan 				goto fail;
248131840ae1SZheng Yan 		}
248231840ae1SZheng Yan 	}
248331840ae1SZheng Yan 	return 0;
248431840ae1SZheng Yan fail:
248554aa1f4dSChris Mason 	return ret;
248602217ed2SChris Mason }
248702217ed2SChris Mason 
/*
 * Add one reference to every extent or child block that @buf points to.
 * Thin wrapper around __btrfs_mod_ref() with inc == 1.
 */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	const int inc = 1;

	return __btrfs_mod_ref(trans, root, buf, full_backref, inc);
}
249331840ae1SZheng Yan 
/*
 * Drop one reference from every extent or child block that @buf points to.
 * Thin wrapper around __btrfs_mod_ref() with inc == 0.
 */
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	const int inc = 0;

	return __btrfs_mod_ref(trans, root, buf, full_backref, inc);
}
249931840ae1SZheng Yan 
get_alloc_profile_by_root(struct btrfs_root * root,int data)25001b86826dSJeff Mahoney static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
2501b742bb82SYan, Zheng {
25020b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
2503b742bb82SYan, Zheng 	u64 flags;
250453b381b3SDavid Woodhouse 	u64 ret;
2505b742bb82SYan, Zheng 
2506b742bb82SYan, Zheng 	if (data)
2507b742bb82SYan, Zheng 		flags = BTRFS_BLOCK_GROUP_DATA;
25080b246afaSJeff Mahoney 	else if (root == fs_info->chunk_root)
2509b742bb82SYan, Zheng 		flags = BTRFS_BLOCK_GROUP_SYSTEM;
2510b742bb82SYan, Zheng 	else
2511b742bb82SYan, Zheng 		flags = BTRFS_BLOCK_GROUP_METADATA;
2512b742bb82SYan, Zheng 
2513878d7b67SJosef Bacik 	ret = btrfs_get_alloc_profile(fs_info, flags);
251453b381b3SDavid Woodhouse 	return ret;
25156a63209fSJosef Bacik }
25166a63209fSJosef Bacik 
first_logical_byte(struct btrfs_fs_info * fs_info)25170eb997bfSFilipe Manana static u64 first_logical_byte(struct btrfs_fs_info *fs_info)
2518a061fc8dSChris Mason {
251908dddb29SFilipe Manana 	struct rb_node *leftmost;
252008dddb29SFilipe Manana 	u64 bytenr = 0;
25210f9dd46cSJosef Bacik 
252216b0c258SFilipe Manana 	read_lock(&fs_info->block_group_cache_lock);
25230eb997bfSFilipe Manana 	/* Get the block group with the lowest logical start address. */
252408dddb29SFilipe Manana 	leftmost = rb_first_cached(&fs_info->block_group_cache_tree);
252508dddb29SFilipe Manana 	if (leftmost) {
252608dddb29SFilipe Manana 		struct btrfs_block_group *bg;
2527a1897fddSLiu Bo 
252808dddb29SFilipe Manana 		bg = rb_entry(leftmost, struct btrfs_block_group, cache_node);
252908dddb29SFilipe Manana 		bytenr = bg->start;
253008dddb29SFilipe Manana 	}
253116b0c258SFilipe Manana 	read_unlock(&fs_info->block_group_cache_lock);
2532d2fb3437SYan Zheng 
2533d2fb3437SYan Zheng 	return bytenr;
2534a061fc8dSChris Mason }
2535a061fc8dSChris Mason 
/*
 * Account [@bytenr, @bytenr + @num_bytes) as pinned in block group @cache.
 *
 * Under the space_info lock and then the block group lock, the pinned counters
 * of both are raised by @num_bytes.  If @reserved is non-zero the same amount
 * is taken out of their reserved counters.  The range is then marked
 * EXTENT_DIRTY in the running transaction's pinned_extents tree.
 *
 * Always returns 0.
 */
static int pin_down_extent(struct btrfs_trans_handle *trans,
			   struct btrfs_block_group *cache,
			   u64 bytenr, u64 num_bytes, int reserved)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_space_info *sinfo = cache->space_info;
	const u64 last = bytenr + num_bytes - 1;

	/* Lock order: space_info first, block group second. */
	spin_lock(&sinfo->lock);
	spin_lock(&cache->lock);

	cache->pinned += num_bytes;
	btrfs_space_info_update_bytes_pinned(fs_info, sinfo, num_bytes);
	if (reserved) {
		cache->reserved -= num_bytes;
		sinfo->bytes_reserved -= num_bytes;
	}

	spin_unlock(&cache->lock);
	spin_unlock(&sinfo->lock);

	set_extent_bit(&trans->transaction->pinned_extents, bytenr, last,
		       EXTENT_DIRTY, NULL);
	return 0;
}
25589078a3e1SChris Mason 
/*
 * Pin [@bytenr, @bytenr + @num_bytes) in the block group that contains
 * @bytenr.  @reserved is passed through to pin_down_extent().
 *
 * A missing block group is treated as a logic error (BUG_ON).
 * Always returns 0.
 */
int btrfs_pin_extent(struct btrfs_trans_handle *trans,
		     u64 bytenr, u64 num_bytes, int reserved)
{
	struct btrfs_block_group *bg;

	bg = btrfs_lookup_block_group(trans->fs_info, bytenr);
	BUG_ON(!bg); /* Logic error */

	pin_down_extent(trans, bg, bytenr, num_bytes, reserved);

	/* Drop the reference taken by the lookup. */
	btrfs_put_block_group(bg);
	return 0;
}
2572f0486c68SYan, Zheng 
2573f0486c68SYan, Zheng /*
2574e688b725SChris Mason  * this function must be called within transaction
2575f0486c68SYan, Zheng  */
btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle * trans,u64 bytenr,u64 num_bytes)25769fce5704SNikolay Borisov int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
2577e688b725SChris Mason 				    u64 bytenr, u64 num_bytes)
2578f0486c68SYan, Zheng {
257932da5386SDavid Sterba 	struct btrfs_block_group *cache;
2580b50c6e25SJosef Bacik 	int ret;
2581e688b725SChris Mason 
25829fce5704SNikolay Borisov 	cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
2583b50c6e25SJosef Bacik 	if (!cache)
2584b50c6e25SJosef Bacik 		return -EINVAL;
2585e688b725SChris Mason 
2586e688b725SChris Mason 	/*
2587ced8ecf0SOmar Sandoval 	 * Fully cache the free space first so that our pin removes the free space
2588ced8ecf0SOmar Sandoval 	 * from the cache.
2589e688b725SChris Mason 	 */
2590ced8ecf0SOmar Sandoval 	ret = btrfs_cache_block_group(cache, true);
25919ad6d91fSFilipe Manana 	if (ret)
25929ad6d91fSFilipe Manana 		goto out;
2593e688b725SChris Mason 
25946690d071SNikolay Borisov 	pin_down_extent(trans, cache, bytenr, num_bytes, 0);
2595e688b725SChris Mason 
2596e688b725SChris Mason 	/* remove us from the free space cache (if we're there at all) */
2597b50c6e25SJosef Bacik 	ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
25989ad6d91fSFilipe Manana out:
2599e688b725SChris Mason 	btrfs_put_block_group(cache);
2600b50c6e25SJosef Bacik 	return ret;
2601e688b725SChris Mason }
2602e688b725SChris Mason 
/*
 * Remove [@start, @start + @num_bytes) from the free space cache of the block
 * group containing @start, caching that block group first.
 *
 * Returns -EINVAL when no block group contains @start, otherwise the result of
 * caching the block group or of the free space removal.
 */
static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
				   u64 start, u64 num_bytes)
{
	struct btrfs_block_group *bg;
	int ret;

	bg = btrfs_lookup_block_group(fs_info, start);
	if (!bg)
		return -EINVAL;

	ret = btrfs_cache_block_group(bg, true);
	if (ret == 0)
		ret = btrfs_remove_free_space(bg, start, num_bytes);

	/* Drop the lookup reference on both the success and the error path. */
	btrfs_put_block_group(bg);
	return ret;
}
26228c2a1a30SJosef Bacik 
btrfs_exclude_logged_extents(struct extent_buffer * eb)2623bcdc428cSDavid Sterba int btrfs_exclude_logged_extents(struct extent_buffer *eb)
26248c2a1a30SJosef Bacik {
2625bcdc428cSDavid Sterba 	struct btrfs_fs_info *fs_info = eb->fs_info;
26268c2a1a30SJosef Bacik 	struct btrfs_file_extent_item *item;
26278c2a1a30SJosef Bacik 	struct btrfs_key key;
26288c2a1a30SJosef Bacik 	int found_type;
26298c2a1a30SJosef Bacik 	int i;
2630b89311efSGu Jinxiang 	int ret = 0;
26318c2a1a30SJosef Bacik 
26322ff7e61eSJeff Mahoney 	if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
26338c2a1a30SJosef Bacik 		return 0;
26348c2a1a30SJosef Bacik 
26358c2a1a30SJosef Bacik 	for (i = 0; i < btrfs_header_nritems(eb); i++) {
26368c2a1a30SJosef Bacik 		btrfs_item_key_to_cpu(eb, &key, i);
26378c2a1a30SJosef Bacik 		if (key.type != BTRFS_EXTENT_DATA_KEY)
26388c2a1a30SJosef Bacik 			continue;
26398c2a1a30SJosef Bacik 		item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
26408c2a1a30SJosef Bacik 		found_type = btrfs_file_extent_type(eb, item);
26418c2a1a30SJosef Bacik 		if (found_type == BTRFS_FILE_EXTENT_INLINE)
26428c2a1a30SJosef Bacik 			continue;
26438c2a1a30SJosef Bacik 		if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
26448c2a1a30SJosef Bacik 			continue;
26458c2a1a30SJosef Bacik 		key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
26468c2a1a30SJosef Bacik 		key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
2647b89311efSGu Jinxiang 		ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
2648b89311efSGu Jinxiang 		if (ret)
2649b89311efSGu Jinxiang 			break;
26508c2a1a30SJosef Bacik 	}
26518c2a1a30SJosef Bacik 
2652b89311efSGu Jinxiang 	return ret;
26538c2a1a30SJosef Bacik }
26548c2a1a30SJosef Bacik 
26559cfa3e34SFilipe Manana static void
btrfs_inc_block_group_reservations(struct btrfs_block_group * bg)265632da5386SDavid Sterba btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
26579cfa3e34SFilipe Manana {
26589cfa3e34SFilipe Manana 	atomic_inc(&bg->reservations);
26599cfa3e34SFilipe Manana }
26609cfa3e34SFilipe Manana 
2661c759c4e1SJosef Bacik /*
2662c759c4e1SJosef Bacik  * Returns the free cluster for the given space info and sets empty_cluster to
2663c759c4e1SJosef Bacik  * what it should be based on the mount options.
2664c759c4e1SJosef Bacik  */
2665c759c4e1SJosef Bacik static struct btrfs_free_cluster *
fetch_cluster_info(struct btrfs_fs_info * fs_info,struct btrfs_space_info * space_info,u64 * empty_cluster)26662ff7e61eSJeff Mahoney fetch_cluster_info(struct btrfs_fs_info *fs_info,
26672ff7e61eSJeff Mahoney 		   struct btrfs_space_info *space_info, u64 *empty_cluster)
2668c759c4e1SJosef Bacik {
2669c759c4e1SJosef Bacik 	struct btrfs_free_cluster *ret = NULL;
2670c759c4e1SJosef Bacik 
2671c759c4e1SJosef Bacik 	*empty_cluster = 0;
2672c759c4e1SJosef Bacik 	if (btrfs_mixed_space_info(space_info))
2673c759c4e1SJosef Bacik 		return ret;
2674c759c4e1SJosef Bacik 
2675c759c4e1SJosef Bacik 	if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
26760b246afaSJeff Mahoney 		ret = &fs_info->meta_alloc_cluster;
2677583b7231SHans van Kranenburg 		if (btrfs_test_opt(fs_info, SSD))
2678583b7231SHans van Kranenburg 			*empty_cluster = SZ_2M;
2679583b7231SHans van Kranenburg 		else
2680ee22184bSByongho Lee 			*empty_cluster = SZ_64K;
2681583b7231SHans van Kranenburg 	} else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
2682583b7231SHans van Kranenburg 		   btrfs_test_opt(fs_info, SSD_SPREAD)) {
2683583b7231SHans van Kranenburg 		*empty_cluster = SZ_2M;
26840b246afaSJeff Mahoney 		ret = &fs_info->data_alloc_cluster;
2685c759c4e1SJosef Bacik 	}
2686c759c4e1SJosef Bacik 
2687c759c4e1SJosef Bacik 	return ret;
2688c759c4e1SJosef Bacik }
2689c759c4e1SJosef Bacik 
/*
 * Undo the pinned accounting for the inclusive range [@start, @end].
 *
 * The range is processed one block group at a time.  For each piece:
 *  - if @return_free_space is set, the piece is added back to the block
 *    group's free space;
 *  - the pinned counters of the block group and its space_info are lowered;
 *  - if the block group is read-only the bytes go to bytes_readonly, and on a
 *    zoned filesystem they go to bytes_zone_unusable; in both cases they are
 *    not handed out again here;
 *  - otherwise, when @return_free_space is set, the bytes first top up the
 *    global block reserve (if it uses the same space_info and is not full)
 *    and any remainder is offered to waiting tickets.
 *
 * A block group lookup failure is treated as a logic error (BUG_ON).
 * Always returns 0.
 */
static int unpin_extent_range(struct btrfs_fs_info *fs_info,
			      u64 start, u64 end,
			      const bool return_free_space)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	struct btrfs_block_group *bg = NULL;
	struct btrfs_free_cluster *cluster = NULL;
	u64 unpinned_in_bg = 0;
	u64 cluster_threshold = 0;

	while (start <= end) {
		struct btrfs_space_info *space_info;
		bool usable = true;
		u64 len;

		/* Move on to the next block group once @start leaves this one. */
		if (!bg || start >= bg->start + bg->length) {
			if (bg)
				btrfs_put_block_group(bg);
			unpinned_in_bg = 0;
			bg = btrfs_lookup_block_group(fs_info, start);
			BUG_ON(!bg); /* Logic error */

			cluster = fetch_cluster_info(fs_info, bg->space_info,
						     &cluster_threshold);
			/* The threshold used below is twice the empty cluster size. */
			cluster_threshold <<= 1;
		}

		/* Clamp the piece to both the block group and the range. */
		len = min(bg->start + bg->length - start, end + 1 - start);

		if (return_free_space)
			btrfs_add_free_space(bg, start, len);

		start += len;
		unpinned_in_bg += len;
		space_info = bg->space_info;

		/*
		 * Enough has been unpinned in this block group that a cluster
		 * might fit again, so stop treating the cluster as fragmented.
		 */
		if (cluster && cluster->fragmented &&
		    unpinned_in_bg > cluster_threshold) {
			spin_lock(&cluster->lock);
			cluster->fragmented = 0;
			spin_unlock(&cluster->lock);
		}

		/* Lock order: space_info first, block group second. */
		spin_lock(&space_info->lock);
		spin_lock(&bg->lock);
		bg->pinned -= len;
		btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
		space_info->max_extent_size = 0;
		if (bg->ro) {
			space_info->bytes_readonly += len;
			usable = false;
		} else if (btrfs_is_zoned(fs_info)) {
			/* Need reset before reusing in a zoned block group */
			space_info->bytes_zone_unusable += len;
			usable = false;
		}
		spin_unlock(&bg->lock);

		if (usable && return_free_space) {
			if (global_rsv->space_info == space_info) {
				spin_lock(&global_rsv->lock);
				if (!global_rsv->full) {
					u64 to_add = min(len, global_rsv->size -
							      global_rsv->reserved);

					global_rsv->reserved += to_add;
					btrfs_space_info_update_bytes_may_use(fs_info,
							space_info, to_add);
					if (global_rsv->reserved >= global_rsv->size)
						global_rsv->full = 1;
					len -= to_add;
				}
				spin_unlock(&global_rsv->lock);
			}

			/* Whatever the global reserve did not take goes to tickets. */
			if (len)
				btrfs_try_granting_tickets(fs_info, space_info);
		}
		spin_unlock(&space_info->lock);
	}

	if (bg)
		btrfs_put_block_group(bg);
	return 0;
}
2782ccd467d6SChris Mason 
btrfs_finish_extent_commit(struct btrfs_trans_handle * trans)27835ead2dd0SNikolay Borisov int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
2784a28ec197SChris Mason {
27855ead2dd0SNikolay Borisov 	struct btrfs_fs_info *fs_info = trans->fs_info;
278632da5386SDavid Sterba 	struct btrfs_block_group *block_group, *tmp;
2787e33e17eeSJeff Mahoney 	struct list_head *deleted_bgs;
278811833d66SYan Zheng 	struct extent_io_tree *unpin;
27891a5bc167SChris Mason 	u64 start;
27901a5bc167SChris Mason 	u64 end;
2791a28ec197SChris Mason 	int ret;
2792a28ec197SChris Mason 
2793fe119a6eSNikolay Borisov 	unpin = &trans->transaction->pinned_extents;
279411833d66SYan Zheng 
2795bf31f87fSDavid Sterba 	while (!TRANS_ABORTED(trans)) {
27960e6ec385SFilipe Manana 		struct extent_state *cached_state = NULL;
27970e6ec385SFilipe Manana 
2798d4b450cdSFilipe Manana 		mutex_lock(&fs_info->unused_bg_unpin_mutex);
2799e5860f82SFilipe Manana 		if (!find_first_extent_bit(unpin, 0, &start, &end,
2800e5860f82SFilipe Manana 					   EXTENT_DIRTY, &cached_state)) {
2801d4b450cdSFilipe Manana 			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2802a28ec197SChris Mason 			break;
2803d4b450cdSFilipe Manana 		}
28041f3c79a2SLiu Hui 
280546b27f50SDennis Zhou 		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
28062ff7e61eSJeff Mahoney 			ret = btrfs_discard_extent(fs_info, start,
28075378e607SLi Dongyang 						   end + 1 - start, NULL);
28081f3c79a2SLiu Hui 
28090e6ec385SFilipe Manana 		clear_extent_dirty(unpin, start, end, &cached_state);
28102ff7e61eSJeff Mahoney 		unpin_extent_range(fs_info, start, end, true);
2811d4b450cdSFilipe Manana 		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
28120e6ec385SFilipe Manana 		free_extent_state(cached_state);
2813c286ac48SChris Mason 		cond_resched();
28140579da42SChris Mason 	}
2815817d52f8SJosef Bacik 
2816a2309300SDennis Zhou 	if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
2817a2309300SDennis Zhou 		btrfs_discard_calc_delay(&fs_info->discard_ctl);
2818b0643e59SDennis Zhou 		btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
2819a2309300SDennis Zhou 	}
2820b0643e59SDennis Zhou 
2821e33e17eeSJeff Mahoney 	/*
2822e33e17eeSJeff Mahoney 	 * Transaction is finished.  We don't need the lock anymore.  We
2823e33e17eeSJeff Mahoney 	 * do need to clean up the block groups in case of a transaction
2824e33e17eeSJeff Mahoney 	 * abort.
2825e33e17eeSJeff Mahoney 	 */
2826e33e17eeSJeff Mahoney 	deleted_bgs = &trans->transaction->deleted_bgs;
2827e33e17eeSJeff Mahoney 	list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
2828e33e17eeSJeff Mahoney 		u64 trimmed = 0;
2829e33e17eeSJeff Mahoney 
2830e33e17eeSJeff Mahoney 		ret = -EROFS;
2831bf31f87fSDavid Sterba 		if (!TRANS_ABORTED(trans))
28322ff7e61eSJeff Mahoney 			ret = btrfs_discard_extent(fs_info,
2833b3470b5dSDavid Sterba 						   block_group->start,
2834b3470b5dSDavid Sterba 						   block_group->length,
2835e33e17eeSJeff Mahoney 						   &trimmed);
2836e33e17eeSJeff Mahoney 
2837e33e17eeSJeff Mahoney 		list_del_init(&block_group->bg_list);
28386b7304afSFilipe Manana 		btrfs_unfreeze_block_group(block_group);
2839e33e17eeSJeff Mahoney 		btrfs_put_block_group(block_group);
2840e33e17eeSJeff Mahoney 
2841e33e17eeSJeff Mahoney 		if (ret) {
2842e33e17eeSJeff Mahoney 			const char *errstr = btrfs_decode_error(ret);
2843e33e17eeSJeff Mahoney 			btrfs_warn(fs_info,
2844913e1535SDavid Sterba 			   "discard failed while removing blockgroup: errno=%d %s",
2845e33e17eeSJeff Mahoney 				   ret, errstr);
2846e33e17eeSJeff Mahoney 		}
2847e33e17eeSJeff Mahoney 	}
2848e33e17eeSJeff Mahoney 
2849e20d96d6SChris Mason 	return 0;
2850e20d96d6SChris Mason }
2851e20d96d6SChris Mason 
/*
 * Do the bookkeeping for a freed extent [@bytenr, @bytenr + @num_bytes):
 * delete its checksums when @is_data is set, add the range to the free space
 * tree, and update the block group accounting.
 *
 * The first step that fails aborts the transaction and its error is returned;
 * later steps are not attempted.  Returns 0 on success.
 */
static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
				     u64 bytenr, u64 num_bytes, bool is_data)
{
	int err;

	if (is_data) {
		struct btrfs_root *csum_root = btrfs_csum_root(trans->fs_info,
							       bytenr);

		err = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
		if (err) {
			btrfs_abort_transaction(trans, err);
			return err;
		}
	}

	err = add_to_free_space_tree(trans, bytenr, num_bytes);
	if (err) {
		btrfs_abort_transaction(trans, err);
		return err;
	}

	err = btrfs_update_block_group(trans, bytenr, num_bytes, false);
	if (err) {
		btrfs_abort_transaction(trans, err);
		return err;
	}

	return 0;
}
28808f8aa4c7SJosef Bacik 
/*
 * Abort the transaction with -EUCLEAN, print the leaf at @path->nodes[0] and
 * log a critical message built from @fmt and the remaining arguments.
 *
 * @trans and @path are parenthesized in the expansion so that expression
 * arguments (e.g. "&handle" or "paths + 1") bind correctly against "->".
 * Note that @trans is still expanded twice, so it must not have side effects.
 */
#define abort_and_dump(trans, path, fmt, args...)	\
({							\
	btrfs_abort_transaction((trans), -EUCLEAN);	\
	btrfs_print_leaf((path)->nodes[0]);		\
	btrfs_crit((trans)->fs_info, fmt, ##args);	\
})
2887eee3b811SQu Wenruo 
28881c2a07f5SQu Wenruo /*
28891c2a07f5SQu Wenruo  * Drop one or more refs of @node.
28901c2a07f5SQu Wenruo  *
28911c2a07f5SQu Wenruo  * 1. Locate the extent refs.
28921c2a07f5SQu Wenruo  *    It's either inline in EXTENT/METADATA_ITEM or in keyed SHARED_* item.
28931c2a07f5SQu Wenruo  *    Locate it, then reduce the refs number or remove the ref line completely.
28941c2a07f5SQu Wenruo  *
28951c2a07f5SQu Wenruo  * 2. Update the refs count in EXTENT/METADATA_ITEM
28961c2a07f5SQu Wenruo  *
28971c2a07f5SQu Wenruo  * Inline backref case:
28981c2a07f5SQu Wenruo  *
28991c2a07f5SQu Wenruo  * in extent tree we have:
29001c2a07f5SQu Wenruo  *
29011c2a07f5SQu Wenruo  * 	item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
29021c2a07f5SQu Wenruo  *		refs 2 gen 6 flags DATA
29031c2a07f5SQu Wenruo  *		extent data backref root FS_TREE objectid 258 offset 0 count 1
29041c2a07f5SQu Wenruo  *		extent data backref root FS_TREE objectid 257 offset 0 count 1
29051c2a07f5SQu Wenruo  *
29061c2a07f5SQu Wenruo  * This function gets called with:
29071c2a07f5SQu Wenruo  *
29081c2a07f5SQu Wenruo  *    node->bytenr = 13631488
29091c2a07f5SQu Wenruo  *    node->num_bytes = 1048576
29101c2a07f5SQu Wenruo  *    root_objectid = FS_TREE
29111c2a07f5SQu Wenruo  *    owner_objectid = 257
29121c2a07f5SQu Wenruo  *    owner_offset = 0
29131c2a07f5SQu Wenruo  *    refs_to_drop = 1
29141c2a07f5SQu Wenruo  *
29151c2a07f5SQu Wenruo  * Then we should get something like:
29161c2a07f5SQu Wenruo  *
29171c2a07f5SQu Wenruo  * 	item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
29181c2a07f5SQu Wenruo  *		refs 1 gen 6 flags DATA
29191c2a07f5SQu Wenruo  *		extent data backref root FS_TREE objectid 258 offset 0 count 1
29201c2a07f5SQu Wenruo  *
29211c2a07f5SQu Wenruo  * Keyed backref case:
29221c2a07f5SQu Wenruo  *
29231c2a07f5SQu Wenruo  * in extent tree we have:
29241c2a07f5SQu Wenruo  *
29251c2a07f5SQu Wenruo  *	item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
29261c2a07f5SQu Wenruo  *		refs 754 gen 6 flags DATA
29271c2a07f5SQu Wenruo  *	[...]
29281c2a07f5SQu Wenruo  *	item 2 key (13631488 EXTENT_DATA_REF <HASH>) itemoff 3915 itemsize 28
29291c2a07f5SQu Wenruo  *		extent data backref root FS_TREE objectid 866 offset 0 count 1
29301c2a07f5SQu Wenruo  *
29311c2a07f5SQu Wenruo  * This function gets called with:
29321c2a07f5SQu Wenruo  *
29331c2a07f5SQu Wenruo  *    node->bytenr = 13631488
29341c2a07f5SQu Wenruo  *    node->num_bytes = 1048576
29351c2a07f5SQu Wenruo  *    root_objectid = FS_TREE
29361c2a07f5SQu Wenruo  *    owner_objectid = 866
29371c2a07f5SQu Wenruo  *    owner_offset = 0
29381c2a07f5SQu Wenruo  *    refs_to_drop = 1
29391c2a07f5SQu Wenruo  *
29401c2a07f5SQu Wenruo  * Then we should get something like:
29411c2a07f5SQu Wenruo  *
29421c2a07f5SQu Wenruo  *	item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
29431c2a07f5SQu Wenruo  *		refs 753 gen 6 flags DATA
29441c2a07f5SQu Wenruo  *
29451c2a07f5SQu Wenruo  * And that (13631488 EXTENT_DATA_REF <HASH>) gets removed.
29461c2a07f5SQu Wenruo  */
/*
 * Parameter summary for __btrfs_free_extent():
 *
 * @trans:          transaction handle the extent tree is modified under
 * @node:           delayed ref node; supplies the extent's bytenr/num_bytes
 * @parent:         passed through to lookup_extent_backref()
 * @root_objectid:  passed through to lookup_extent_backref()
 * @owner_objectid: values below BTRFS_FIRST_FREE_OBJECTID denote a tree block
 *                  (and are compared against the METADATA_ITEM key offset and
 *                  the tree block level), anything else is a data extent
 * @owner_offset:   passed through to lookup_extent_backref()
 * @refs_to_drop:   number of references to drop; must be 1 for tree blocks
 * @extent_op:      optional extent op, applied only when references remain
 *
 * Returns 0 on success or the error of the step that failed.  -EUCLEAN is
 * used whenever the items found in the extent tree are inconsistent with the
 * reference being dropped.
 */
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_delayed_ref_node *node, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_fs_info *info = trans->fs_info;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_root *extent_root;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	int ret;
	int is_data;
	int extent_slot = 0;
	int found_extent = 0;
	int num_to_del = 1;
	u32 item_size;
	u64 refs;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);

	extent_root = btrfs_extent_root(info, bytenr);
	ASSERT(extent_root);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;

	if (!is_data && refs_to_drop != 1) {
		btrfs_crit(info,
"invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
			   node->bytenr, refs_to_drop);
		ret = -EINVAL;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* Data extents never use the skinny METADATA_ITEM key. */
	if (is_data)
		skinny_metadata = false;

	ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
				    parent, root_objectid, owner_objectid,
				    owner_offset);
	if (ret == 0) {
		/*
		 * Either the inline backref or the SHARED_DATA_REF/
		 * SHARED_BLOCK_REF is found
		 *
		 * Here is a quick path to locate EXTENT/METADATA_ITEM.
		 * It's possible the EXTENT/METADATA_ITEM is near current slot.
		 */
		extent_slot = path->slots[0];
		while (extent_slot >= 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      extent_slot);
			if (key.objectid != bytenr)
				break;
			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
			    key.offset == num_bytes) {
				found_extent = 1;
				break;
			}
			if (key.type == BTRFS_METADATA_ITEM_KEY &&
			    key.offset == owner_objectid) {
				found_extent = 1;
				break;
			}

			/* Quick path didn't find the EXTENT/METADATA_ITEM */
			if (path->slots[0] - extent_slot > 5)
				break;
			extent_slot--;
		}

		if (!found_extent) {
			if (iref) {
				abort_and_dump(trans, path,
"invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref",
					   path->slots[0]);
				ret = -EUCLEAN;
				goto out;
			}
			/* Must be SHARED_* item, remove the backref first */
			ret = remove_extent_backref(trans, extent_root, path,
						    NULL, refs_to_drop, is_data);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
			btrfs_release_path(path);

			/* Slow path to locate EXTENT/METADATA_ITEM */
			key.objectid = bytenr;
			key.type = BTRFS_EXTENT_ITEM_KEY;
			key.offset = num_bytes;

			if (!is_data && skinny_metadata) {
				key.type = BTRFS_METADATA_ITEM_KEY;
				key.offset = owner_objectid;
			}

			ret = btrfs_search_slot(trans, extent_root,
						&key, path, -1, 1);
			if (ret > 0 && skinny_metadata && path->slots[0]) {
				/*
				 * Couldn't find our skinny metadata item,
				 * see if we have ye olde extent item.
				 */
				path->slots[0]--;
				btrfs_item_key_to_cpu(path->nodes[0], &key,
						      path->slots[0]);
				if (key.objectid == bytenr &&
				    key.type == BTRFS_EXTENT_ITEM_KEY &&
				    key.offset == num_bytes)
					ret = 0;
			}

			if (ret > 0 && skinny_metadata) {
				/* Last resort: search for the full EXTENT_ITEM key. */
				skinny_metadata = false;
				key.objectid = bytenr;
				key.type = BTRFS_EXTENT_ITEM_KEY;
				key.offset = num_bytes;
				btrfs_release_path(path);
				ret = btrfs_search_slot(trans, extent_root,
							&key, path, -1, 1);
			}

			if (ret) {
				if (ret > 0)
					btrfs_print_leaf(path->nodes[0]);
				btrfs_err(info,
			"umm, got %d back from search, was looking for %llu, slot %d",
					  ret, bytenr, path->slots[0]);
				/*
				 * A positive return means the EXTENT/METADATA_ITEM
				 * does not exist.  path->slots[0] then refers to
				 * an unrelated item (or to one past the last item
				 * of the leaf), so it must not be used as the
				 * extent item below.  Treat it as corruption.
				 */
				if (ret > 0)
					ret = -EUCLEAN;
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
			extent_slot = path->slots[0];
		}
	} else if (WARN_ON(ret == -ENOENT)) {
		abort_and_dump(trans, path,
"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu slot %d",
			       bytenr, parent, root_objectid, owner_objectid,
			       owner_offset, path->slots[0]);
		goto out;
	} else {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size(leaf, extent_slot);
	if (unlikely(item_size < sizeof(*ei))) {
		ret = -EUCLEAN;
		btrfs_err(trans->fs_info,
			  "unexpected extent item size, has %u expect >= %zu",
			  item_size, sizeof(*ei));
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	ei = btrfs_item_ptr(leaf, extent_slot,
			    struct btrfs_extent_item);
	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
	    key.type == BTRFS_EXTENT_ITEM_KEY) {
		struct btrfs_tree_block_info *bi;

		/* A tree block EXTENT_ITEM carries a tree_block_info after it. */
		if (item_size < sizeof(*ei) + sizeof(*bi)) {
			abort_and_dump(trans, path,
"invalid extent item size for key (%llu, %u, %llu) slot %u owner %llu, has %u expect >= %zu",
				       key.objectid, key.type, key.offset,
				       path->slots[0], owner_objectid, item_size,
				       sizeof(*ei) + sizeof(*bi));
			ret = -EUCLEAN;
			goto out;
		}
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
	}

	refs = btrfs_extent_refs(leaf, ei);
	if (refs < refs_to_drop) {
		abort_and_dump(trans, path,
		"trying to drop %d refs but we only have %llu for bytenr %llu slot %u",
			       refs_to_drop, refs, bytenr, path->slots[0]);
		ret = -EUCLEAN;
		goto out;
	}
	refs -= refs_to_drop;

	if (refs > 0) {
		if (extent_op)
			__run_delayed_extent_op(extent_op, leaf, ei);
		/*
		 * In the case of inline back ref, reference count will
		 * be updated by remove_extent_backref
		 */
		if (iref) {
			if (!found_extent) {
				abort_and_dump(trans, path,
"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u",
					       path->slots[0]);
				ret = -EUCLEAN;
				goto out;
			}
		} else {
			btrfs_set_extent_refs(leaf, ei, refs);
			btrfs_mark_buffer_dirty(trans, leaf);
		}
		if (found_extent) {
			ret = remove_extent_backref(trans, extent_root, path,
						    iref, refs_to_drop, is_data);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
		}
	} else {
		/*
		 * In this branch the extent had exactly refs_to_drop references
		 * left (refs is now 0), so the extent item itself is deleted.
		 */
		if (found_extent) {
			if (is_data && refs_to_drop !=
			    extent_data_ref_count(path, iref)) {
				abort_and_dump(trans, path,
		"invalid refs_to_drop, current refs %u refs_to_drop %u slot %u",
					       extent_data_ref_count(path, iref),
					       refs_to_drop, path->slots[0]);
				ret = -EUCLEAN;
				goto out;
			}
			if (iref) {
				if (path->slots[0] != extent_slot) {
					abort_and_dump(trans, path,
"invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref",
						       key.objectid, key.type,
						       key.offset, path->slots[0]);
					ret = -EUCLEAN;
					goto out;
				}
			} else {
				/*
				 * No inline ref, we must be at SHARED_* item,
				 * And it's single ref, it must be:
				 * |	extent_slot	  ||extent_slot + 1|
				 * [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ]
				 */
				if (path->slots[0] != extent_slot + 1) {
					abort_and_dump(trans, path,
	"invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM",
						       path->slots[0]);
					ret = -EUCLEAN;
					goto out;
				}
				/* Delete the extent item and its SHARED_* item together. */
				path->slots[0] = extent_slot;
				num_to_del = 2;
			}
		}

		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
				      num_to_del);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		btrfs_release_path(path);

		ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
	}
	btrfs_release_path(path);

out:
	btrfs_free_path(path);
	return ret;
}
3225a28ec197SChris Mason 
3226a28ec197SChris Mason /*
3227f0486c68SYan, Zheng  * when we free an block, it is possible (and likely) that we free the last
32281887be66SChris Mason  * delayed ref for that extent as well.  This searches the delayed ref tree for
32291887be66SChris Mason  * a given extent, and if there are no other delayed refs to be processed, it
32301887be66SChris Mason  * removes it from the tree.
32311887be66SChris Mason  */
check_ref_cleanup(struct btrfs_trans_handle * trans,u64 bytenr)32321887be66SChris Mason static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
32332ff7e61eSJeff Mahoney 				      u64 bytenr)
32341887be66SChris Mason {
32351887be66SChris Mason 	struct btrfs_delayed_ref_head *head;
32361887be66SChris Mason 	struct btrfs_delayed_ref_root *delayed_refs;
3237f0486c68SYan, Zheng 	int ret = 0;
32381887be66SChris Mason 
32391887be66SChris Mason 	delayed_refs = &trans->transaction->delayed_refs;
32401887be66SChris Mason 	spin_lock(&delayed_refs->lock);
3241f72ad18eSLiu Bo 	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
32421887be66SChris Mason 	if (!head)
3243cf93da7bSChris Mason 		goto out_delayed_unlock;
32441887be66SChris Mason 
3245d7df2c79SJosef Bacik 	spin_lock(&head->lock);
3246e3d03965SLiu Bo 	if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
32471887be66SChris Mason 		goto out;
32481887be66SChris Mason 
3249bedc6617SJosef Bacik 	if (cleanup_extent_op(head) != NULL)
32505d4f98a2SYan Zheng 		goto out;
32515d4f98a2SYan Zheng 
32521887be66SChris Mason 	/*
32531887be66SChris Mason 	 * waiting for the lock here would deadlock.  If someone else has it
32541887be66SChris Mason 	 * locked they are already in the process of dropping it anyway
32551887be66SChris Mason 	 */
32561887be66SChris Mason 	if (!mutex_trylock(&head->mutex))
32571887be66SChris Mason 		goto out;
32581887be66SChris Mason 
3259d7baffdaSJosef Bacik 	btrfs_delete_ref_head(delayed_refs, head);
326061c681feSFilipe Manana 	head->processing = false;
3261d7baffdaSJosef Bacik 
3262d7df2c79SJosef Bacik 	spin_unlock(&head->lock);
32631887be66SChris Mason 	spin_unlock(&delayed_refs->lock);
32641887be66SChris Mason 
3265f0486c68SYan, Zheng 	BUG_ON(head->extent_op);
3266f0486c68SYan, Zheng 	if (head->must_insert_reserved)
3267f0486c68SYan, Zheng 		ret = 1;
3268f0486c68SYan, Zheng 
326931890da0SJosef Bacik 	btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
3270f0486c68SYan, Zheng 	mutex_unlock(&head->mutex);
3271d278850eSJosef Bacik 	btrfs_put_delayed_ref_head(head);
3272f0486c68SYan, Zheng 	return ret;
32731887be66SChris Mason out:
3274d7df2c79SJosef Bacik 	spin_unlock(&head->lock);
3275cf93da7bSChris Mason 
3276cf93da7bSChris Mason out_delayed_unlock:
32771887be66SChris Mason 	spin_unlock(&delayed_refs->lock);
32781887be66SChris Mason 	return 0;
32791887be66SChris Mason }
32801887be66SChris Mason 
/*
 * Drop a reference on the tree block @buf.
 *
 * Unless @root_id is the log tree, a BTRFS_DROP_DELAYED_REF delayed tree ref
 * is queued for the block.  When @last_ref is set and the block was created
 * in the running transaction (its header generation equals trans->transid),
 * the space is additionally either pinned or handed straight back to the
 * block group's free space, depending on the checks below.
 *
 * @parent is forwarded to btrfs_init_generic_ref().
 */
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
			   u64 root_id,
			   struct extent_buffer *buf,
			   u64 parent, int last_ref)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	const bool is_log_tree = (root_id == BTRFS_TREE_LOG_OBJECTID);
	struct btrfs_ref ref = { 0 };
	struct btrfs_block_group *bg;
	int ret;

	btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
			       buf->start, buf->len, parent);
	btrfs_init_tree_ref(&ref, btrfs_header_level(buf), root_id, 0, false);

	if (!is_log_tree) {
		btrfs_ref_tree_mod(fs_info, &ref);
		ret = btrfs_add_delayed_tree_ref(trans, &ref, NULL);
		BUG_ON(ret); /* -ENOMEM */
	}

	/* Nothing more to do unless this is the last ref of a new block. */
	if (!last_ref || btrfs_header_generation(buf) != trans->transid)
		goto out;

	if (!is_log_tree) {
		ret = check_ref_cleanup(trans, buf->start);
		if (!ret) {
			btrfs_redirty_list_add(trans->transaction, buf);
			goto out;
		}
	}

	bg = btrfs_lookup_block_group(fs_info, buf->start);

	/* A block that has already been written gets pinned. */
	if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
		pin_down_extent(trans, bg, buf->start, buf->len, 1);
		btrfs_put_block_group(bg);
		goto out;
	}

	/*
	 * With tree mod log users around, operations on this node may have
	 * been recorded in the mod log.  Were the node re-allocated, those
	 * operations could be replayed although they happened while the node
	 * belonged to a completely different root: e.g. the block was part of
	 * root A, got reallocated to root B, and a btrfs_old_search_slot() on
	 * root B replays what was logged for root A, yielding an inconsistent
	 * view of the btree.  So pin the block instead of freeing it.
	 *
	 * There is no race with new users: at this point no other node or
	 * root points to this extent buffer, so a tree mod log user joining
	 * after the check cannot have an existing log of operations on this
	 * node that we would have to contend with.
	 *
	 * Zoned filesystems take the same redirty-and-pin route.
	 */
	if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags) ||
	    btrfs_is_zoned(fs_info)) {
		btrfs_redirty_list_add(trans->transaction, buf);
		pin_down_extent(trans, bg, buf->start, buf->len, 1);
		btrfs_put_block_group(bg);
		goto out;
	}

	WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));

	/* Give the space back to the block group right away. */
	btrfs_add_free_space(bg, buf->start, buf->len);
	btrfs_free_reserved_bytes(bg, buf->len, 0);
	btrfs_put_block_group(bg);
	trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);

out:
	/*
	 * The buffer is being deleted, so the corrupt flag no longer matters
	 * and is cleared.
	 */
	if (last_ref)
		clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
}
3363f0486c68SYan, Zheng 
336479787eaaSJeff Mahoney /* Can return -ENOMEM */
btrfs_free_extent(struct btrfs_trans_handle * trans,struct btrfs_ref * ref)3365ffd4bb2aSQu Wenruo int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
3366925baeddSChris Mason {
3367ffd4bb2aSQu Wenruo 	struct btrfs_fs_info *fs_info = trans->fs_info;
3368925baeddSChris Mason 	int ret;
3369925baeddSChris Mason 
3370f5ee5c9aSJeff Mahoney 	if (btrfs_is_testing(fs_info))
3371faa2dbf0SJosef Bacik 		return 0;
3372fccb84c9SDavid Sterba 
337356bec294SChris Mason 	/*
337456bec294SChris Mason 	 * tree log blocks never actually go into the extent allocation
337556bec294SChris Mason 	 * tree, just update pinning info and exit early.
337656bec294SChris Mason 	 */
3377ffd4bb2aSQu Wenruo 	if ((ref->type == BTRFS_REF_METADATA &&
3378113479d5SNikolay Borisov 	     ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
3379ffd4bb2aSQu Wenruo 	    (ref->type == BTRFS_REF_DATA &&
3380113479d5SNikolay Borisov 	     ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)) {
3381b9473439SChris Mason 		/* unlocks the pinned mutex */
3382b25c36f8SNikolay Borisov 		btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
338356bec294SChris Mason 		ret = 0;
3384ffd4bb2aSQu Wenruo 	} else if (ref->type == BTRFS_REF_METADATA) {
33852187374fSJosef Bacik 		ret = btrfs_add_delayed_tree_ref(trans, ref, NULL);
33865d4f98a2SYan Zheng 	} else {
33872187374fSJosef Bacik 		ret = btrfs_add_delayed_data_ref(trans, ref, 0);
338856bec294SChris Mason 	}
3389d7eae340SOmar Sandoval 
3390ffd4bb2aSQu Wenruo 	if (!((ref->type == BTRFS_REF_METADATA &&
3391113479d5SNikolay Borisov 	       ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
3392ffd4bb2aSQu Wenruo 	      (ref->type == BTRFS_REF_DATA &&
3393113479d5SNikolay Borisov 	       ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)))
3394ffd4bb2aSQu Wenruo 		btrfs_ref_tree_mod(fs_info, ref);
33958a5040f7SQu Wenruo 
3396925baeddSChris Mason 	return ret;
3397925baeddSChris Mason }
3398925baeddSChris Mason 
/*
 * Stages of the free extent search loop.
 *
 * The numeric order of the enumerators is significant: callers compare the
 * current stage ordinally (e.g. "loop >= LOOP_NO_EMPTY_SIZE" in
 * find_free_extent_clustered()), so new stages must be inserted with care.
 */
enum btrfs_loop_type {
	/*
	 * Kick off caching of block groups, but neither wait for caching
	 * progress nor for its completion.
	 */
	LOOP_CACHING_NOWAIT = 0,

	/*
	 * For a block group that is not cached yet, wait until its free_space
	 * is >= the space we are waiting for.
	 */
	LOOP_CACHING_WAIT,

	/*
	 * Also permit allocations from block groups that have no size
	 * classification yet.
	 */
	LOOP_UNSET_SIZE_CLASS,

	/* Allocate a chunk, then retry the allocation. */
	LOOP_ALLOC_CHUNK,

	/* Disregard the size class restrictions for this allocation. */
	LOOP_WRONG_SIZE_CLASS,

	/*
	 * Disregard the empty size and try to allocate only the number of
	 * bytes this allocation actually needs.
	 */
	LOOP_NO_EMPTY_SIZE,
};
3434817d52f8SJosef Bacik 
3435e570fd27SMiao Xie static inline void
btrfs_lock_block_group(struct btrfs_block_group * cache,int delalloc)343632da5386SDavid Sterba btrfs_lock_block_group(struct btrfs_block_group *cache,
3437e570fd27SMiao Xie 		       int delalloc)
3438e570fd27SMiao Xie {
3439e570fd27SMiao Xie 	if (delalloc)
3440e570fd27SMiao Xie 		down_read(&cache->data_rwsem);
3441e570fd27SMiao Xie }
3442e570fd27SMiao Xie 
btrfs_grab_block_group(struct btrfs_block_group * cache,int delalloc)344332da5386SDavid Sterba static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
3444e570fd27SMiao Xie 		       int delalloc)
3445e570fd27SMiao Xie {
3446e570fd27SMiao Xie 	btrfs_get_block_group(cache);
3447e570fd27SMiao Xie 	if (delalloc)
3448e570fd27SMiao Xie 		down_read(&cache->data_rwsem);
3449e570fd27SMiao Xie }
3450e570fd27SMiao Xie 
btrfs_lock_cluster(struct btrfs_block_group * block_group,struct btrfs_free_cluster * cluster,int delalloc)345132da5386SDavid Sterba static struct btrfs_block_group *btrfs_lock_cluster(
345232da5386SDavid Sterba 		   struct btrfs_block_group *block_group,
3453e570fd27SMiao Xie 		   struct btrfs_free_cluster *cluster,
3454e570fd27SMiao Xie 		   int delalloc)
3455c142c6a4SJules Irenge 	__acquires(&cluster->refill_lock)
3456e570fd27SMiao Xie {
345732da5386SDavid Sterba 	struct btrfs_block_group *used_bg = NULL;
34586719afdcSGeert Uytterhoeven 
3459e570fd27SMiao Xie 	spin_lock(&cluster->refill_lock);
34606719afdcSGeert Uytterhoeven 	while (1) {
3461e570fd27SMiao Xie 		used_bg = cluster->block_group;
3462e570fd27SMiao Xie 		if (!used_bg)
3463e570fd27SMiao Xie 			return NULL;
3464e570fd27SMiao Xie 
3465e570fd27SMiao Xie 		if (used_bg == block_group)
3466e570fd27SMiao Xie 			return used_bg;
3467e570fd27SMiao Xie 
3468e570fd27SMiao Xie 		btrfs_get_block_group(used_bg);
3469e570fd27SMiao Xie 
3470e570fd27SMiao Xie 		if (!delalloc)
3471e570fd27SMiao Xie 			return used_bg;
3472e570fd27SMiao Xie 
3473e570fd27SMiao Xie 		if (down_read_trylock(&used_bg->data_rwsem))
3474e570fd27SMiao Xie 			return used_bg;
3475e570fd27SMiao Xie 
3476e570fd27SMiao Xie 		spin_unlock(&cluster->refill_lock);
34776719afdcSGeert Uytterhoeven 
3478e321f8a8SLiu Bo 		/* We should only have one-level nested. */
3479e321f8a8SLiu Bo 		down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
34806719afdcSGeert Uytterhoeven 
34816719afdcSGeert Uytterhoeven 		spin_lock(&cluster->refill_lock);
34826719afdcSGeert Uytterhoeven 		if (used_bg == cluster->block_group)
34836719afdcSGeert Uytterhoeven 			return used_bg;
34846719afdcSGeert Uytterhoeven 
34856719afdcSGeert Uytterhoeven 		up_read(&used_bg->data_rwsem);
34866719afdcSGeert Uytterhoeven 		btrfs_put_block_group(used_bg);
34876719afdcSGeert Uytterhoeven 	}
3488e570fd27SMiao Xie }
3489e570fd27SMiao Xie 
3490e570fd27SMiao Xie static inline void
btrfs_release_block_group(struct btrfs_block_group * cache,int delalloc)349132da5386SDavid Sterba btrfs_release_block_group(struct btrfs_block_group *cache,
3492e570fd27SMiao Xie 			 int delalloc)
3493e570fd27SMiao Xie {
3494e570fd27SMiao Xie 	if (delalloc)
3495e570fd27SMiao Xie 		up_read(&cache->data_rwsem);
3496e570fd27SMiao Xie 	btrfs_put_block_group(cache);
3497e570fd27SMiao Xie }
3498e570fd27SMiao Xie 
3499d06e3bb6SQu Wenruo /*
3500d06e3bb6SQu Wenruo  * Helper function for find_free_extent().
3501d06e3bb6SQu Wenruo  *
3502d06e3bb6SQu Wenruo  * Return -ENOENT to inform caller that we need fallback to unclustered mode.
3503d06e3bb6SQu Wenruo  * Return >0 to inform caller that we find nothing
3504d06e3bb6SQu Wenruo  * Return 0 means we have found a location and set ffe_ctl->found_offset.
3505d06e3bb6SQu Wenruo  */
find_free_extent_clustered(struct btrfs_block_group * bg,struct find_free_extent_ctl * ffe_ctl,struct btrfs_block_group ** cluster_bg_ret)350632da5386SDavid Sterba static int find_free_extent_clustered(struct btrfs_block_group *bg,
3507d06e3bb6SQu Wenruo 				      struct find_free_extent_ctl *ffe_ctl,
350832da5386SDavid Sterba 				      struct btrfs_block_group **cluster_bg_ret)
3509d06e3bb6SQu Wenruo {
351032da5386SDavid Sterba 	struct btrfs_block_group *cluster_bg;
3511897cae79SNaohiro Aota 	struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3512d06e3bb6SQu Wenruo 	u64 aligned_cluster;
3513d06e3bb6SQu Wenruo 	u64 offset;
3514d06e3bb6SQu Wenruo 	int ret;
3515d06e3bb6SQu Wenruo 
3516d06e3bb6SQu Wenruo 	cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
3517d06e3bb6SQu Wenruo 	if (!cluster_bg)
3518d06e3bb6SQu Wenruo 		goto refill_cluster;
3519d06e3bb6SQu Wenruo 	if (cluster_bg != bg && (cluster_bg->ro ||
3520d06e3bb6SQu Wenruo 	    !block_group_bits(cluster_bg, ffe_ctl->flags)))
3521d06e3bb6SQu Wenruo 		goto release_cluster;
3522d06e3bb6SQu Wenruo 
3523d06e3bb6SQu Wenruo 	offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
3524b3470b5dSDavid Sterba 			ffe_ctl->num_bytes, cluster_bg->start,
3525d06e3bb6SQu Wenruo 			&ffe_ctl->max_extent_size);
3526d06e3bb6SQu Wenruo 	if (offset) {
3527d06e3bb6SQu Wenruo 		/* We have a block, we're done */
3528d06e3bb6SQu Wenruo 		spin_unlock(&last_ptr->refill_lock);
3529cfc2de0fSBoris Burkov 		trace_btrfs_reserve_extent_cluster(cluster_bg, ffe_ctl);
3530d06e3bb6SQu Wenruo 		*cluster_bg_ret = cluster_bg;
3531d06e3bb6SQu Wenruo 		ffe_ctl->found_offset = offset;
3532d06e3bb6SQu Wenruo 		return 0;
3533d06e3bb6SQu Wenruo 	}
3534d06e3bb6SQu Wenruo 	WARN_ON(last_ptr->block_group != cluster_bg);
3535d06e3bb6SQu Wenruo 
3536d06e3bb6SQu Wenruo release_cluster:
3537d06e3bb6SQu Wenruo 	/*
3538d06e3bb6SQu Wenruo 	 * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a new clusters, so
3539d06e3bb6SQu Wenruo 	 * lets just skip it and let the allocator find whatever block it can
3540d06e3bb6SQu Wenruo 	 * find. If we reach this point, we will have tried the cluster
3541d06e3bb6SQu Wenruo 	 * allocator plenty of times and not have found anything, so we are
3542d06e3bb6SQu Wenruo 	 * likely way too fragmented for the clustering stuff to find anything.
3543d06e3bb6SQu Wenruo 	 *
3544d06e3bb6SQu Wenruo 	 * However, if the cluster is taken from the current block group,
3545d06e3bb6SQu Wenruo 	 * release the cluster first, so that we stand a better chance of
3546d06e3bb6SQu Wenruo 	 * succeeding in the unclustered allocation.
3547d06e3bb6SQu Wenruo 	 */
3548d06e3bb6SQu Wenruo 	if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
3549d06e3bb6SQu Wenruo 		spin_unlock(&last_ptr->refill_lock);
3550d06e3bb6SQu Wenruo 		btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3551d06e3bb6SQu Wenruo 		return -ENOENT;
3552d06e3bb6SQu Wenruo 	}
3553d06e3bb6SQu Wenruo 
3554d06e3bb6SQu Wenruo 	/* This cluster didn't work out, free it and start over */
3555d06e3bb6SQu Wenruo 	btrfs_return_cluster_to_free_space(NULL, last_ptr);
3556d06e3bb6SQu Wenruo 
3557d06e3bb6SQu Wenruo 	if (cluster_bg != bg)
3558d06e3bb6SQu Wenruo 		btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3559d06e3bb6SQu Wenruo 
3560d06e3bb6SQu Wenruo refill_cluster:
3561d06e3bb6SQu Wenruo 	if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
3562d06e3bb6SQu Wenruo 		spin_unlock(&last_ptr->refill_lock);
3563d06e3bb6SQu Wenruo 		return -ENOENT;
3564d06e3bb6SQu Wenruo 	}
3565d06e3bb6SQu Wenruo 
3566d06e3bb6SQu Wenruo 	aligned_cluster = max_t(u64,
3567d06e3bb6SQu Wenruo 			ffe_ctl->empty_cluster + ffe_ctl->empty_size,
3568d06e3bb6SQu Wenruo 			bg->full_stripe_len);
35692ceeae2eSDavid Sterba 	ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
35702ceeae2eSDavid Sterba 			ffe_ctl->num_bytes, aligned_cluster);
3571d06e3bb6SQu Wenruo 	if (ret == 0) {
3572d06e3bb6SQu Wenruo 		/* Now pull our allocation out of this cluster */
3573d06e3bb6SQu Wenruo 		offset = btrfs_alloc_from_cluster(bg, last_ptr,
3574d06e3bb6SQu Wenruo 				ffe_ctl->num_bytes, ffe_ctl->search_start,
3575d06e3bb6SQu Wenruo 				&ffe_ctl->max_extent_size);
3576d06e3bb6SQu Wenruo 		if (offset) {
3577d06e3bb6SQu Wenruo 			/* We found one, proceed */
3578d06e3bb6SQu Wenruo 			spin_unlock(&last_ptr->refill_lock);
3579d06e3bb6SQu Wenruo 			ffe_ctl->found_offset = offset;
3580cfc2de0fSBoris Burkov 			trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
3581d06e3bb6SQu Wenruo 			return 0;
3582d06e3bb6SQu Wenruo 		}
3583d06e3bb6SQu Wenruo 	}
3584d06e3bb6SQu Wenruo 	/*
3585d06e3bb6SQu Wenruo 	 * At this point we either didn't find a cluster or we weren't able to
3586d06e3bb6SQu Wenruo 	 * allocate a block from our cluster.  Free the cluster we've been
3587d06e3bb6SQu Wenruo 	 * trying to use, and go to the next block group.
3588d06e3bb6SQu Wenruo 	 */
3589d06e3bb6SQu Wenruo 	btrfs_return_cluster_to_free_space(NULL, last_ptr);
3590d06e3bb6SQu Wenruo 	spin_unlock(&last_ptr->refill_lock);
3591d06e3bb6SQu Wenruo 	return 1;
3592d06e3bb6SQu Wenruo }
3593d06e3bb6SQu Wenruo 
3594b4bd745dSQu Wenruo /*
3595e1a41848SQu Wenruo  * Return >0 to inform caller that we find nothing
3596e1a41848SQu Wenruo  * Return 0 when we found an free extent and set ffe_ctrl->found_offset
3597e1a41848SQu Wenruo  */
find_free_extent_unclustered(struct btrfs_block_group * bg,struct find_free_extent_ctl * ffe_ctl)359832da5386SDavid Sterba static int find_free_extent_unclustered(struct btrfs_block_group *bg,
3599e1a41848SQu Wenruo 					struct find_free_extent_ctl *ffe_ctl)
3600e1a41848SQu Wenruo {
3601897cae79SNaohiro Aota 	struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3602e1a41848SQu Wenruo 	u64 offset;
3603e1a41848SQu Wenruo 
3604e1a41848SQu Wenruo 	/*
3605e1a41848SQu Wenruo 	 * We are doing an unclustered allocation, set the fragmented flag so
3606e1a41848SQu Wenruo 	 * we don't bother trying to setup a cluster again until we get more
3607e1a41848SQu Wenruo 	 * space.
3608e1a41848SQu Wenruo 	 */
3609e1a41848SQu Wenruo 	if (unlikely(last_ptr)) {
3610e1a41848SQu Wenruo 		spin_lock(&last_ptr->lock);
3611e1a41848SQu Wenruo 		last_ptr->fragmented = 1;
3612e1a41848SQu Wenruo 		spin_unlock(&last_ptr->lock);
3613e1a41848SQu Wenruo 	}
3614e1a41848SQu Wenruo 	if (ffe_ctl->cached) {
3615e1a41848SQu Wenruo 		struct btrfs_free_space_ctl *free_space_ctl;
3616e1a41848SQu Wenruo 
3617e1a41848SQu Wenruo 		free_space_ctl = bg->free_space_ctl;
3618e1a41848SQu Wenruo 		spin_lock(&free_space_ctl->tree_lock);
3619e1a41848SQu Wenruo 		if (free_space_ctl->free_space <
3620e1a41848SQu Wenruo 		    ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
3621e1a41848SQu Wenruo 		    ffe_ctl->empty_size) {
3622e1a41848SQu Wenruo 			ffe_ctl->total_free_space = max_t(u64,
3623e1a41848SQu Wenruo 					ffe_ctl->total_free_space,
3624e1a41848SQu Wenruo 					free_space_ctl->free_space);
3625e1a41848SQu Wenruo 			spin_unlock(&free_space_ctl->tree_lock);
3626e1a41848SQu Wenruo 			return 1;
3627e1a41848SQu Wenruo 		}
3628e1a41848SQu Wenruo 		spin_unlock(&free_space_ctl->tree_lock);
3629e1a41848SQu Wenruo 	}
3630e1a41848SQu Wenruo 
3631e1a41848SQu Wenruo 	offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
3632e1a41848SQu Wenruo 			ffe_ctl->num_bytes, ffe_ctl->empty_size,
3633e1a41848SQu Wenruo 			&ffe_ctl->max_extent_size);
3634cd361199SJosef Bacik 	if (!offset)
3635e1a41848SQu Wenruo 		return 1;
3636e1a41848SQu Wenruo 	ffe_ctl->found_offset = offset;
3637e1a41848SQu Wenruo 	return 0;
3638e1a41848SQu Wenruo }
3639e1a41848SQu Wenruo 
do_allocation_clustered(struct btrfs_block_group * block_group,struct find_free_extent_ctl * ffe_ctl,struct btrfs_block_group ** bg_ret)3640c668690dSNaohiro Aota static int do_allocation_clustered(struct btrfs_block_group *block_group,
3641c668690dSNaohiro Aota 				   struct find_free_extent_ctl *ffe_ctl,
3642c668690dSNaohiro Aota 				   struct btrfs_block_group **bg_ret)
3643c668690dSNaohiro Aota {
3644c668690dSNaohiro Aota 	int ret;
3645c668690dSNaohiro Aota 
3646c668690dSNaohiro Aota 	/* We want to try and use the cluster allocator, so lets look there */
3647c668690dSNaohiro Aota 	if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
3648897cae79SNaohiro Aota 		ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
3649cd361199SJosef Bacik 		if (ret >= 0)
3650c668690dSNaohiro Aota 			return ret;
3651c668690dSNaohiro Aota 		/* ret == -ENOENT case falls through */
3652c668690dSNaohiro Aota 	}
3653c668690dSNaohiro Aota 
3654897cae79SNaohiro Aota 	return find_free_extent_unclustered(block_group, ffe_ctl);
3655c668690dSNaohiro Aota }
3656c668690dSNaohiro Aota 
/*
 * Tree-log block group locking
 * ============================
 *
 * fs_info::treelog_bg_lock protects fs_info::treelog_bg, which holds the
 * starting address of the block group that is reserved only for tree-log
 * metadata.
 *
 * Lock nesting
 * ============
 *
 * space_info::lock
 *   block_group::lock
 *     fs_info::treelog_bg_lock
 */
367240ab3be1SNaohiro Aota 
367340ab3be1SNaohiro Aota /*
36742eda5708SNaohiro Aota  * Simple allocator for sequential-only block group. It only allows sequential
36752eda5708SNaohiro Aota  * allocation. No need to play with trees. This function also reserves the
36762eda5708SNaohiro Aota  * bytes as in btrfs_add_reserved_bytes.
36772eda5708SNaohiro Aota  */
do_allocation_zoned(struct btrfs_block_group * block_group,struct find_free_extent_ctl * ffe_ctl,struct btrfs_block_group ** bg_ret)36782eda5708SNaohiro Aota static int do_allocation_zoned(struct btrfs_block_group *block_group,
36792eda5708SNaohiro Aota 			       struct find_free_extent_ctl *ffe_ctl,
36802eda5708SNaohiro Aota 			       struct btrfs_block_group **bg_ret)
36812eda5708SNaohiro Aota {
368240ab3be1SNaohiro Aota 	struct btrfs_fs_info *fs_info = block_group->fs_info;
36832eda5708SNaohiro Aota 	struct btrfs_space_info *space_info = block_group->space_info;
36842eda5708SNaohiro Aota 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
36852eda5708SNaohiro Aota 	u64 start = block_group->start;
36862eda5708SNaohiro Aota 	u64 num_bytes = ffe_ctl->num_bytes;
36872eda5708SNaohiro Aota 	u64 avail;
368840ab3be1SNaohiro Aota 	u64 bytenr = block_group->start;
368940ab3be1SNaohiro Aota 	u64 log_bytenr;
3690c2707a25SJohannes Thumshirn 	u64 data_reloc_bytenr;
36912eda5708SNaohiro Aota 	int ret = 0;
36922d81eb1cSJohannes Thumshirn 	bool skip = false;
36932eda5708SNaohiro Aota 
36942eda5708SNaohiro Aota 	ASSERT(btrfs_is_zoned(block_group->fs_info));
36952eda5708SNaohiro Aota 
369640ab3be1SNaohiro Aota 	/*
369740ab3be1SNaohiro Aota 	 * Do not allow non-tree-log blocks in the dedicated tree-log block
369840ab3be1SNaohiro Aota 	 * group, and vice versa.
369940ab3be1SNaohiro Aota 	 */
370040ab3be1SNaohiro Aota 	spin_lock(&fs_info->treelog_bg_lock);
370140ab3be1SNaohiro Aota 	log_bytenr = fs_info->treelog_bg;
37022d81eb1cSJohannes Thumshirn 	if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
37032d81eb1cSJohannes Thumshirn 			   (!ffe_ctl->for_treelog && bytenr == log_bytenr)))
37042d81eb1cSJohannes Thumshirn 		skip = true;
370540ab3be1SNaohiro Aota 	spin_unlock(&fs_info->treelog_bg_lock);
370640ab3be1SNaohiro Aota 	if (skip)
370740ab3be1SNaohiro Aota 		return 1;
370840ab3be1SNaohiro Aota 
3709c2707a25SJohannes Thumshirn 	/*
3710c2707a25SJohannes Thumshirn 	 * Do not allow non-relocation blocks in the dedicated relocation block
3711c2707a25SJohannes Thumshirn 	 * group, and vice versa.
3712c2707a25SJohannes Thumshirn 	 */
3713c2707a25SJohannes Thumshirn 	spin_lock(&fs_info->relocation_bg_lock);
3714c2707a25SJohannes Thumshirn 	data_reloc_bytenr = fs_info->data_reloc_bg;
3715c2707a25SJohannes Thumshirn 	if (data_reloc_bytenr &&
3716c2707a25SJohannes Thumshirn 	    ((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
3717c2707a25SJohannes Thumshirn 	     (!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
3718c2707a25SJohannes Thumshirn 		skip = true;
3719c2707a25SJohannes Thumshirn 	spin_unlock(&fs_info->relocation_bg_lock);
3720c2707a25SJohannes Thumshirn 	if (skip)
3721c2707a25SJohannes Thumshirn 		return 1;
37221ada69f6SNaohiro Aota 
37232e654e4bSNaohiro Aota 	/* Check RO and no space case before trying to activate it */
37242e654e4bSNaohiro Aota 	spin_lock(&block_group->lock);
37251bfd4767SNaohiro Aota 	if (block_group->ro || btrfs_zoned_bg_is_full(block_group)) {
37261ada69f6SNaohiro Aota 		ret = 1;
37271ada69f6SNaohiro Aota 		/*
37281ada69f6SNaohiro Aota 		 * May need to clear fs_info->{treelog,data_reloc}_bg.
37291ada69f6SNaohiro Aota 		 * Return the error after taking the locks.
37301ada69f6SNaohiro Aota 		 */
37312e654e4bSNaohiro Aota 	}
37322e654e4bSNaohiro Aota 	spin_unlock(&block_group->lock);
37332e654e4bSNaohiro Aota 
37345a7d107eSNaohiro Aota 	/* Metadata block group is activated at write time. */
37355a7d107eSNaohiro Aota 	if (!ret && (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
37365a7d107eSNaohiro Aota 	    !btrfs_zone_activate(block_group)) {
37371ada69f6SNaohiro Aota 		ret = 1;
37381ada69f6SNaohiro Aota 		/*
37391ada69f6SNaohiro Aota 		 * May need to clear fs_info->{treelog,data_reloc}_bg.
37401ada69f6SNaohiro Aota 		 * Return the error after taking the locks.
37411ada69f6SNaohiro Aota 		 */
37421ada69f6SNaohiro Aota 	}
37432e654e4bSNaohiro Aota 
37442eda5708SNaohiro Aota 	spin_lock(&space_info->lock);
37452eda5708SNaohiro Aota 	spin_lock(&block_group->lock);
374640ab3be1SNaohiro Aota 	spin_lock(&fs_info->treelog_bg_lock);
3747c2707a25SJohannes Thumshirn 	spin_lock(&fs_info->relocation_bg_lock);
374840ab3be1SNaohiro Aota 
37491ada69f6SNaohiro Aota 	if (ret)
37501ada69f6SNaohiro Aota 		goto out;
37511ada69f6SNaohiro Aota 
375240ab3be1SNaohiro Aota 	ASSERT(!ffe_ctl->for_treelog ||
375340ab3be1SNaohiro Aota 	       block_group->start == fs_info->treelog_bg ||
375440ab3be1SNaohiro Aota 	       fs_info->treelog_bg == 0);
3755c2707a25SJohannes Thumshirn 	ASSERT(!ffe_ctl->for_data_reloc ||
3756c2707a25SJohannes Thumshirn 	       block_group->start == fs_info->data_reloc_bg ||
3757c2707a25SJohannes Thumshirn 	       fs_info->data_reloc_bg == 0);
37582eda5708SNaohiro Aota 
37593349b57fSJosef Bacik 	if (block_group->ro ||
3760332581bdSNaohiro Aota 	    (!ffe_ctl->for_data_reloc &&
3761332581bdSNaohiro Aota 	     test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))) {
37622eda5708SNaohiro Aota 		ret = 1;
37632eda5708SNaohiro Aota 		goto out;
37642eda5708SNaohiro Aota 	}
37652eda5708SNaohiro Aota 
376640ab3be1SNaohiro Aota 	/*
376740ab3be1SNaohiro Aota 	 * Do not allow currently using block group to be tree-log dedicated
376840ab3be1SNaohiro Aota 	 * block group.
376940ab3be1SNaohiro Aota 	 */
377040ab3be1SNaohiro Aota 	if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
377140ab3be1SNaohiro Aota 	    (block_group->used || block_group->reserved)) {
377240ab3be1SNaohiro Aota 		ret = 1;
377340ab3be1SNaohiro Aota 		goto out;
377440ab3be1SNaohiro Aota 	}
377540ab3be1SNaohiro Aota 
3776c2707a25SJohannes Thumshirn 	/*
3777c2707a25SJohannes Thumshirn 	 * Do not allow currently used block group to be the data relocation
3778c2707a25SJohannes Thumshirn 	 * dedicated block group.
3779c2707a25SJohannes Thumshirn 	 */
3780c2707a25SJohannes Thumshirn 	if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
3781c2707a25SJohannes Thumshirn 	    (block_group->used || block_group->reserved)) {
3782c2707a25SJohannes Thumshirn 		ret = 1;
3783c2707a25SJohannes Thumshirn 		goto out;
3784c2707a25SJohannes Thumshirn 	}
3785c2707a25SJohannes Thumshirn 
378698173255SNaohiro Aota 	WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity);
378798173255SNaohiro Aota 	avail = block_group->zone_capacity - block_group->alloc_offset;
37882eda5708SNaohiro Aota 	if (avail < num_bytes) {
37892eda5708SNaohiro Aota 		if (ffe_ctl->max_extent_size < avail) {
37902eda5708SNaohiro Aota 			/*
37912eda5708SNaohiro Aota 			 * With sequential allocator, free space is always
37922eda5708SNaohiro Aota 			 * contiguous
37932eda5708SNaohiro Aota 			 */
37942eda5708SNaohiro Aota 			ffe_ctl->max_extent_size = avail;
37952eda5708SNaohiro Aota 			ffe_ctl->total_free_space = avail;
37962eda5708SNaohiro Aota 		}
37972eda5708SNaohiro Aota 		ret = 1;
37982eda5708SNaohiro Aota 		goto out;
37992eda5708SNaohiro Aota 	}
38002eda5708SNaohiro Aota 
380140ab3be1SNaohiro Aota 	if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
380240ab3be1SNaohiro Aota 		fs_info->treelog_bg = block_group->start;
380340ab3be1SNaohiro Aota 
3804332581bdSNaohiro Aota 	if (ffe_ctl->for_data_reloc) {
3805332581bdSNaohiro Aota 		if (!fs_info->data_reloc_bg)
3806c2707a25SJohannes Thumshirn 			fs_info->data_reloc_bg = block_group->start;
3807332581bdSNaohiro Aota 		/*
3808332581bdSNaohiro Aota 		 * Do not allow allocations from this block group, unless it is
3809332581bdSNaohiro Aota 		 * for data relocation. Compared to increasing the ->ro, setting
3810332581bdSNaohiro Aota 		 * the ->zoned_data_reloc_ongoing flag still allows nocow
3811332581bdSNaohiro Aota 		 * writers to come in. See btrfs_inc_nocow_writers().
3812332581bdSNaohiro Aota 		 *
3813332581bdSNaohiro Aota 		 * We need to disable an allocation to avoid an allocation of
3814332581bdSNaohiro Aota 		 * regular (non-relocation data) extent. With mix of relocation
3815332581bdSNaohiro Aota 		 * extents and regular extents, we can dispatch WRITE commands
3816332581bdSNaohiro Aota 		 * (for relocation extents) and ZONE APPEND commands (for
3817332581bdSNaohiro Aota 		 * regular extents) at the same time to the same zone, which
3818332581bdSNaohiro Aota 		 * easily break the write pointer.
3819332581bdSNaohiro Aota 		 *
3820332581bdSNaohiro Aota 		 * Also, this flag avoids this block group to be zone finished.
3821332581bdSNaohiro Aota 		 */
3822332581bdSNaohiro Aota 		set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
3823332581bdSNaohiro Aota 	}
3824c2707a25SJohannes Thumshirn 
38252eda5708SNaohiro Aota 	ffe_ctl->found_offset = start + block_group->alloc_offset;
38262eda5708SNaohiro Aota 	block_group->alloc_offset += num_bytes;
38272eda5708SNaohiro Aota 	spin_lock(&ctl->tree_lock);
38282eda5708SNaohiro Aota 	ctl->free_space -= num_bytes;
38292eda5708SNaohiro Aota 	spin_unlock(&ctl->tree_lock);
38302eda5708SNaohiro Aota 
38312eda5708SNaohiro Aota 	/*
38322eda5708SNaohiro Aota 	 * We do not check if found_offset is aligned to stripesize. The
38332eda5708SNaohiro Aota 	 * address is anyway rewritten when using zone append writing.
38342eda5708SNaohiro Aota 	 */
38352eda5708SNaohiro Aota 
38362eda5708SNaohiro Aota 	ffe_ctl->search_start = ffe_ctl->found_offset;
38372eda5708SNaohiro Aota 
38382eda5708SNaohiro Aota out:
383940ab3be1SNaohiro Aota 	if (ret && ffe_ctl->for_treelog)
384040ab3be1SNaohiro Aota 		fs_info->treelog_bg = 0;
3841332581bdSNaohiro Aota 	if (ret && ffe_ctl->for_data_reloc)
3842c2707a25SJohannes Thumshirn 		fs_info->data_reloc_bg = 0;
3843c2707a25SJohannes Thumshirn 	spin_unlock(&fs_info->relocation_bg_lock);
384440ab3be1SNaohiro Aota 	spin_unlock(&fs_info->treelog_bg_lock);
38452eda5708SNaohiro Aota 	spin_unlock(&block_group->lock);
38462eda5708SNaohiro Aota 	spin_unlock(&space_info->lock);
38472eda5708SNaohiro Aota 	return ret;
38482eda5708SNaohiro Aota }
38492eda5708SNaohiro Aota 
do_allocation(struct btrfs_block_group * block_group,struct find_free_extent_ctl * ffe_ctl,struct btrfs_block_group ** bg_ret)3850c668690dSNaohiro Aota static int do_allocation(struct btrfs_block_group *block_group,
3851c668690dSNaohiro Aota 			 struct find_free_extent_ctl *ffe_ctl,
3852c668690dSNaohiro Aota 			 struct btrfs_block_group **bg_ret)
3853c668690dSNaohiro Aota {
3854c668690dSNaohiro Aota 	switch (ffe_ctl->policy) {
3855c668690dSNaohiro Aota 	case BTRFS_EXTENT_ALLOC_CLUSTERED:
3856c668690dSNaohiro Aota 		return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
38572eda5708SNaohiro Aota 	case BTRFS_EXTENT_ALLOC_ZONED:
38582eda5708SNaohiro Aota 		return do_allocation_zoned(block_group, ffe_ctl, bg_ret);
3859c668690dSNaohiro Aota 	default:
3860c668690dSNaohiro Aota 		BUG();
3861c668690dSNaohiro Aota 	}
3862c668690dSNaohiro Aota }
3863c668690dSNaohiro Aota 
release_block_group(struct btrfs_block_group * block_group,struct find_free_extent_ctl * ffe_ctl,int delalloc)3864baba5062SNaohiro Aota static void release_block_group(struct btrfs_block_group *block_group,
3865baba5062SNaohiro Aota 				struct find_free_extent_ctl *ffe_ctl,
3866baba5062SNaohiro Aota 				int delalloc)
3867baba5062SNaohiro Aota {
3868baba5062SNaohiro Aota 	switch (ffe_ctl->policy) {
3869baba5062SNaohiro Aota 	case BTRFS_EXTENT_ALLOC_CLUSTERED:
3870cd361199SJosef Bacik 		ffe_ctl->retry_uncached = false;
3871baba5062SNaohiro Aota 		break;
38722eda5708SNaohiro Aota 	case BTRFS_EXTENT_ALLOC_ZONED:
38732eda5708SNaohiro Aota 		/* Nothing to do */
38742eda5708SNaohiro Aota 		break;
3875baba5062SNaohiro Aota 	default:
3876baba5062SNaohiro Aota 		BUG();
3877baba5062SNaohiro Aota 	}
3878baba5062SNaohiro Aota 
3879baba5062SNaohiro Aota 	BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
3880baba5062SNaohiro Aota 	       ffe_ctl->index);
3881baba5062SNaohiro Aota 	btrfs_release_block_group(block_group, delalloc);
3882baba5062SNaohiro Aota }
3883baba5062SNaohiro Aota 
found_extent_clustered(struct find_free_extent_ctl * ffe_ctl,struct btrfs_key * ins)38840ab9724bSNaohiro Aota static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl,
38850ab9724bSNaohiro Aota 				   struct btrfs_key *ins)
38860ab9724bSNaohiro Aota {
38870ab9724bSNaohiro Aota 	struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
38880ab9724bSNaohiro Aota 
38890ab9724bSNaohiro Aota 	if (!ffe_ctl->use_cluster && last_ptr) {
38900ab9724bSNaohiro Aota 		spin_lock(&last_ptr->lock);
38910ab9724bSNaohiro Aota 		last_ptr->window_start = ins->objectid;
38920ab9724bSNaohiro Aota 		spin_unlock(&last_ptr->lock);
38930ab9724bSNaohiro Aota 	}
38940ab9724bSNaohiro Aota }
38950ab9724bSNaohiro Aota 
found_extent(struct find_free_extent_ctl * ffe_ctl,struct btrfs_key * ins)38960ab9724bSNaohiro Aota static void found_extent(struct find_free_extent_ctl *ffe_ctl,
38970ab9724bSNaohiro Aota 			 struct btrfs_key *ins)
38980ab9724bSNaohiro Aota {
38990ab9724bSNaohiro Aota 	switch (ffe_ctl->policy) {
39000ab9724bSNaohiro Aota 	case BTRFS_EXTENT_ALLOC_CLUSTERED:
39010ab9724bSNaohiro Aota 		found_extent_clustered(ffe_ctl, ins);
39020ab9724bSNaohiro Aota 		break;
39032eda5708SNaohiro Aota 	case BTRFS_EXTENT_ALLOC_ZONED:
39042eda5708SNaohiro Aota 		/* Nothing to do */
39052eda5708SNaohiro Aota 		break;
39060ab9724bSNaohiro Aota 	default:
39070ab9724bSNaohiro Aota 		BUG();
39080ab9724bSNaohiro Aota 	}
39090ab9724bSNaohiro Aota }
39100ab9724bSNaohiro Aota 
can_allocate_chunk_zoned(struct btrfs_fs_info * fs_info,struct find_free_extent_ctl * ffe_ctl)3911393f646eSNaohiro Aota static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
3912393f646eSNaohiro Aota 				    struct find_free_extent_ctl *ffe_ctl)
3913393f646eSNaohiro Aota {
39145a7d107eSNaohiro Aota 	/* Block group's activeness is not a requirement for METADATA block groups. */
39155a7d107eSNaohiro Aota 	if (!(ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA))
39165a7d107eSNaohiro Aota 		return 0;
39175a7d107eSNaohiro Aota 
3918393f646eSNaohiro Aota 	/* If we can activate new zone, just allocate a chunk and use it */
3919393f646eSNaohiro Aota 	if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
3920393f646eSNaohiro Aota 		return 0;
3921393f646eSNaohiro Aota 
3922393f646eSNaohiro Aota 	/*
3923393f646eSNaohiro Aota 	 * We already reached the max active zones. Try to finish one block
3924393f646eSNaohiro Aota 	 * group to make a room for a new block group. This is only possible
3925393f646eSNaohiro Aota 	 * for a data block group because btrfs_zone_finish() may need to wait
3926393f646eSNaohiro Aota 	 * for a running transaction which can cause a deadlock for metadata
3927393f646eSNaohiro Aota 	 * allocation.
3928393f646eSNaohiro Aota 	 */
3929393f646eSNaohiro Aota 	if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
3930393f646eSNaohiro Aota 		int ret = btrfs_zone_finish_one_bg(fs_info);
3931393f646eSNaohiro Aota 
3932393f646eSNaohiro Aota 		if (ret == 1)
3933393f646eSNaohiro Aota 			return 0;
3934393f646eSNaohiro Aota 		else if (ret < 0)
3935393f646eSNaohiro Aota 			return ret;
3936393f646eSNaohiro Aota 	}
3937393f646eSNaohiro Aota 
3938393f646eSNaohiro Aota 	/*
3939393f646eSNaohiro Aota 	 * If we have enough free space left in an already active block group
3940393f646eSNaohiro Aota 	 * and we can't activate any other zone now, do not allow allocating a
3941393f646eSNaohiro Aota 	 * new chunk and let find_free_extent() retry with a smaller size.
3942393f646eSNaohiro Aota 	 */
3943393f646eSNaohiro Aota 	if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
3944393f646eSNaohiro Aota 		return -ENOSPC;
3945393f646eSNaohiro Aota 
3946393f646eSNaohiro Aota 	/*
3947898793d9SNaohiro Aota 	 * Even min_alloc_size is not left in any block groups. Since we cannot
3948898793d9SNaohiro Aota 	 * activate a new block group, allocating it may not help. Let's tell a
3949898793d9SNaohiro Aota 	 * caller to try again and hope it progress something by writing some
3950898793d9SNaohiro Aota 	 * parts of the region. That is only possible for data block groups,
3951898793d9SNaohiro Aota 	 * where a part of the region can be written.
3952898793d9SNaohiro Aota 	 */
3953898793d9SNaohiro Aota 	if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA)
3954898793d9SNaohiro Aota 		return -EAGAIN;
3955898793d9SNaohiro Aota 
3956898793d9SNaohiro Aota 	/*
3957393f646eSNaohiro Aota 	 * We cannot activate a new block group and no enough space left in any
3958393f646eSNaohiro Aota 	 * block groups. So, allocating a new block group may not help. But,
3959393f646eSNaohiro Aota 	 * there is nothing to do anyway, so let's go with it.
3960393f646eSNaohiro Aota 	 */
3961393f646eSNaohiro Aota 	return 0;
3962393f646eSNaohiro Aota }
3963393f646eSNaohiro Aota 
can_allocate_chunk(struct btrfs_fs_info * fs_info,struct find_free_extent_ctl * ffe_ctl)3964bb9950d3SNaohiro Aota static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
396550475cd5SNaohiro Aota 			      struct find_free_extent_ctl *ffe_ctl)
396650475cd5SNaohiro Aota {
396750475cd5SNaohiro Aota 	switch (ffe_ctl->policy) {
396850475cd5SNaohiro Aota 	case BTRFS_EXTENT_ALLOC_CLUSTERED:
3969bb9950d3SNaohiro Aota 		return 0;
397050475cd5SNaohiro Aota 	case BTRFS_EXTENT_ALLOC_ZONED:
3971393f646eSNaohiro Aota 		return can_allocate_chunk_zoned(fs_info, ffe_ctl);
397250475cd5SNaohiro Aota 	default:
397350475cd5SNaohiro Aota 		BUG();
397450475cd5SNaohiro Aota 	}
397550475cd5SNaohiro Aota }
397650475cd5SNaohiro Aota 
3977e1a41848SQu Wenruo /*
3978e72d79d6SQu Wenruo  * Return >0 means caller needs to re-search for free extent
3979e72d79d6SQu Wenruo  * Return 0 means we have the needed free extent.
3980e72d79d6SQu Wenruo  * Return <0 means we failed to locate any free extent.
3981e72d79d6SQu Wenruo  */
find_free_extent_update_loop(struct btrfs_fs_info * fs_info,struct btrfs_key * ins,struct find_free_extent_ctl * ffe_ctl,bool full_search)3982e72d79d6SQu Wenruo static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
3983e72d79d6SQu Wenruo 					struct btrfs_key *ins,
3984e72d79d6SQu Wenruo 					struct find_free_extent_ctl *ffe_ctl,
398515b7ee65SNaohiro Aota 					bool full_search)
3986e72d79d6SQu Wenruo {
39878e1d0290SJosef Bacik 	struct btrfs_root *root = fs_info->chunk_root;
3988e72d79d6SQu Wenruo 	int ret;
3989e72d79d6SQu Wenruo 
3990e72d79d6SQu Wenruo 	if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
3991e72d79d6SQu Wenruo 	    ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
3992e72d79d6SQu Wenruo 		ffe_ctl->orig_have_caching_bg = true;
3993e72d79d6SQu Wenruo 
3994e72d79d6SQu Wenruo 	if (ins->objectid) {
39950ab9724bSNaohiro Aota 		found_extent(ffe_ctl, ins);
3996e72d79d6SQu Wenruo 		return 0;
3997e72d79d6SQu Wenruo 	}
3998e72d79d6SQu Wenruo 
3999a85f05e5SNaohiro Aota 	if (ffe_ctl->loop >= LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
4000a85f05e5SNaohiro Aota 		return 1;
4001a85f05e5SNaohiro Aota 
4002a85f05e5SNaohiro Aota 	ffe_ctl->index++;
4003a85f05e5SNaohiro Aota 	if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
4004a85f05e5SNaohiro Aota 		return 1;
4005a85f05e5SNaohiro Aota 
4006b9d97cffSJosef Bacik 	/* See the comments for btrfs_loop_type for an explanation of the phases. */
4007e72d79d6SQu Wenruo 	if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
4008e72d79d6SQu Wenruo 		ffe_ctl->index = 0;
4009e72d79d6SQu Wenruo 		/*
401052bb7a21SBoris Burkov 		 * We want to skip the LOOP_CACHING_WAIT step if we don't have
401152bb7a21SBoris Burkov 		 * any uncached bgs and we've already done a full search
401252bb7a21SBoris Burkov 		 * through.
4013e72d79d6SQu Wenruo 		 */
401452bb7a21SBoris Burkov 		if (ffe_ctl->loop == LOOP_CACHING_NOWAIT &&
401552bb7a21SBoris Burkov 		    (!ffe_ctl->orig_have_caching_bg && full_search))
4016e72d79d6SQu Wenruo 			ffe_ctl->loop++;
401752bb7a21SBoris Burkov 		ffe_ctl->loop++;
4018e72d79d6SQu Wenruo 
4019e72d79d6SQu Wenruo 		if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
4020e72d79d6SQu Wenruo 			struct btrfs_trans_handle *trans;
4021e72d79d6SQu Wenruo 			int exist = 0;
4022e72d79d6SQu Wenruo 
402350475cd5SNaohiro Aota 			/* Check if allocation policy allows to create a new chunk */
4024bb9950d3SNaohiro Aota 			ret = can_allocate_chunk(fs_info, ffe_ctl);
4025bb9950d3SNaohiro Aota 			if (ret)
4026bb9950d3SNaohiro Aota 				return ret;
402750475cd5SNaohiro Aota 
4028e72d79d6SQu Wenruo 			trans = current->journal_info;
4029e72d79d6SQu Wenruo 			if (trans)
4030e72d79d6SQu Wenruo 				exist = 1;
4031e72d79d6SQu Wenruo 			else
4032e72d79d6SQu Wenruo 				trans = btrfs_join_transaction(root);
4033e72d79d6SQu Wenruo 
4034e72d79d6SQu Wenruo 			if (IS_ERR(trans)) {
4035e72d79d6SQu Wenruo 				ret = PTR_ERR(trans);
4036e72d79d6SQu Wenruo 				return ret;
4037e72d79d6SQu Wenruo 			}
4038e72d79d6SQu Wenruo 
4039fc471cb0SJosef Bacik 			ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
4040760e69c4SNaohiro Aota 						CHUNK_ALLOC_FORCE_FOR_EXTENT);
4041e72d79d6SQu Wenruo 
4042e72d79d6SQu Wenruo 			/* Do not bail out on ENOSPC since we can do more. */
404352bb7a21SBoris Burkov 			if (ret == -ENOSPC) {
404452bb7a21SBoris Burkov 				ret = 0;
404552bb7a21SBoris Burkov 				ffe_ctl->loop++;
404652bb7a21SBoris Burkov 			}
4047c70e2139SNaohiro Aota 			else if (ret < 0)
4048e72d79d6SQu Wenruo 				btrfs_abort_transaction(trans, ret);
4049e72d79d6SQu Wenruo 			else
4050e72d79d6SQu Wenruo 				ret = 0;
4051e72d79d6SQu Wenruo 			if (!exist)
4052e72d79d6SQu Wenruo 				btrfs_end_transaction(trans);
4053e72d79d6SQu Wenruo 			if (ret)
4054e72d79d6SQu Wenruo 				return ret;
4055e72d79d6SQu Wenruo 		}
4056e72d79d6SQu Wenruo 
4057e72d79d6SQu Wenruo 		if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
405845d8e033SNaohiro Aota 			if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED)
405945d8e033SNaohiro Aota 				return -ENOSPC;
406045d8e033SNaohiro Aota 
4061e72d79d6SQu Wenruo 			/*
4062e72d79d6SQu Wenruo 			 * Don't loop again if we already have no empty_size and
4063e72d79d6SQu Wenruo 			 * no empty_cluster.
4064e72d79d6SQu Wenruo 			 */
4065e72d79d6SQu Wenruo 			if (ffe_ctl->empty_size == 0 &&
4066e72d79d6SQu Wenruo 			    ffe_ctl->empty_cluster == 0)
4067e72d79d6SQu Wenruo 				return -ENOSPC;
4068e72d79d6SQu Wenruo 			ffe_ctl->empty_size = 0;
4069e72d79d6SQu Wenruo 			ffe_ctl->empty_cluster = 0;
4070e72d79d6SQu Wenruo 		}
4071e72d79d6SQu Wenruo 		return 1;
4072e72d79d6SQu Wenruo 	}
4073e72d79d6SQu Wenruo 	return -ENOSPC;
4074e72d79d6SQu Wenruo }
4075e72d79d6SQu Wenruo 
find_free_extent_check_size_class(struct find_free_extent_ctl * ffe_ctl,struct btrfs_block_group * bg)407652bb7a21SBoris Burkov static bool find_free_extent_check_size_class(struct find_free_extent_ctl *ffe_ctl,
407752bb7a21SBoris Burkov 					      struct btrfs_block_group *bg)
407852bb7a21SBoris Burkov {
407952bb7a21SBoris Burkov 	if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
408052bb7a21SBoris Burkov 		return true;
4081cb0922f2SBoris Burkov 	if (!btrfs_block_group_should_use_size_class(bg))
408252bb7a21SBoris Burkov 		return true;
408352bb7a21SBoris Burkov 	if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
408452bb7a21SBoris Burkov 		return true;
408552bb7a21SBoris Burkov 	if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
408652bb7a21SBoris Burkov 	    bg->size_class == BTRFS_BG_SZ_NONE)
408752bb7a21SBoris Burkov 		return true;
408852bb7a21SBoris Burkov 	return ffe_ctl->size_class == bg->size_class;
408952bb7a21SBoris Burkov }
409052bb7a21SBoris Burkov 
prepare_allocation_clustered(struct btrfs_fs_info * fs_info,struct find_free_extent_ctl * ffe_ctl,struct btrfs_space_info * space_info,struct btrfs_key * ins)40917e895409SNaohiro Aota static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
40927e895409SNaohiro Aota 					struct find_free_extent_ctl *ffe_ctl,
40937e895409SNaohiro Aota 					struct btrfs_space_info *space_info,
40947e895409SNaohiro Aota 					struct btrfs_key *ins)
40957e895409SNaohiro Aota {
40967e895409SNaohiro Aota 	/*
40977e895409SNaohiro Aota 	 * If our free space is heavily fragmented we may not be able to make
40987e895409SNaohiro Aota 	 * big contiguous allocations, so instead of doing the expensive search
40997e895409SNaohiro Aota 	 * for free space, simply return ENOSPC with our max_extent_size so we
41007e895409SNaohiro Aota 	 * can go ahead and search for a more manageable chunk.
41017e895409SNaohiro Aota 	 *
41027e895409SNaohiro Aota 	 * If our max_extent_size is large enough for our allocation simply
41037e895409SNaohiro Aota 	 * disable clustering since we will likely not be able to find enough
41047e895409SNaohiro Aota 	 * space to create a cluster and induce latency trying.
41057e895409SNaohiro Aota 	 */
41067e895409SNaohiro Aota 	if (space_info->max_extent_size) {
41077e895409SNaohiro Aota 		spin_lock(&space_info->lock);
41087e895409SNaohiro Aota 		if (space_info->max_extent_size &&
41097e895409SNaohiro Aota 		    ffe_ctl->num_bytes > space_info->max_extent_size) {
41107e895409SNaohiro Aota 			ins->offset = space_info->max_extent_size;
41117e895409SNaohiro Aota 			spin_unlock(&space_info->lock);
41127e895409SNaohiro Aota 			return -ENOSPC;
41137e895409SNaohiro Aota 		} else if (space_info->max_extent_size) {
41147e895409SNaohiro Aota 			ffe_ctl->use_cluster = false;
41157e895409SNaohiro Aota 		}
41167e895409SNaohiro Aota 		spin_unlock(&space_info->lock);
41177e895409SNaohiro Aota 	}
41187e895409SNaohiro Aota 
41197e895409SNaohiro Aota 	ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info,
41207e895409SNaohiro Aota 					       &ffe_ctl->empty_cluster);
41217e895409SNaohiro Aota 	if (ffe_ctl->last_ptr) {
41227e895409SNaohiro Aota 		struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
41237e895409SNaohiro Aota 
41247e895409SNaohiro Aota 		spin_lock(&last_ptr->lock);
41257e895409SNaohiro Aota 		if (last_ptr->block_group)
41267e895409SNaohiro Aota 			ffe_ctl->hint_byte = last_ptr->window_start;
41277e895409SNaohiro Aota 		if (last_ptr->fragmented) {
41287e895409SNaohiro Aota 			/*
41297e895409SNaohiro Aota 			 * We still set window_start so we can keep track of the
41307e895409SNaohiro Aota 			 * last place we found an allocation to try and save
41317e895409SNaohiro Aota 			 * some time.
41327e895409SNaohiro Aota 			 */
41337e895409SNaohiro Aota 			ffe_ctl->hint_byte = last_ptr->window_start;
41347e895409SNaohiro Aota 			ffe_ctl->use_cluster = false;
41357e895409SNaohiro Aota 		}
41367e895409SNaohiro Aota 		spin_unlock(&last_ptr->lock);
41377e895409SNaohiro Aota 	}
41387e895409SNaohiro Aota 
41397e895409SNaohiro Aota 	return 0;
41407e895409SNaohiro Aota }
41417e895409SNaohiro Aota 
prepare_allocation_zoned(struct btrfs_fs_info * fs_info,struct find_free_extent_ctl * ffe_ctl)4142b1e30e2fSNaohiro Aota static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
4143b1e30e2fSNaohiro Aota 				    struct find_free_extent_ctl *ffe_ctl)
4144b1e30e2fSNaohiro Aota {
4145b1e30e2fSNaohiro Aota 	if (ffe_ctl->for_treelog) {
4146b1e30e2fSNaohiro Aota 		spin_lock(&fs_info->treelog_bg_lock);
4147b1e30e2fSNaohiro Aota 		if (fs_info->treelog_bg)
4148b1e30e2fSNaohiro Aota 			ffe_ctl->hint_byte = fs_info->treelog_bg;
4149b1e30e2fSNaohiro Aota 		spin_unlock(&fs_info->treelog_bg_lock);
4150b1e30e2fSNaohiro Aota 	} else if (ffe_ctl->for_data_reloc) {
4151b1e30e2fSNaohiro Aota 		spin_lock(&fs_info->relocation_bg_lock);
4152b1e30e2fSNaohiro Aota 		if (fs_info->data_reloc_bg)
4153b1e30e2fSNaohiro Aota 			ffe_ctl->hint_byte = fs_info->data_reloc_bg;
4154b1e30e2fSNaohiro Aota 		spin_unlock(&fs_info->relocation_bg_lock);
4155*46ac4e1fSNaohiro Aota 	} else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
4156*46ac4e1fSNaohiro Aota 		struct btrfs_block_group *block_group;
4157*46ac4e1fSNaohiro Aota 
4158*46ac4e1fSNaohiro Aota 		spin_lock(&fs_info->zone_active_bgs_lock);
4159*46ac4e1fSNaohiro Aota 		list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
4160*46ac4e1fSNaohiro Aota 			/*
4161*46ac4e1fSNaohiro Aota 			 * No lock is OK here because avail is monotinically
4162*46ac4e1fSNaohiro Aota 			 * decreasing, and this is just a hint.
4163*46ac4e1fSNaohiro Aota 			 */
4164*46ac4e1fSNaohiro Aota 			u64 avail = block_group->zone_capacity - block_group->alloc_offset;
4165*46ac4e1fSNaohiro Aota 
4166*46ac4e1fSNaohiro Aota 			if (block_group_bits(block_group, ffe_ctl->flags) &&
4167*46ac4e1fSNaohiro Aota 			    avail >= ffe_ctl->num_bytes) {
4168*46ac4e1fSNaohiro Aota 				ffe_ctl->hint_byte = block_group->start;
4169*46ac4e1fSNaohiro Aota 				break;
4170*46ac4e1fSNaohiro Aota 			}
4171*46ac4e1fSNaohiro Aota 		}
4172*46ac4e1fSNaohiro Aota 		spin_unlock(&fs_info->zone_active_bgs_lock);
4173b1e30e2fSNaohiro Aota 	}
4174b1e30e2fSNaohiro Aota 
4175b1e30e2fSNaohiro Aota 	return 0;
4176b1e30e2fSNaohiro Aota }
4177b1e30e2fSNaohiro Aota 
prepare_allocation(struct btrfs_fs_info * fs_info,struct find_free_extent_ctl * ffe_ctl,struct btrfs_space_info * space_info,struct btrfs_key * ins)41787e895409SNaohiro Aota static int prepare_allocation(struct btrfs_fs_info *fs_info,
41797e895409SNaohiro Aota 			      struct find_free_extent_ctl *ffe_ctl,
41807e895409SNaohiro Aota 			      struct btrfs_space_info *space_info,
41817e895409SNaohiro Aota 			      struct btrfs_key *ins)
41827e895409SNaohiro Aota {
41837e895409SNaohiro Aota 	switch (ffe_ctl->policy) {
41847e895409SNaohiro Aota 	case BTRFS_EXTENT_ALLOC_CLUSTERED:
41857e895409SNaohiro Aota 		return prepare_allocation_clustered(fs_info, ffe_ctl,
41867e895409SNaohiro Aota 						    space_info, ins);
41872eda5708SNaohiro Aota 	case BTRFS_EXTENT_ALLOC_ZONED:
4188b1e30e2fSNaohiro Aota 		return prepare_allocation_zoned(fs_info, ffe_ctl);
41897e895409SNaohiro Aota 	default:
41907e895409SNaohiro Aota 		BUG();
41917e895409SNaohiro Aota 	}
41927e895409SNaohiro Aota }
41937e895409SNaohiro Aota 
4194e72d79d6SQu Wenruo /*
4195fec577fbSChris Mason  * walks the btree of allocated extents and find a hole of a given size.
4196fec577fbSChris Mason  * The key ins is changed to record the hole:
4197a4820398SMiao Xie  * ins->objectid == start position
419862e2749eSChris Mason  * ins->flags = BTRFS_EXTENT_ITEM_KEY
4199a4820398SMiao Xie  * ins->offset == the size of the hole.
4200fec577fbSChris Mason  * Any available blocks before search_start are skipped.
4201a4820398SMiao Xie  *
4202a4820398SMiao Xie  * If there is no suitable free space, we will record the max size of
4203a4820398SMiao Xie  * the free space extent currently.
4204e72d79d6SQu Wenruo  *
4205e72d79d6SQu Wenruo  * The overall logic and call chain:
4206e72d79d6SQu Wenruo  *
4207e72d79d6SQu Wenruo  * find_free_extent()
4208e72d79d6SQu Wenruo  * |- Iterate through all block groups
4209e72d79d6SQu Wenruo  * |  |- Get a valid block group
4210e72d79d6SQu Wenruo  * |  |- Try to do clustered allocation in that block group
4211e72d79d6SQu Wenruo  * |  |- Try to do unclustered allocation in that block group
4212e72d79d6SQu Wenruo  * |  |- Check if the result is valid
4213e72d79d6SQu Wenruo  * |  |  |- If valid, then exit
4214e72d79d6SQu Wenruo  * |  |- Jump to next block group
4215e72d79d6SQu Wenruo  * |
4216e72d79d6SQu Wenruo  * |- Push harder to find free extents
4217e72d79d6SQu Wenruo  *    |- If not found, re-iterate all block groups
4218fec577fbSChris Mason  */
find_free_extent(struct btrfs_root * root,struct btrfs_key * ins,struct find_free_extent_ctl * ffe_ctl)4219437490feSQu Wenruo static noinline int find_free_extent(struct btrfs_root *root,
4220a12b0dc0SNaohiro Aota 				     struct btrfs_key *ins,
4221a12b0dc0SNaohiro Aota 				     struct find_free_extent_ctl *ffe_ctl)
4222fec577fbSChris Mason {
4223437490feSQu Wenruo 	struct btrfs_fs_info *fs_info = root->fs_info;
422480eb234aSJosef Bacik 	int ret = 0;
4225db8fe64fSJosef Bacik 	int cache_block_group_error = 0;
422632da5386SDavid Sterba 	struct btrfs_block_group *block_group = NULL;
422780eb234aSJosef Bacik 	struct btrfs_space_info *space_info;
4228a5e681d9SJosef Bacik 	bool full_search = false;
4229fec577fbSChris Mason 
4230a12b0dc0SNaohiro Aota 	WARN_ON(ffe_ctl->num_bytes < fs_info->sectorsize);
4231b4bd745dSQu Wenruo 
4232a12b0dc0SNaohiro Aota 	ffe_ctl->search_start = 0;
4233c10859beSNaohiro Aota 	/* For clustered allocation */
4234a12b0dc0SNaohiro Aota 	ffe_ctl->empty_cluster = 0;
4235a12b0dc0SNaohiro Aota 	ffe_ctl->last_ptr = NULL;
4236a12b0dc0SNaohiro Aota 	ffe_ctl->use_cluster = true;
4237a12b0dc0SNaohiro Aota 	ffe_ctl->have_caching_bg = false;
4238a12b0dc0SNaohiro Aota 	ffe_ctl->orig_have_caching_bg = false;
4239a12b0dc0SNaohiro Aota 	ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
4240a12b0dc0SNaohiro Aota 	ffe_ctl->loop = 0;
4241cd361199SJosef Bacik 	ffe_ctl->retry_uncached = false;
4242a12b0dc0SNaohiro Aota 	ffe_ctl->cached = 0;
4243a12b0dc0SNaohiro Aota 	ffe_ctl->max_extent_size = 0;
4244a12b0dc0SNaohiro Aota 	ffe_ctl->total_free_space = 0;
4245a12b0dc0SNaohiro Aota 	ffe_ctl->found_offset = 0;
4246a12b0dc0SNaohiro Aota 	ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
424752bb7a21SBoris Burkov 	ffe_ctl->size_class = btrfs_calc_block_group_size_class(ffe_ctl->num_bytes);
4248c10859beSNaohiro Aota 
42492eda5708SNaohiro Aota 	if (btrfs_is_zoned(fs_info))
4250a12b0dc0SNaohiro Aota 		ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED;
42512eda5708SNaohiro Aota 
4252962a298fSDavid Sterba 	ins->type = BTRFS_EXTENT_ITEM_KEY;
425380eb234aSJosef Bacik 	ins->objectid = 0;
425480eb234aSJosef Bacik 	ins->offset = 0;
4255b1a4d965SChris Mason 
4256cfc2de0fSBoris Burkov 	trace_find_free_extent(root, ffe_ctl);
42573f7de037SJosef Bacik 
4258a12b0dc0SNaohiro Aota 	space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags);
42591b1d1f66SJosef Bacik 	if (!space_info) {
4260a12b0dc0SNaohiro Aota 		btrfs_err(fs_info, "No space info for %llu", ffe_ctl->flags);
42611b1d1f66SJosef Bacik 		return -ENOSPC;
42621b1d1f66SJosef Bacik 	}
42632552d17eSJosef Bacik 
4264a12b0dc0SNaohiro Aota 	ret = prepare_allocation(fs_info, ffe_ctl, space_info, ins);
42657e895409SNaohiro Aota 	if (ret < 0)
42667e895409SNaohiro Aota 		return ret;
4267fa9c0d79SChris Mason 
4268a12b0dc0SNaohiro Aota 	ffe_ctl->search_start = max(ffe_ctl->search_start,
42690eb997bfSFilipe Manana 				    first_logical_byte(fs_info));
4270a12b0dc0SNaohiro Aota 	ffe_ctl->search_start = max(ffe_ctl->search_start, ffe_ctl->hint_byte);
4271a12b0dc0SNaohiro Aota 	if (ffe_ctl->search_start == ffe_ctl->hint_byte) {
4272b4bd745dSQu Wenruo 		block_group = btrfs_lookup_block_group(fs_info,
4273a12b0dc0SNaohiro Aota 						       ffe_ctl->search_start);
4274817d52f8SJosef Bacik 		/*
4275817d52f8SJosef Bacik 		 * we don't want to use the block group if it doesn't match our
4276817d52f8SJosef Bacik 		 * allocation bits, or if its not cached.
4277ccf0e725SJosef Bacik 		 *
4278ccf0e725SJosef Bacik 		 * However if we are re-searching with an ideal block group
4279ccf0e725SJosef Bacik 		 * picked out then we don't care that the block group is cached.
4280817d52f8SJosef Bacik 		 */
4281a12b0dc0SNaohiro Aota 		if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
4282285ff5afSJosef Bacik 		    block_group->cached != BTRFS_CACHE_NO) {
42832552d17eSJosef Bacik 			down_read(&space_info->groups_sem);
428444fb5511SChris Mason 			if (list_empty(&block_group->list) ||
428544fb5511SChris Mason 			    block_group->ro) {
428644fb5511SChris Mason 				/*
428744fb5511SChris Mason 				 * someone is removing this block group,
428844fb5511SChris Mason 				 * we can't jump into the have_block_group
428944fb5511SChris Mason 				 * target because our list pointers are not
429044fb5511SChris Mason 				 * valid
429144fb5511SChris Mason 				 */
429244fb5511SChris Mason 				btrfs_put_block_group(block_group);
429344fb5511SChris Mason 				up_read(&space_info->groups_sem);
4294ccf0e725SJosef Bacik 			} else {
4295a12b0dc0SNaohiro Aota 				ffe_ctl->index = btrfs_bg_flags_to_raid_index(
42963e72ee88SQu Wenruo 							block_group->flags);
4297a12b0dc0SNaohiro Aota 				btrfs_lock_block_group(block_group,
4298a12b0dc0SNaohiro Aota 						       ffe_ctl->delalloc);
4299854c2f36SBoris Burkov 				ffe_ctl->hinted = true;
43002552d17eSJosef Bacik 				goto have_block_group;
4301ccf0e725SJosef Bacik 			}
43022552d17eSJosef Bacik 		} else if (block_group) {
4303fa9c0d79SChris Mason 			btrfs_put_block_group(block_group);
430442e70e7aSChris Mason 		}
43052552d17eSJosef Bacik 	}
43062552d17eSJosef Bacik search:
4307854c2f36SBoris Burkov 	trace_find_free_extent_search_loop(root, ffe_ctl);
4308a12b0dc0SNaohiro Aota 	ffe_ctl->have_caching_bg = false;
4309a12b0dc0SNaohiro Aota 	if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
4310a12b0dc0SNaohiro Aota 	    ffe_ctl->index == 0)
4311a5e681d9SJosef Bacik 		full_search = true;
431280eb234aSJosef Bacik 	down_read(&space_info->groups_sem);
4313b4bd745dSQu Wenruo 	list_for_each_entry(block_group,
4314a12b0dc0SNaohiro Aota 			    &space_info->block_groups[ffe_ctl->index], list) {
4315c668690dSNaohiro Aota 		struct btrfs_block_group *bg_ret;
4316c668690dSNaohiro Aota 
4317854c2f36SBoris Burkov 		ffe_ctl->hinted = false;
431814443937SJeff Mahoney 		/* If the block group is read-only, we can skip it entirely. */
431940ab3be1SNaohiro Aota 		if (unlikely(block_group->ro)) {
4320a12b0dc0SNaohiro Aota 			if (ffe_ctl->for_treelog)
432140ab3be1SNaohiro Aota 				btrfs_clear_treelog_bg(block_group);
4322c2707a25SJohannes Thumshirn 			if (ffe_ctl->for_data_reloc)
4323c2707a25SJohannes Thumshirn 				btrfs_clear_data_reloc_bg(block_group);
432414443937SJeff Mahoney 			continue;
432540ab3be1SNaohiro Aota 		}
432614443937SJeff Mahoney 
4327a12b0dc0SNaohiro Aota 		btrfs_grab_block_group(block_group, ffe_ctl->delalloc);
4328a12b0dc0SNaohiro Aota 		ffe_ctl->search_start = block_group->start;
432942e70e7aSChris Mason 
433083a50de9SChris Mason 		/*
433183a50de9SChris Mason 		 * this can happen if we end up cycling through all the
433283a50de9SChris Mason 		 * raid types, but we want to make sure we only allocate
433383a50de9SChris Mason 		 * for the proper type.
433483a50de9SChris Mason 		 */
4335a12b0dc0SNaohiro Aota 		if (!block_group_bits(block_group, ffe_ctl->flags)) {
433683a50de9SChris Mason 			u64 extra = BTRFS_BLOCK_GROUP_DUP |
4337c7369b3fSDavid Sterba 				BTRFS_BLOCK_GROUP_RAID1_MASK |
4338a07e8a46SDavid Sterba 				BTRFS_BLOCK_GROUP_RAID56_MASK |
433983a50de9SChris Mason 				BTRFS_BLOCK_GROUP_RAID10;
434083a50de9SChris Mason 
434183a50de9SChris Mason 			/*
434283a50de9SChris Mason 			 * if they asked for extra copies and this block group
434383a50de9SChris Mason 			 * doesn't provide them, bail.  This does allow us to
434483a50de9SChris Mason 			 * fill raid0 from raid1.
434583a50de9SChris Mason 			 */
4346a12b0dc0SNaohiro Aota 			if ((ffe_ctl->flags & extra) && !(block_group->flags & extra))
434783a50de9SChris Mason 				goto loop;
43482a28468eSQu Wenruo 
43492a28468eSQu Wenruo 			/*
43502a28468eSQu Wenruo 			 * This block group has different flags than we want.
43512a28468eSQu Wenruo 			 * It's possible that we have MIXED_GROUP flag but no
43522a28468eSQu Wenruo 			 * block group is mixed.  Just skip such block group.
43532a28468eSQu Wenruo 			 */
4354a12b0dc0SNaohiro Aota 			btrfs_release_block_group(block_group, ffe_ctl->delalloc);
43552a28468eSQu Wenruo 			continue;
435683a50de9SChris Mason 		}
435783a50de9SChris Mason 
43582552d17eSJosef Bacik have_block_group:
4359854c2f36SBoris Burkov 		trace_find_free_extent_have_block_group(root, ffe_ctl, block_group);
4360a12b0dc0SNaohiro Aota 		ffe_ctl->cached = btrfs_block_group_done(block_group);
4361a12b0dc0SNaohiro Aota 		if (unlikely(!ffe_ctl->cached)) {
4362a12b0dc0SNaohiro Aota 			ffe_ctl->have_caching_bg = true;
4363ced8ecf0SOmar Sandoval 			ret = btrfs_cache_block_group(block_group, false);
4364db8fe64fSJosef Bacik 
4365db8fe64fSJosef Bacik 			/*
4366db8fe64fSJosef Bacik 			 * If we get ENOMEM here or something else we want to
4367db8fe64fSJosef Bacik 			 * try other block groups, because it may not be fatal.
4368db8fe64fSJosef Bacik 			 * However if we can't find anything else we need to
4369db8fe64fSJosef Bacik 			 * save our return here so that we return the actual
4370db8fe64fSJosef Bacik 			 * error that caused problems, not ENOSPC.
4371db8fe64fSJosef Bacik 			 */
4372db8fe64fSJosef Bacik 			if (ret < 0) {
4373db8fe64fSJosef Bacik 				if (!cache_block_group_error)
4374db8fe64fSJosef Bacik 					cache_block_group_error = ret;
4375db8fe64fSJosef Bacik 				ret = 0;
4376db8fe64fSJosef Bacik 				goto loop;
4377db8fe64fSJosef Bacik 			}
43781d4284bdSChris Mason 			ret = 0;
4379ea6a478eSJosef Bacik 		}
4380ccf0e725SJosef Bacik 
438192fb94b6SJosef Bacik 		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) {
438292fb94b6SJosef Bacik 			if (!cache_block_group_error)
438392fb94b6SJosef Bacik 				cache_block_group_error = -EIO;
438436cce922SJosef Bacik 			goto loop;
438592fb94b6SJosef Bacik 		}
438680eb234aSJosef Bacik 
438752bb7a21SBoris Burkov 		if (!find_free_extent_check_size_class(ffe_ctl, block_group))
438852bb7a21SBoris Burkov 			goto loop;
438952bb7a21SBoris Burkov 
4390c668690dSNaohiro Aota 		bg_ret = NULL;
4391a12b0dc0SNaohiro Aota 		ret = do_allocation(block_group, ffe_ctl, &bg_ret);
4392cd361199SJosef Bacik 		if (ret > 0)
4393fa9c0d79SChris Mason 			goto loop;
4394cd361199SJosef Bacik 
4395cd361199SJosef Bacik 		if (bg_ret && bg_ret != block_group) {
4396cd361199SJosef Bacik 			btrfs_release_block_group(block_group, ffe_ctl->delalloc);
4397cd361199SJosef Bacik 			block_group = bg_ret;
4398fa9c0d79SChris Mason 		}
4399fa9c0d79SChris Mason 
4400c668690dSNaohiro Aota 		/* Checks */
4401a12b0dc0SNaohiro Aota 		ffe_ctl->search_start = round_up(ffe_ctl->found_offset,
4402b4bd745dSQu Wenruo 						 fs_info->stripesize);
440380eb234aSJosef Bacik 
440480eb234aSJosef Bacik 		/* move on to the next group */
4405a12b0dc0SNaohiro Aota 		if (ffe_ctl->search_start + ffe_ctl->num_bytes >
4406b3470b5dSDavid Sterba 		    block_group->start + block_group->length) {
44072eda5708SNaohiro Aota 			btrfs_add_free_space_unused(block_group,
4408a12b0dc0SNaohiro Aota 					    ffe_ctl->found_offset,
4409a12b0dc0SNaohiro Aota 					    ffe_ctl->num_bytes);
44102552d17eSJosef Bacik 			goto loop;
44116226cb0aSJosef Bacik 		}
441280eb234aSJosef Bacik 
4413a12b0dc0SNaohiro Aota 		if (ffe_ctl->found_offset < ffe_ctl->search_start)
44142eda5708SNaohiro Aota 			btrfs_add_free_space_unused(block_group,
4415a12b0dc0SNaohiro Aota 					ffe_ctl->found_offset,
4416a12b0dc0SNaohiro Aota 					ffe_ctl->search_start - ffe_ctl->found_offset);
44176226cb0aSJosef Bacik 
4418a12b0dc0SNaohiro Aota 		ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
4419a12b0dc0SNaohiro Aota 					       ffe_ctl->num_bytes,
442052bb7a21SBoris Burkov 					       ffe_ctl->delalloc,
442152bb7a21SBoris Burkov 					       ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS);
4422f0486c68SYan, Zheng 		if (ret == -EAGAIN) {
44232eda5708SNaohiro Aota 			btrfs_add_free_space_unused(block_group,
4424a12b0dc0SNaohiro Aota 					ffe_ctl->found_offset,
4425a12b0dc0SNaohiro Aota 					ffe_ctl->num_bytes);
4426f0486c68SYan, Zheng 			goto loop;
4427f0486c68SYan, Zheng 		}
44289cfa3e34SFilipe Manana 		btrfs_inc_block_group_reservations(block_group);
442911833d66SYan Zheng 
443080eb234aSJosef Bacik 		/* we are all good, lets return */
4431a12b0dc0SNaohiro Aota 		ins->objectid = ffe_ctl->search_start;
4432a12b0dc0SNaohiro Aota 		ins->offset = ffe_ctl->num_bytes;
4433f0486c68SYan, Zheng 
4434cfc2de0fSBoris Burkov 		trace_btrfs_reserve_extent(block_group, ffe_ctl);
4435a12b0dc0SNaohiro Aota 		btrfs_release_block_group(block_group, ffe_ctl->delalloc);
443680eb234aSJosef Bacik 		break;
44372552d17eSJosef Bacik loop:
4438cd361199SJosef Bacik 		if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
4439cd361199SJosef Bacik 		    !ffe_ctl->retry_uncached) {
4440cd361199SJosef Bacik 			ffe_ctl->retry_uncached = true;
4441cd361199SJosef Bacik 			btrfs_wait_block_group_cache_progress(block_group,
4442cd361199SJosef Bacik 						ffe_ctl->num_bytes +
4443cd361199SJosef Bacik 						ffe_ctl->empty_cluster +
4444cd361199SJosef Bacik 						ffe_ctl->empty_size);
4445cd361199SJosef Bacik 			goto have_block_group;
4446cd361199SJosef Bacik 		}
4447a12b0dc0SNaohiro Aota 		release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
444814443937SJeff Mahoney 		cond_resched();
44492552d17eSJosef Bacik 	}
44502552d17eSJosef Bacik 	up_read(&space_info->groups_sem);
4451f5a31e16SChris Mason 
4452a12b0dc0SNaohiro Aota 	ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, full_search);
4453e72d79d6SQu Wenruo 	if (ret > 0)
445460d2adbbSMiao Xie 		goto search;
445560d2adbbSMiao Xie 
4456db8fe64fSJosef Bacik 	if (ret == -ENOSPC && !cache_block_group_error) {
4457b4bd745dSQu Wenruo 		/*
4458b4bd745dSQu Wenruo 		 * Use ffe_ctl->total_free_space as fallback if we can't find
4459b4bd745dSQu Wenruo 		 * any contiguous hole.
4460b4bd745dSQu Wenruo 		 */
4461a12b0dc0SNaohiro Aota 		if (!ffe_ctl->max_extent_size)
4462a12b0dc0SNaohiro Aota 			ffe_ctl->max_extent_size = ffe_ctl->total_free_space;
44634f4db217SJosef Bacik 		spin_lock(&space_info->lock);
4464a12b0dc0SNaohiro Aota 		space_info->max_extent_size = ffe_ctl->max_extent_size;
44654f4db217SJosef Bacik 		spin_unlock(&space_info->lock);
4466a12b0dc0SNaohiro Aota 		ins->offset = ffe_ctl->max_extent_size;
4467db8fe64fSJosef Bacik 	} else if (ret == -ENOSPC) {
4468db8fe64fSJosef Bacik 		ret = cache_block_group_error;
44694f4db217SJosef Bacik 	}
44700f70abe2SChris Mason 	return ret;
4471fec577fbSChris Mason }
4472ec44a35cSChris Mason 
44736f47c706SNikolay Borisov /*
44746f47c706SNikolay Borisov  * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
44756f47c706SNikolay Borisov  *			  hole that is at least as big as @num_bytes.
44766f47c706SNikolay Borisov  *
44776f47c706SNikolay Borisov  * @root           -	The root that will contain this extent
44786f47c706SNikolay Borisov  *
44796f47c706SNikolay Borisov  * @ram_bytes      -	The amount of space in ram that @num_bytes take. This
44806f47c706SNikolay Borisov  *			is used for accounting purposes. This value differs
44816f47c706SNikolay Borisov  *			from @num_bytes only in the case of compressed extents.
44826f47c706SNikolay Borisov  *
44836f47c706SNikolay Borisov  * @num_bytes      -	Number of bytes to allocate on-disk.
44846f47c706SNikolay Borisov  *
44856f47c706SNikolay Borisov  * @min_alloc_size -	Indicates the minimum amount of space that the
44866f47c706SNikolay Borisov  *			allocator should try to satisfy. In some cases
44876f47c706SNikolay Borisov  *			@num_bytes may be larger than what is required and if
44886f47c706SNikolay Borisov  *			the filesystem is fragmented then allocation fails.
44896f47c706SNikolay Borisov  *			However, the presence of @min_alloc_size gives a
44906f47c706SNikolay Borisov  *			chance to try and satisfy the smaller allocation.
44916f47c706SNikolay Borisov  *
44926f47c706SNikolay Borisov  * @empty_size     -	A hint that you plan on doing more COW. This is the
44936f47c706SNikolay Borisov  *			size in bytes the allocator should try to find free
44946f47c706SNikolay Borisov  *			next to the block it returns.  This is just a hint and
44956f47c706SNikolay Borisov  *			may be ignored by the allocator.
44966f47c706SNikolay Borisov  *
44976f47c706SNikolay Borisov  * @hint_byte      -	Hint to the allocator to start searching above the byte
44986f47c706SNikolay Borisov  *			address passed. It might be ignored.
44996f47c706SNikolay Borisov  *
45006f47c706SNikolay Borisov  * @ins            -	This key is modified to record the found hole. It will
45016f47c706SNikolay Borisov  *			have the following values:
45026f47c706SNikolay Borisov  *			ins->objectid == start position
45036f47c706SNikolay Borisov  *			ins->flags = BTRFS_EXTENT_ITEM_KEY
45046f47c706SNikolay Borisov  *			ins->offset == the size of the hole.
45056f47c706SNikolay Borisov  *
45066f47c706SNikolay Borisov  * @is_data        -	Boolean flag indicating whether an extent is
45076f47c706SNikolay Borisov  *			allocated for data (true) or metadata (false)
45086f47c706SNikolay Borisov  *
45096f47c706SNikolay Borisov  * @delalloc       -	Boolean flag indicating whether this allocation is for
45106f47c706SNikolay Borisov  *			delalloc or not. If 'true' data_rwsem of block groups
45116f47c706SNikolay Borisov  *			is going to be acquired.
45126f47c706SNikolay Borisov  *
45136f47c706SNikolay Borisov  *
45146f47c706SNikolay Borisov  * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
45156f47c706SNikolay Borisov  * case -ENOSPC is returned then @ins->offset will contain the size of the
45166f47c706SNikolay Borisov  * largest available hole the allocator managed to find.
45176f47c706SNikolay Borisov  */
btrfs_reserve_extent(struct btrfs_root * root,u64 ram_bytes,u64 num_bytes,u64 min_alloc_size,u64 empty_size,u64 hint_byte,struct btrfs_key * ins,int is_data,int delalloc)451818513091SWang Xiaoguang int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
451998d20f67SChris Mason 			 u64 num_bytes, u64 min_alloc_size,
45207bb86316SChris Mason 			 u64 empty_size, u64 hint_byte,
4521e570fd27SMiao Xie 			 struct btrfs_key *ins, int is_data, int delalloc)
4522fec577fbSChris Mason {
4523ab8d0fc4SJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
4524a12b0dc0SNaohiro Aota 	struct find_free_extent_ctl ffe_ctl = {};
452536af4e07SJosef Bacik 	bool final_tried = num_bytes == min_alloc_size;
4526b6919a58SDavid Sterba 	u64 flags;
4527fec577fbSChris Mason 	int ret;
452840ab3be1SNaohiro Aota 	bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4529c2707a25SJohannes Thumshirn 	bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data);
4530925baeddSChris Mason 
45311b86826dSJeff Mahoney 	flags = get_alloc_profile_by_root(root, is_data);
453298d20f67SChris Mason again:
45330b246afaSJeff Mahoney 	WARN_ON(num_bytes < fs_info->sectorsize);
4534a12b0dc0SNaohiro Aota 
4535a12b0dc0SNaohiro Aota 	ffe_ctl.ram_bytes = ram_bytes;
4536a12b0dc0SNaohiro Aota 	ffe_ctl.num_bytes = num_bytes;
4537a85f05e5SNaohiro Aota 	ffe_ctl.min_alloc_size = min_alloc_size;
4538a12b0dc0SNaohiro Aota 	ffe_ctl.empty_size = empty_size;
4539a12b0dc0SNaohiro Aota 	ffe_ctl.flags = flags;
4540a12b0dc0SNaohiro Aota 	ffe_ctl.delalloc = delalloc;
4541a12b0dc0SNaohiro Aota 	ffe_ctl.hint_byte = hint_byte;
4542a12b0dc0SNaohiro Aota 	ffe_ctl.for_treelog = for_treelog;
4543c2707a25SJohannes Thumshirn 	ffe_ctl.for_data_reloc = for_data_reloc;
4544a12b0dc0SNaohiro Aota 
4545a12b0dc0SNaohiro Aota 	ret = find_free_extent(root, ins, &ffe_ctl);
45469cfa3e34SFilipe Manana 	if (!ret && !is_data) {
4547ab8d0fc4SJeff Mahoney 		btrfs_dec_block_group_reservations(fs_info, ins->objectid);
45489cfa3e34SFilipe Manana 	} else if (ret == -ENOSPC) {
4549a4820398SMiao Xie 		if (!final_tried && ins->offset) {
4550a4820398SMiao Xie 			num_bytes = min(num_bytes >> 1, ins->offset);
4551da17066cSJeff Mahoney 			num_bytes = round_down(num_bytes,
45520b246afaSJeff Mahoney 					       fs_info->sectorsize);
455398d20f67SChris Mason 			num_bytes = max(num_bytes, min_alloc_size);
455418513091SWang Xiaoguang 			ram_bytes = num_bytes;
45559e622d6bSMiao Xie 			if (num_bytes == min_alloc_size)
45569e622d6bSMiao Xie 				final_tried = true;
455798d20f67SChris Mason 			goto again;
4558ab8d0fc4SJeff Mahoney 		} else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
45590f9dd46cSJosef Bacik 			struct btrfs_space_info *sinfo;
45600f9dd46cSJosef Bacik 
4561280c2908SJosef Bacik 			sinfo = btrfs_find_space_info(fs_info, flags);
45620b246afaSJeff Mahoney 			btrfs_err(fs_info,
4563c2707a25SJohannes Thumshirn 	"allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
4564c2707a25SJohannes Thumshirn 				  flags, num_bytes, for_treelog, for_data_reloc);
456553804280SJeff Mahoney 			if (sinfo)
45665da6afebSJosef Bacik 				btrfs_dump_space_info(fs_info, sinfo,
45675da6afebSJosef Bacik 						      num_bytes, 1);
4568925baeddSChris Mason 		}
45699e622d6bSMiao Xie 	}
45700f9dd46cSJosef Bacik 
45710f9dd46cSJosef Bacik 	return ret;
4572e6dcd2dcSChris Mason }
4573e6dcd2dcSChris Mason 
/*
 * Give a reserved extent back to its block group.
 *
 * Looks up the block group containing @start, returns [@start, @start + @len)
 * to its free space and releases @len reserved bytes (@delalloc is passed on
 * to btrfs_free_reserved_bytes()).
 *
 * Returns 0 on success or -ENOSPC if no block group contains @start.
 */
int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
			       u64 start, u64 len, int delalloc)
{
	struct btrfs_block_group *cache;

	cache = btrfs_lookup_block_group(fs_info, start);
	if (!cache) {
		btrfs_err(fs_info, "Unable to find block group for %llu",
			  start);
		return -ENOSPC;
	}

	btrfs_add_free_space(cache, start, len);
	btrfs_free_reserved_bytes(cache, len, delalloc);
	trace_btrfs_reserved_extent_free(fs_info, start, len);

	/* Drop the reference taken by btrfs_lookup_block_group(). */
	btrfs_put_block_group(cache);
	return 0;
}
4593e688b725SChris Mason 
/*
 * Pin down a reserved extent in the block group that contains @start.
 *
 * Returns the result of pin_down_extent(), or -ENOSPC if no block group
 * contains @start.
 */
int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
			      u64 len)
{
	struct btrfs_block_group *cache;
	int ret = 0;

	cache = btrfs_lookup_block_group(trans->fs_info, start);
	if (!cache) {
		btrfs_err(trans->fs_info, "unable to find block group for %llu",
			  start);
		return -ENOSPC;
	}

	/* NOTE(review): the final argument presumably marks the range as reserved - confirm. */
	ret = pin_down_extent(trans, cache, start, len, 1);
	/* Drop the reference taken by btrfs_lookup_block_group(). */
	btrfs_put_block_group(cache);
	return ret;
}
4611e688b725SChris Mason 
/*
 * Accounting shared by the reserved extent allocation paths: remove the range
 * from the free space tree and update the block group that contains it.
 *
 * Returns 0 on success or the error from either step.
 */
static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
				 u64 num_bytes)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret;

	ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
	if (ret)
		return ret;

	ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
	if (ret) {
		/*
		 * Log before asserting so the failing range is reported even
		 * when the assertion stops execution.
		 */
		btrfs_err(fs_info, "update block group failed for %llu %llu",
			  bytenr, num_bytes);
		ASSERT(!ret);
		return ret;
	}

	trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
	return 0;
}
463334666705SJosef Bacik 
/*
 * Insert the extent item for a reserved data extent described by @ins.
 *
 * The item is created in the extent root for ins->objectid and carries a
 * single inline reference: a shared data ref pointing at @parent when @parent
 * is non-zero, otherwise an extent data ref built from @root_objectid, @owner
 * and @offset.  The extent's reference count and the inline ref's count are
 * both set to @ref_mod.  Afterwards the free space tree and block group are
 * updated through alloc_reserved_extent().
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the error
 * from inserting the item or from alloc_reserved_extent().
 */
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *extent_root;
	int ret;
	struct btrfs_extent_item *extent_item;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int type;
	u32 size;

	if (parent > 0)
		type = BTRFS_SHARED_DATA_REF_KEY;
	else
		type = BTRFS_EXTENT_DATA_REF_KEY;

	/* Room for the extent item followed by one inline ref of @type. */
	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	extent_root = btrfs_extent_root(fs_info, ins->objectid);
	ret = btrfs_insert_empty_item(trans, extent_root, path, ins, size);
	if (ret) {
		btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	extent_item = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, extent_item, ref_mod);
	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
	btrfs_set_extent_flags(leaf, extent_item,
			       flags | BTRFS_EXTENT_FLAG_DATA);

	/* The inline ref starts right after the extent item. */
	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (parent > 0) {
		struct btrfs_shared_data_ref *ref;
		ref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
		btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
	} else {
		struct btrfs_extent_data_ref *ref;
		ref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
		btrfs_set_extent_data_ref_offset(leaf, ref, offset);
		btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
	}

	btrfs_mark_buffer_dirty(trans, path->nodes[0]);
	btrfs_free_path(path);

	return alloc_reserved_extent(trans, ins->objectid, ins->offset);
}
4696e6dcd2dcSChris Mason 
alloc_reserved_tree_block(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_node * node,struct btrfs_delayed_extent_op * extent_op)46975d4f98a2SYan Zheng static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
46984e6bd4e0SNikolay Borisov 				     struct btrfs_delayed_ref_node *node,
469921ebfbe7SNikolay Borisov 				     struct btrfs_delayed_extent_op *extent_op)
47005d4f98a2SYan Zheng {
47019dcdbe01SNikolay Borisov 	struct btrfs_fs_info *fs_info = trans->fs_info;
470229cbcf40SJosef Bacik 	struct btrfs_root *extent_root;
47035d4f98a2SYan Zheng 	int ret;
47045d4f98a2SYan Zheng 	struct btrfs_extent_item *extent_item;
47054e6bd4e0SNikolay Borisov 	struct btrfs_key extent_key;
47065d4f98a2SYan Zheng 	struct btrfs_tree_block_info *block_info;
47075d4f98a2SYan Zheng 	struct btrfs_extent_inline_ref *iref;
47085d4f98a2SYan Zheng 	struct btrfs_path *path;
47095d4f98a2SYan Zheng 	struct extent_buffer *leaf;
47104e6bd4e0SNikolay Borisov 	struct btrfs_delayed_tree_ref *ref;
47113173a18fSJosef Bacik 	u32 size = sizeof(*extent_item) + sizeof(*iref);
471221ebfbe7SNikolay Borisov 	u64 flags = extent_op->flags_to_set;
47130b246afaSJeff Mahoney 	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
47143173a18fSJosef Bacik 
47154e6bd4e0SNikolay Borisov 	ref = btrfs_delayed_node_to_tree_ref(node);
47164e6bd4e0SNikolay Borisov 
47174e6bd4e0SNikolay Borisov 	extent_key.objectid = node->bytenr;
47184e6bd4e0SNikolay Borisov 	if (skinny_metadata) {
47194e6bd4e0SNikolay Borisov 		extent_key.offset = ref->level;
47204e6bd4e0SNikolay Borisov 		extent_key.type = BTRFS_METADATA_ITEM_KEY;
47214e6bd4e0SNikolay Borisov 	} else {
47224e6bd4e0SNikolay Borisov 		extent_key.offset = node->num_bytes;
47234e6bd4e0SNikolay Borisov 		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
47243173a18fSJosef Bacik 		size += sizeof(*block_info);
47254e6bd4e0SNikolay Borisov 	}
47265d4f98a2SYan Zheng 
47275d4f98a2SYan Zheng 	path = btrfs_alloc_path();
472880ee54bfSJosef Bacik 	if (!path)
4729d8926bb3SMark Fasheh 		return -ENOMEM;
47305d4f98a2SYan Zheng 
473129cbcf40SJosef Bacik 	extent_root = btrfs_extent_root(fs_info, extent_key.objectid);
473229cbcf40SJosef Bacik 	ret = btrfs_insert_empty_item(trans, extent_root, path, &extent_key,
473329cbcf40SJosef Bacik 				      size);
473479787eaaSJeff Mahoney 	if (ret) {
4735dd825259SChris Mason 		btrfs_free_path(path);
473679787eaaSJeff Mahoney 		return ret;
473779787eaaSJeff Mahoney 	}
47385d4f98a2SYan Zheng 
47395d4f98a2SYan Zheng 	leaf = path->nodes[0];
47405d4f98a2SYan Zheng 	extent_item = btrfs_item_ptr(leaf, path->slots[0],
47415d4f98a2SYan Zheng 				     struct btrfs_extent_item);
47425d4f98a2SYan Zheng 	btrfs_set_extent_refs(leaf, extent_item, 1);
47435d4f98a2SYan Zheng 	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
47445d4f98a2SYan Zheng 	btrfs_set_extent_flags(leaf, extent_item,
47455d4f98a2SYan Zheng 			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
47465d4f98a2SYan Zheng 
47473173a18fSJosef Bacik 	if (skinny_metadata) {
47483173a18fSJosef Bacik 		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
47493173a18fSJosef Bacik 	} else {
47503173a18fSJosef Bacik 		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
475121ebfbe7SNikolay Borisov 		btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
47524e6bd4e0SNikolay Borisov 		btrfs_set_tree_block_level(leaf, block_info, ref->level);
47535d4f98a2SYan Zheng 		iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
47543173a18fSJosef Bacik 	}
47553173a18fSJosef Bacik 
4756d4b20733SNikolay Borisov 	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
47575d4f98a2SYan Zheng 		btrfs_set_extent_inline_ref_type(leaf, iref,
47585d4f98a2SYan Zheng 						 BTRFS_SHARED_BLOCK_REF_KEY);
4759d4b20733SNikolay Borisov 		btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
47605d4f98a2SYan Zheng 	} else {
47615d4f98a2SYan Zheng 		btrfs_set_extent_inline_ref_type(leaf, iref,
47625d4f98a2SYan Zheng 						 BTRFS_TREE_BLOCK_REF_KEY);
47634e6bd4e0SNikolay Borisov 		btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
47645d4f98a2SYan Zheng 	}
47655d4f98a2SYan Zheng 
4766d5e09e38SFilipe Manana 	btrfs_mark_buffer_dirty(trans, leaf);
47675d4f98a2SYan Zheng 	btrfs_free_path(path);
47685d4f98a2SYan Zheng 
476934666705SJosef Bacik 	return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
47705d4f98a2SYan Zheng }
47715d4f98a2SYan Zheng 
/*
 * Queue a BTRFS_ADD_DELAYED_EXTENT data ref for the extent described by @ins.
 *
 * @trans:     transaction handle
 * @root:      root the extent is accounted to; must not be the log tree
 * @owner:     objectid recorded in the data ref
 * @offset:    offset recorded in the data ref
 * @ram_bytes: passed through to btrfs_add_delayed_data_ref()
 * @ins:       objectid is the extent start, offset is its length
 *
 * Returns the result of btrfs_add_delayed_data_ref().
 */
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root, u64 owner,
				     u64 offset, u64 ram_bytes,
				     struct btrfs_key *ins)
{
	const u64 root_id = root->root_key.objectid;
	struct btrfs_ref generic_ref = { 0 };

	BUG_ON(root_id == BTRFS_TREE_LOG_OBJECTID);

	btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
			       ins->objectid, ins->offset, 0);
	btrfs_init_data_ref(&generic_ref, root_id, owner, offset, 0, false);
	btrfs_ref_tree_mod(root->fs_info, &generic_ref);

	return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes);
}
4789e02119d5SChris Mason 
4790e02119d5SChris Mason /*
4791e02119d5SChris Mason  * this is used by the tree logging recovery code.  It records that
4792e02119d5SChris Mason  * an extent has been allocated and makes sure to clear the free
4793e02119d5SChris Mason  * space cache bits as well
4794e02119d5SChris Mason  */
btrfs_alloc_logged_file_extent(struct btrfs_trans_handle * trans,u64 root_objectid,u64 owner,u64 offset,struct btrfs_key * ins)47955d4f98a2SYan Zheng int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
47965d4f98a2SYan Zheng 				   u64 root_objectid, u64 owner, u64 offset,
47975d4f98a2SYan Zheng 				   struct btrfs_key *ins)
4798e02119d5SChris Mason {
479961da2abfSNikolay Borisov 	struct btrfs_fs_info *fs_info = trans->fs_info;
4800e02119d5SChris Mason 	int ret;
480132da5386SDavid Sterba 	struct btrfs_block_group *block_group;
4802ed7a6948SWang Xiaoguang 	struct btrfs_space_info *space_info;
48038c2a1a30SJosef Bacik 
48048c2a1a30SJosef Bacik 	/*
48058c2a1a30SJosef Bacik 	 * Mixed block groups will exclude before processing the log so we only
480601327610SNicholas D Steeves 	 * need to do the exclude dance if this fs isn't mixed.
48078c2a1a30SJosef Bacik 	 */
48080b246afaSJeff Mahoney 	if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
48092ff7e61eSJeff Mahoney 		ret = __exclude_logged_extent(fs_info, ins->objectid,
48102ff7e61eSJeff Mahoney 					      ins->offset);
48118c2a1a30SJosef Bacik 		if (ret)
48128c2a1a30SJosef Bacik 			return ret;
48138c2a1a30SJosef Bacik 	}
4814e02119d5SChris Mason 
48150b246afaSJeff Mahoney 	block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
48168c2a1a30SJosef Bacik 	if (!block_group)
48178c2a1a30SJosef Bacik 		return -EINVAL;
481811833d66SYan Zheng 
4819ed7a6948SWang Xiaoguang 	space_info = block_group->space_info;
4820ed7a6948SWang Xiaoguang 	spin_lock(&space_info->lock);
4821ed7a6948SWang Xiaoguang 	spin_lock(&block_group->lock);
4822ed7a6948SWang Xiaoguang 	space_info->bytes_reserved += ins->offset;
4823ed7a6948SWang Xiaoguang 	block_group->reserved += ins->offset;
4824ed7a6948SWang Xiaoguang 	spin_unlock(&block_group->lock);
4825ed7a6948SWang Xiaoguang 	spin_unlock(&space_info->lock);
4826ed7a6948SWang Xiaoguang 
4827ef89b824SNikolay Borisov 	ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
4828ef89b824SNikolay Borisov 					 offset, ins, 1);
4829bd727173SJosef Bacik 	if (ret)
4830ab9b2c7bSJosef Bacik 		btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
4831b50c6e25SJosef Bacik 	btrfs_put_block_group(block_group);
4832e02119d5SChris Mason 	return ret;
4833e02119d5SChris Mason }
4834e02119d5SChris Mason 
483548a3b636SEric Sandeen static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytenr,int level,u64 owner,enum btrfs_lock_nesting nest)483648a3b636SEric Sandeen btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
48379631e4ccSJosef Bacik 		      u64 bytenr, int level, u64 owner,
48389631e4ccSJosef Bacik 		      enum btrfs_lock_nesting nest)
483965b51a00SChris Mason {
48400b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
484165b51a00SChris Mason 	struct extent_buffer *buf;
4842b40130b2SJosef Bacik 	u64 lockdep_owner = owner;
484365b51a00SChris Mason 
48443fbaf258SJosef Bacik 	buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
4845c871b0f2SLiu Bo 	if (IS_ERR(buf))
4846c871b0f2SLiu Bo 		return buf;
4847c871b0f2SLiu Bo 
4848b72c3abaSQu Wenruo 	/*
4849b72c3abaSQu Wenruo 	 * Extra safety check in case the extent tree is corrupted and extent
4850b72c3abaSQu Wenruo 	 * allocator chooses to use a tree block which is already used and
4851b72c3abaSQu Wenruo 	 * locked.
4852b72c3abaSQu Wenruo 	 */
4853b72c3abaSQu Wenruo 	if (buf->lock_owner == current->pid) {
4854b72c3abaSQu Wenruo 		btrfs_err_rl(fs_info,
4855b72c3abaSQu Wenruo "tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
4856b72c3abaSQu Wenruo 			buf->start, btrfs_header_owner(buf), current->pid);
4857b72c3abaSQu Wenruo 		free_extent_buffer(buf);
4858b72c3abaSQu Wenruo 		return ERR_PTR(-EUCLEAN);
4859b72c3abaSQu Wenruo 	}
4860b72c3abaSQu Wenruo 
4861e114c545SJosef Bacik 	/*
4862b40130b2SJosef Bacik 	 * The reloc trees are just snapshots, so we need them to appear to be
4863b40130b2SJosef Bacik 	 * just like any other fs tree WRT lockdep.
4864b40130b2SJosef Bacik 	 *
4865b40130b2SJosef Bacik 	 * The exception however is in replace_path() in relocation, where we
4866b40130b2SJosef Bacik 	 * hold the lock on the original fs root and then search for the reloc
4867b40130b2SJosef Bacik 	 * root.  At that point we need to make sure any reloc root buffers are
4868b40130b2SJosef Bacik 	 * set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make
4869b40130b2SJosef Bacik 	 * lockdep happy.
4870b40130b2SJosef Bacik 	 */
4871b40130b2SJosef Bacik 	if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID &&
4872b40130b2SJosef Bacik 	    !test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
4873b40130b2SJosef Bacik 		lockdep_owner = BTRFS_FS_TREE_OBJECTID;
4874b40130b2SJosef Bacik 
4875618d1d7dSFilipe Manana 	/* btrfs_clear_buffer_dirty() accesses generation field. */
4876cbddcc4fSTetsuo Handa 	btrfs_set_header_generation(buf, trans->transid);
4877cbddcc4fSTetsuo Handa 
4878b40130b2SJosef Bacik 	/*
4879e114c545SJosef Bacik 	 * This needs to stay, because we could allocate a freed block from an
4880e114c545SJosef Bacik 	 * old tree into a new tree, so we need to make sure this new block is
4881e114c545SJosef Bacik 	 * set to the appropriate level and owner.
4882e114c545SJosef Bacik 	 */
4883b40130b2SJosef Bacik 	btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level);
4884b40130b2SJosef Bacik 
48859631e4ccSJosef Bacik 	__btrfs_tree_lock(buf, nest);
4886190a8339SJosef Bacik 	btrfs_clear_buffer_dirty(trans, buf);
48873083ee2eSJosef Bacik 	clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
4888d3575156SNaohiro Aota 	clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);
4889b4ce94deSChris Mason 
48904db8c528SDavid Sterba 	set_extent_buffer_uptodate(buf);
4891b4ce94deSChris Mason 
4892bc877d28SNikolay Borisov 	memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
4893bc877d28SNikolay Borisov 	btrfs_set_header_level(buf, level);
4894bc877d28SNikolay Borisov 	btrfs_set_header_bytenr(buf, buf->start);
4895bc877d28SNikolay Borisov 	btrfs_set_header_generation(buf, trans->transid);
4896bc877d28SNikolay Borisov 	btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
4897bc877d28SNikolay Borisov 	btrfs_set_header_owner(buf, owner);
4898de37aa51SNikolay Borisov 	write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
4899bc877d28SNikolay Borisov 	write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
4900d0c803c4SChris Mason 	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4901656f30dbSFilipe Manana 		buf->log_index = root->log_transid % 2;
49028cef4e16SYan, Zheng 		/*
49038cef4e16SYan, Zheng 		 * we allow two log transactions at a time, use different
490452042d8eSAndrea Gelmini 		 * EXTENT bit to differentiate dirty pages.
49058cef4e16SYan, Zheng 		 */
4906656f30dbSFilipe Manana 		if (buf->log_index == 0)
4907fe1a598cSDavid Sterba 			set_extent_bit(&root->dirty_log_pages, buf->start,
4908fe1a598cSDavid Sterba 				       buf->start + buf->len - 1,
49091d126800SDavid Sterba 				       EXTENT_DIRTY, NULL);
49108cef4e16SYan, Zheng 		else
4911eea8686eSDavid Sterba 			set_extent_bit(&root->dirty_log_pages, buf->start,
4912eea8686eSDavid Sterba 				       buf->start + buf->len - 1,
49131d126800SDavid Sterba 				       EXTENT_NEW, NULL);
4914d0c803c4SChris Mason 	} else {
4915656f30dbSFilipe Manana 		buf->log_index = -1;
4916fe1a598cSDavid Sterba 		set_extent_bit(&trans->transaction->dirty_pages, buf->start,
49171d126800SDavid Sterba 			       buf->start + buf->len - 1, EXTENT_DIRTY, NULL);
4918d0c803c4SChris Mason 	}
4919b4ce94deSChris Mason 	/* this returns a buffer locked for blocking */
492065b51a00SChris Mason 	return buf;
492165b51a00SChris Mason }
492265b51a00SChris Mason 
4923fec577fbSChris Mason /*
4924f0486c68SYan, Zheng  * finds a free extent and does all the dirty work required for allocation
492567b7859eSOmar Sandoval  * returns the tree buffer or an ERR_PTR on error.
4926fec577fbSChris Mason  */
btrfs_alloc_tree_block(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 parent,u64 root_objectid,const struct btrfs_disk_key * key,int level,u64 hint,u64 empty_size,enum btrfs_lock_nesting nest)49274d75f8a9SDavid Sterba struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
49284d75f8a9SDavid Sterba 					     struct btrfs_root *root,
49295d4f98a2SYan Zheng 					     u64 parent, u64 root_objectid,
4930310712b2SOmar Sandoval 					     const struct btrfs_disk_key *key,
4931310712b2SOmar Sandoval 					     int level, u64 hint,
49329631e4ccSJosef Bacik 					     u64 empty_size,
49339631e4ccSJosef Bacik 					     enum btrfs_lock_nesting nest)
4934fec577fbSChris Mason {
49350b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
4936e2fa7227SChris Mason 	struct btrfs_key ins;
4937f0486c68SYan, Zheng 	struct btrfs_block_rsv *block_rsv;
49385f39d397SChris Mason 	struct extent_buffer *buf;
493967b7859eSOmar Sandoval 	struct btrfs_delayed_extent_op *extent_op;
4940ed4f255bSQu Wenruo 	struct btrfs_ref generic_ref = { 0 };
4941f0486c68SYan, Zheng 	u64 flags = 0;
4942f0486c68SYan, Zheng 	int ret;
49430b246afaSJeff Mahoney 	u32 blocksize = fs_info->nodesize;
49440b246afaSJeff Mahoney 	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4945f0486c68SYan, Zheng 
494605653ef3SDavid Sterba #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
49470b246afaSJeff Mahoney 	if (btrfs_is_testing(fs_info)) {
4948faa2dbf0SJosef Bacik 		buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
49499631e4ccSJosef Bacik 					    level, root_objectid, nest);
4950faa2dbf0SJosef Bacik 		if (!IS_ERR(buf))
4951faa2dbf0SJosef Bacik 			root->alloc_bytenr += blocksize;
4952faa2dbf0SJosef Bacik 		return buf;
4953faa2dbf0SJosef Bacik 	}
495405653ef3SDavid Sterba #endif
4955fccb84c9SDavid Sterba 
495667f9c220SJosef Bacik 	block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
4957f0486c68SYan, Zheng 	if (IS_ERR(block_rsv))
4958f0486c68SYan, Zheng 		return ERR_CAST(block_rsv);
4959f0486c68SYan, Zheng 
496018513091SWang Xiaoguang 	ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
4961e570fd27SMiao Xie 				   empty_size, hint, &ins, 0, 0);
496267b7859eSOmar Sandoval 	if (ret)
496367b7859eSOmar Sandoval 		goto out_unuse;
496455c69072SChris Mason 
4965bc877d28SNikolay Borisov 	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
49669631e4ccSJosef Bacik 				    root_objectid, nest);
496767b7859eSOmar Sandoval 	if (IS_ERR(buf)) {
496867b7859eSOmar Sandoval 		ret = PTR_ERR(buf);
496967b7859eSOmar Sandoval 		goto out_free_reserved;
497067b7859eSOmar Sandoval 	}
4971f0486c68SYan, Zheng 
4972f0486c68SYan, Zheng 	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4973f0486c68SYan, Zheng 		if (parent == 0)
4974f0486c68SYan, Zheng 			parent = ins.objectid;
4975f0486c68SYan, Zheng 		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4976f0486c68SYan, Zheng 	} else
4977f0486c68SYan, Zheng 		BUG_ON(parent > 0);
4978f0486c68SYan, Zheng 
4979f0486c68SYan, Zheng 	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
498078a6184aSMiao Xie 		extent_op = btrfs_alloc_delayed_extent_op();
498167b7859eSOmar Sandoval 		if (!extent_op) {
498267b7859eSOmar Sandoval 			ret = -ENOMEM;
498367b7859eSOmar Sandoval 			goto out_free_buf;
498467b7859eSOmar Sandoval 		}
4985f0486c68SYan, Zheng 		if (key)
4986f0486c68SYan, Zheng 			memcpy(&extent_op->key, key, sizeof(extent_op->key));
4987f0486c68SYan, Zheng 		else
4988f0486c68SYan, Zheng 			memset(&extent_op->key, 0, sizeof(extent_op->key));
4989f0486c68SYan, Zheng 		extent_op->flags_to_set = flags;
499035b3ad50SDavid Sterba 		extent_op->update_key = skinny_metadata ? false : true;
499135b3ad50SDavid Sterba 		extent_op->update_flags = true;
4992b1c79e09SJosef Bacik 		extent_op->level = level;
4993f0486c68SYan, Zheng 
4994ed4f255bSQu Wenruo 		btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4995ed4f255bSQu Wenruo 				       ins.objectid, ins.offset, parent);
4996f42c5da6SNikolay Borisov 		btrfs_init_tree_ref(&generic_ref, level, root_objectid,
4997f42c5da6SNikolay Borisov 				    root->root_key.objectid, false);
49988a5040f7SQu Wenruo 		btrfs_ref_tree_mod(fs_info, &generic_ref);
49992187374fSJosef Bacik 		ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
500067b7859eSOmar Sandoval 		if (ret)
500167b7859eSOmar Sandoval 			goto out_free_delayed;
5002f0486c68SYan, Zheng 	}
5003fec577fbSChris Mason 	return buf;
500467b7859eSOmar Sandoval 
500567b7859eSOmar Sandoval out_free_delayed:
500667b7859eSOmar Sandoval 	btrfs_free_delayed_extent_op(extent_op);
500767b7859eSOmar Sandoval out_free_buf:
500819ea40ddSQu Wenruo 	btrfs_tree_unlock(buf);
500967b7859eSOmar Sandoval 	free_extent_buffer(buf);
501067b7859eSOmar Sandoval out_free_reserved:
50112ff7e61eSJeff Mahoney 	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
501267b7859eSOmar Sandoval out_unuse:
501367f9c220SJosef Bacik 	btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
501467b7859eSOmar Sandoval 	return ERR_PTR(ret);
5015fec577fbSChris Mason }
5016a28ec197SChris Mason 
50172c47e605SYan Zheng struct walk_control {
50182c47e605SYan Zheng 	u64 refs[BTRFS_MAX_LEVEL];
50192c47e605SYan Zheng 	u64 flags[BTRFS_MAX_LEVEL];
50202c47e605SYan Zheng 	struct btrfs_key update_progress;
5021aea6f028SJosef Bacik 	struct btrfs_key drop_progress;
5022aea6f028SJosef Bacik 	int drop_level;
50232c47e605SYan Zheng 	int stage;
50242c47e605SYan Zheng 	int level;
50252c47e605SYan Zheng 	int shared_level;
50262c47e605SYan Zheng 	int update_ref;
50272c47e605SYan Zheng 	int keep_locks;
50281c4850e2SYan, Zheng 	int reada_slot;
50291c4850e2SYan, Zheng 	int reada_count;
503078c52d9eSJosef Bacik 	int restarted;
50312c47e605SYan Zheng };
50322c47e605SYan Zheng 
50332c47e605SYan Zheng #define DROP_REFERENCE	1
50342c47e605SYan Zheng #define UPDATE_BACKREF	2
50352c47e605SYan Zheng 
reada_walk_down(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct walk_control * wc,struct btrfs_path * path)50361c4850e2SYan, Zheng static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
50371c4850e2SYan, Zheng 				     struct btrfs_root *root,
50381c4850e2SYan, Zheng 				     struct walk_control *wc,
50391c4850e2SYan, Zheng 				     struct btrfs_path *path)
50401c4850e2SYan, Zheng {
50410b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
50421c4850e2SYan, Zheng 	u64 bytenr;
50431c4850e2SYan, Zheng 	u64 generation;
50441c4850e2SYan, Zheng 	u64 refs;
504594fcca9fSYan, Zheng 	u64 flags;
50461c4850e2SYan, Zheng 	u32 nritems;
50471c4850e2SYan, Zheng 	struct btrfs_key key;
50481c4850e2SYan, Zheng 	struct extent_buffer *eb;
50491c4850e2SYan, Zheng 	int ret;
50501c4850e2SYan, Zheng 	int slot;
50511c4850e2SYan, Zheng 	int nread = 0;
50521c4850e2SYan, Zheng 
50531c4850e2SYan, Zheng 	if (path->slots[wc->level] < wc->reada_slot) {
50541c4850e2SYan, Zheng 		wc->reada_count = wc->reada_count * 2 / 3;
50551c4850e2SYan, Zheng 		wc->reada_count = max(wc->reada_count, 2);
50561c4850e2SYan, Zheng 	} else {
50571c4850e2SYan, Zheng 		wc->reada_count = wc->reada_count * 3 / 2;
50581c4850e2SYan, Zheng 		wc->reada_count = min_t(int, wc->reada_count,
50590b246afaSJeff Mahoney 					BTRFS_NODEPTRS_PER_BLOCK(fs_info));
50601c4850e2SYan, Zheng 	}
50611c4850e2SYan, Zheng 
50621c4850e2SYan, Zheng 	eb = path->nodes[wc->level];
50631c4850e2SYan, Zheng 	nritems = btrfs_header_nritems(eb);
50641c4850e2SYan, Zheng 
50651c4850e2SYan, Zheng 	for (slot = path->slots[wc->level]; slot < nritems; slot++) {
50661c4850e2SYan, Zheng 		if (nread >= wc->reada_count)
50671c4850e2SYan, Zheng 			break;
50681c4850e2SYan, Zheng 
50691c4850e2SYan, Zheng 		cond_resched();
50701c4850e2SYan, Zheng 		bytenr = btrfs_node_blockptr(eb, slot);
50711c4850e2SYan, Zheng 		generation = btrfs_node_ptr_generation(eb, slot);
50721c4850e2SYan, Zheng 
50731c4850e2SYan, Zheng 		if (slot == path->slots[wc->level])
50741c4850e2SYan, Zheng 			goto reada;
50751c4850e2SYan, Zheng 
50761c4850e2SYan, Zheng 		if (wc->stage == UPDATE_BACKREF &&
50771c4850e2SYan, Zheng 		    generation <= root->root_key.offset)
50781c4850e2SYan, Zheng 			continue;
50791c4850e2SYan, Zheng 
508094fcca9fSYan, Zheng 		/* We don't lock the tree block, it's OK to be racy here */
50812ff7e61eSJeff Mahoney 		ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
50823173a18fSJosef Bacik 					       wc->level - 1, 1, &refs,
50833173a18fSJosef Bacik 					       &flags);
508479787eaaSJeff Mahoney 		/* We don't care about errors in readahead. */
508579787eaaSJeff Mahoney 		if (ret < 0)
508679787eaaSJeff Mahoney 			continue;
50871c4850e2SYan, Zheng 		BUG_ON(refs == 0);
508894fcca9fSYan, Zheng 
508994fcca9fSYan, Zheng 		if (wc->stage == DROP_REFERENCE) {
50901c4850e2SYan, Zheng 			if (refs == 1)
50911c4850e2SYan, Zheng 				goto reada;
50921c4850e2SYan, Zheng 
509394fcca9fSYan, Zheng 			if (wc->level == 1 &&
509494fcca9fSYan, Zheng 			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
509594fcca9fSYan, Zheng 				continue;
50961c4850e2SYan, Zheng 			if (!wc->update_ref ||
50971c4850e2SYan, Zheng 			    generation <= root->root_key.offset)
50981c4850e2SYan, Zheng 				continue;
50991c4850e2SYan, Zheng 			btrfs_node_key_to_cpu(eb, &key, slot);
51001c4850e2SYan, Zheng 			ret = btrfs_comp_cpu_keys(&key,
51011c4850e2SYan, Zheng 						  &wc->update_progress);
51021c4850e2SYan, Zheng 			if (ret < 0)
51031c4850e2SYan, Zheng 				continue;
510494fcca9fSYan, Zheng 		} else {
510594fcca9fSYan, Zheng 			if (wc->level == 1 &&
510694fcca9fSYan, Zheng 			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
510794fcca9fSYan, Zheng 				continue;
51081c4850e2SYan, Zheng 		}
51091c4850e2SYan, Zheng reada:
5110bfb484d9SJosef Bacik 		btrfs_readahead_node_child(eb, slot);
51111c4850e2SYan, Zheng 		nread++;
51121c4850e2SYan, Zheng 	}
51131c4850e2SYan, Zheng 	wc->reada_slot = slot;
51141c4850e2SYan, Zheng }
51151c4850e2SYan, Zheng 
51169aca1d51SChris Mason /*
51172c016dc2SLiu Bo  * helper to process tree block while walking down the tree.
51182c47e605SYan Zheng  *
51192c47e605SYan Zheng  * when wc->stage == UPDATE_BACKREF, this function updates
51202c47e605SYan Zheng  * back refs for pointers in the block.
51212c47e605SYan Zheng  *
51222c47e605SYan Zheng  * NOTE: return value 1 means we should stop walking down.
5123f82d02d9SYan Zheng  */
walk_down_proc(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc,int lookup_info)51242c47e605SYan Zheng static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
51252c47e605SYan Zheng 				   struct btrfs_root *root,
51262c47e605SYan Zheng 				   struct btrfs_path *path,
512794fcca9fSYan, Zheng 				   struct walk_control *wc, int lookup_info)
51282c47e605SYan Zheng {
51292ff7e61eSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
51302c47e605SYan Zheng 	int level = wc->level;
51312c47e605SYan Zheng 	struct extent_buffer *eb = path->nodes[level];
51322c47e605SYan Zheng 	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
51332c47e605SYan Zheng 	int ret;
51342c47e605SYan Zheng 
51352c47e605SYan Zheng 	if (wc->stage == UPDATE_BACKREF &&
51362c47e605SYan Zheng 	    btrfs_header_owner(eb) != root->root_key.objectid)
51372c47e605SYan Zheng 		return 1;
51382c47e605SYan Zheng 
51392c47e605SYan Zheng 	/*
51402c47e605SYan Zheng 	 * when reference count of tree block is 1, it won't increase
51412c47e605SYan Zheng 	 * again. once full backref flag is set, we never clear it.
51422c47e605SYan Zheng 	 */
514394fcca9fSYan, Zheng 	if (lookup_info &&
514494fcca9fSYan, Zheng 	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
514594fcca9fSYan, Zheng 	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
51462c47e605SYan Zheng 		BUG_ON(!path->locks[level]);
51472ff7e61eSJeff Mahoney 		ret = btrfs_lookup_extent_info(trans, fs_info,
51483173a18fSJosef Bacik 					       eb->start, level, 1,
51492c47e605SYan Zheng 					       &wc->refs[level],
51502c47e605SYan Zheng 					       &wc->flags[level]);
515179787eaaSJeff Mahoney 		BUG_ON(ret == -ENOMEM);
515279787eaaSJeff Mahoney 		if (ret)
515379787eaaSJeff Mahoney 			return ret;
51542c47e605SYan Zheng 		BUG_ON(wc->refs[level] == 0);
51552c47e605SYan Zheng 	}
51562c47e605SYan Zheng 
51572c47e605SYan Zheng 	if (wc->stage == DROP_REFERENCE) {
51582c47e605SYan Zheng 		if (wc->refs[level] > 1)
51592c47e605SYan Zheng 			return 1;
51602c47e605SYan Zheng 
51612c47e605SYan Zheng 		if (path->locks[level] && !wc->keep_locks) {
5162bd681513SChris Mason 			btrfs_tree_unlock_rw(eb, path->locks[level]);
51632c47e605SYan Zheng 			path->locks[level] = 0;
51642c47e605SYan Zheng 		}
51652c47e605SYan Zheng 		return 0;
51662c47e605SYan Zheng 	}
51672c47e605SYan Zheng 
51682c47e605SYan Zheng 	/* wc->stage == UPDATE_BACKREF */
51692c47e605SYan Zheng 	if (!(wc->flags[level] & flag)) {
51702c47e605SYan Zheng 		BUG_ON(!path->locks[level]);
5171e339a6b0SJosef Bacik 		ret = btrfs_inc_ref(trans, root, eb, 1);
517279787eaaSJeff Mahoney 		BUG_ON(ret); /* -ENOMEM */
5173e339a6b0SJosef Bacik 		ret = btrfs_dec_ref(trans, root, eb, 0);
517479787eaaSJeff Mahoney 		BUG_ON(ret); /* -ENOMEM */
51754aec05faSJosef Bacik 		ret = btrfs_set_disk_extent_flags(trans, eb, flag);
517679787eaaSJeff Mahoney 		BUG_ON(ret); /* -ENOMEM */
51772c47e605SYan Zheng 		wc->flags[level] |= flag;
51782c47e605SYan Zheng 	}
51792c47e605SYan Zheng 
51802c47e605SYan Zheng 	/*
51812c47e605SYan Zheng 	 * the block is shared by multiple trees, so it's not good to
51822c47e605SYan Zheng 	 * keep the tree lock
51832c47e605SYan Zheng 	 */
51842c47e605SYan Zheng 	if (path->locks[level] && level > 0) {
5185bd681513SChris Mason 		btrfs_tree_unlock_rw(eb, path->locks[level]);
51862c47e605SYan Zheng 		path->locks[level] = 0;
51872c47e605SYan Zheng 	}
51882c47e605SYan Zheng 	return 0;
51892c47e605SYan Zheng }
51902c47e605SYan Zheng 
51912c47e605SYan Zheng /*
519278c52d9eSJosef Bacik  * This is used to verify a ref exists for this root to deal with a bug where we
519378c52d9eSJosef Bacik  * would have a drop_progress key that hadn't been updated properly.
519478c52d9eSJosef Bacik  */
check_ref_exists(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytenr,u64 parent,int level)519578c52d9eSJosef Bacik static int check_ref_exists(struct btrfs_trans_handle *trans,
519678c52d9eSJosef Bacik 			    struct btrfs_root *root, u64 bytenr, u64 parent,
519778c52d9eSJosef Bacik 			    int level)
519878c52d9eSJosef Bacik {
519978c52d9eSJosef Bacik 	struct btrfs_path *path;
520078c52d9eSJosef Bacik 	struct btrfs_extent_inline_ref *iref;
520178c52d9eSJosef Bacik 	int ret;
520278c52d9eSJosef Bacik 
520378c52d9eSJosef Bacik 	path = btrfs_alloc_path();
520478c52d9eSJosef Bacik 	if (!path)
520578c52d9eSJosef Bacik 		return -ENOMEM;
520678c52d9eSJosef Bacik 
520778c52d9eSJosef Bacik 	ret = lookup_extent_backref(trans, path, &iref, bytenr,
520878c52d9eSJosef Bacik 				    root->fs_info->nodesize, parent,
520978c52d9eSJosef Bacik 				    root->root_key.objectid, level, 0);
521078c52d9eSJosef Bacik 	btrfs_free_path(path);
521178c52d9eSJosef Bacik 	if (ret == -ENOENT)
521278c52d9eSJosef Bacik 		return 0;
521378c52d9eSJosef Bacik 	if (ret < 0)
521478c52d9eSJosef Bacik 		return ret;
521578c52d9eSJosef Bacik 	return 1;
521678c52d9eSJosef Bacik }
521778c52d9eSJosef Bacik 
521878c52d9eSJosef Bacik /*
52192c016dc2SLiu Bo  * helper to process tree block pointer.
52201c4850e2SYan, Zheng  *
52211c4850e2SYan, Zheng  * when wc->stage == DROP_REFERENCE, this function checks
52221c4850e2SYan, Zheng  * reference count of the block pointed to. if the block
52231c4850e2SYan, Zheng  * is shared and we need update back refs for the subtree
52241c4850e2SYan, Zheng  * rooted at the block, this function changes wc->stage to
52251c4850e2SYan, Zheng  * UPDATE_BACKREF. if the block is shared and there is no
52261c4850e2SYan, Zheng  * need to update back, this function drops the reference
52271c4850e2SYan, Zheng  * to the block.
52281c4850e2SYan, Zheng  *
52291c4850e2SYan, Zheng  * NOTE: return value 1 means we should stop walking down.
52301c4850e2SYan, Zheng  */
do_walk_down(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc,int * lookup_info)52311c4850e2SYan, Zheng static noinline int do_walk_down(struct btrfs_trans_handle *trans,
52321c4850e2SYan, Zheng 				 struct btrfs_root *root,
52331c4850e2SYan, Zheng 				 struct btrfs_path *path,
523494fcca9fSYan, Zheng 				 struct walk_control *wc, int *lookup_info)
52351c4850e2SYan, Zheng {
52360b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
52371c4850e2SYan, Zheng 	u64 bytenr;
52381c4850e2SYan, Zheng 	u64 generation;
52391c4850e2SYan, Zheng 	u64 parent;
5240789d6a3aSQu Wenruo 	struct btrfs_tree_parent_check check = { 0 };
52411c4850e2SYan, Zheng 	struct btrfs_key key;
5242ffd4bb2aSQu Wenruo 	struct btrfs_ref ref = { 0 };
52431c4850e2SYan, Zheng 	struct extent_buffer *next;
52441c4850e2SYan, Zheng 	int level = wc->level;
52451c4850e2SYan, Zheng 	int reada = 0;
52461c4850e2SYan, Zheng 	int ret = 0;
52471152651aSMark Fasheh 	bool need_account = false;
52481c4850e2SYan, Zheng 
52491c4850e2SYan, Zheng 	generation = btrfs_node_ptr_generation(path->nodes[level],
52501c4850e2SYan, Zheng 					       path->slots[level]);
52511c4850e2SYan, Zheng 	/*
52521c4850e2SYan, Zheng 	 * if the lower level block was created before the snapshot
52531c4850e2SYan, Zheng 	 * was created, we know there is no need to update back refs
52541c4850e2SYan, Zheng 	 * for the subtree
52551c4850e2SYan, Zheng 	 */
52561c4850e2SYan, Zheng 	if (wc->stage == UPDATE_BACKREF &&
525794fcca9fSYan, Zheng 	    generation <= root->root_key.offset) {
525894fcca9fSYan, Zheng 		*lookup_info = 1;
52591c4850e2SYan, Zheng 		return 1;
526094fcca9fSYan, Zheng 	}
52611c4850e2SYan, Zheng 
52621c4850e2SYan, Zheng 	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
5263789d6a3aSQu Wenruo 
5264789d6a3aSQu Wenruo 	check.level = level - 1;
5265789d6a3aSQu Wenruo 	check.transid = generation;
5266789d6a3aSQu Wenruo 	check.owner_root = root->root_key.objectid;
5267789d6a3aSQu Wenruo 	check.has_first_key = true;
5268789d6a3aSQu Wenruo 	btrfs_node_key_to_cpu(path->nodes[level], &check.first_key,
5269581c1760SQu Wenruo 			      path->slots[level]);
52701c4850e2SYan, Zheng 
52710b246afaSJeff Mahoney 	next = find_extent_buffer(fs_info, bytenr);
52721c4850e2SYan, Zheng 	if (!next) {
52733fbaf258SJosef Bacik 		next = btrfs_find_create_tree_block(fs_info, bytenr,
52743fbaf258SJosef Bacik 				root->root_key.objectid, level - 1);
5275c871b0f2SLiu Bo 		if (IS_ERR(next))
5276c871b0f2SLiu Bo 			return PTR_ERR(next);
52771c4850e2SYan, Zheng 		reada = 1;
52781c4850e2SYan, Zheng 	}
52791c4850e2SYan, Zheng 	btrfs_tree_lock(next);
52801c4850e2SYan, Zheng 
52812ff7e61eSJeff Mahoney 	ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
52821c4850e2SYan, Zheng 				       &wc->refs[level - 1],
52831c4850e2SYan, Zheng 				       &wc->flags[level - 1]);
52844867268cSJosef Bacik 	if (ret < 0)
52854867268cSJosef Bacik 		goto out_unlock;
528679787eaaSJeff Mahoney 
5287c2cf52ebSSimon Kirby 	if (unlikely(wc->refs[level - 1] == 0)) {
52880b246afaSJeff Mahoney 		btrfs_err(fs_info, "Missing references.");
52894867268cSJosef Bacik 		ret = -EIO;
52904867268cSJosef Bacik 		goto out_unlock;
5291c2cf52ebSSimon Kirby 	}
529294fcca9fSYan, Zheng 	*lookup_info = 0;
52931c4850e2SYan, Zheng 
529494fcca9fSYan, Zheng 	if (wc->stage == DROP_REFERENCE) {
52951c4850e2SYan, Zheng 		if (wc->refs[level - 1] > 1) {
52961152651aSMark Fasheh 			need_account = true;
529794fcca9fSYan, Zheng 			if (level == 1 &&
529894fcca9fSYan, Zheng 			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
529994fcca9fSYan, Zheng 				goto skip;
530094fcca9fSYan, Zheng 
53011c4850e2SYan, Zheng 			if (!wc->update_ref ||
53021c4850e2SYan, Zheng 			    generation <= root->root_key.offset)
53031c4850e2SYan, Zheng 				goto skip;
53041c4850e2SYan, Zheng 
53051c4850e2SYan, Zheng 			btrfs_node_key_to_cpu(path->nodes[level], &key,
53061c4850e2SYan, Zheng 					      path->slots[level]);
53071c4850e2SYan, Zheng 			ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
53081c4850e2SYan, Zheng 			if (ret < 0)
53091c4850e2SYan, Zheng 				goto skip;
53101c4850e2SYan, Zheng 
53111c4850e2SYan, Zheng 			wc->stage = UPDATE_BACKREF;
53121c4850e2SYan, Zheng 			wc->shared_level = level - 1;
53131c4850e2SYan, Zheng 		}
531494fcca9fSYan, Zheng 	} else {
531594fcca9fSYan, Zheng 		if (level == 1 &&
531694fcca9fSYan, Zheng 		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
531794fcca9fSYan, Zheng 			goto skip;
53181c4850e2SYan, Zheng 	}
53191c4850e2SYan, Zheng 
5320b9fab919SChris Mason 	if (!btrfs_buffer_uptodate(next, generation, 0)) {
53211c4850e2SYan, Zheng 		btrfs_tree_unlock(next);
53221c4850e2SYan, Zheng 		free_extent_buffer(next);
53231c4850e2SYan, Zheng 		next = NULL;
532494fcca9fSYan, Zheng 		*lookup_info = 1;
53251c4850e2SYan, Zheng 	}
53261c4850e2SYan, Zheng 
53271c4850e2SYan, Zheng 	if (!next) {
53281c4850e2SYan, Zheng 		if (reada && level == 1)
53291c4850e2SYan, Zheng 			reada_walk_down(trans, root, wc, path);
5330789d6a3aSQu Wenruo 		next = read_tree_block(fs_info, bytenr, &check);
533164c043deSLiu Bo 		if (IS_ERR(next)) {
533264c043deSLiu Bo 			return PTR_ERR(next);
533364c043deSLiu Bo 		} else if (!extent_buffer_uptodate(next)) {
5334416bc658SJosef Bacik 			free_extent_buffer(next);
533597d9a8a4STsutomu Itoh 			return -EIO;
5336416bc658SJosef Bacik 		}
53371c4850e2SYan, Zheng 		btrfs_tree_lock(next);
53381c4850e2SYan, Zheng 	}
53391c4850e2SYan, Zheng 
53401c4850e2SYan, Zheng 	level--;
53414867268cSJosef Bacik 	ASSERT(level == btrfs_header_level(next));
53424867268cSJosef Bacik 	if (level != btrfs_header_level(next)) {
53434867268cSJosef Bacik 		btrfs_err(root->fs_info, "mismatched level");
53444867268cSJosef Bacik 		ret = -EIO;
53454867268cSJosef Bacik 		goto out_unlock;
53464867268cSJosef Bacik 	}
53471c4850e2SYan, Zheng 	path->nodes[level] = next;
53481c4850e2SYan, Zheng 	path->slots[level] = 0;
5349ac5887c8SJosef Bacik 	path->locks[level] = BTRFS_WRITE_LOCK;
53501c4850e2SYan, Zheng 	wc->level = level;
53511c4850e2SYan, Zheng 	if (wc->level == 1)
53521c4850e2SYan, Zheng 		wc->reada_slot = 0;
53531c4850e2SYan, Zheng 	return 0;
53541c4850e2SYan, Zheng skip:
53551c4850e2SYan, Zheng 	wc->refs[level - 1] = 0;
53561c4850e2SYan, Zheng 	wc->flags[level - 1] = 0;
535794fcca9fSYan, Zheng 	if (wc->stage == DROP_REFERENCE) {
53581c4850e2SYan, Zheng 		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
53591c4850e2SYan, Zheng 			parent = path->nodes[level]->start;
53601c4850e2SYan, Zheng 		} else {
53614867268cSJosef Bacik 			ASSERT(root->root_key.objectid ==
53621c4850e2SYan, Zheng 			       btrfs_header_owner(path->nodes[level]));
53634867268cSJosef Bacik 			if (root->root_key.objectid !=
53644867268cSJosef Bacik 			    btrfs_header_owner(path->nodes[level])) {
53654867268cSJosef Bacik 				btrfs_err(root->fs_info,
53664867268cSJosef Bacik 						"mismatched block owner");
53674867268cSJosef Bacik 				ret = -EIO;
53684867268cSJosef Bacik 				goto out_unlock;
53694867268cSJosef Bacik 			}
53701c4850e2SYan, Zheng 			parent = 0;
53711c4850e2SYan, Zheng 		}
53721c4850e2SYan, Zheng 
53732cd86d30SQu Wenruo 		/*
537478c52d9eSJosef Bacik 		 * If we had a drop_progress we need to verify the refs are set
537578c52d9eSJosef Bacik 		 * as expected.  If we find our ref then we know that from here
537678c52d9eSJosef Bacik 		 * on out everything should be correct, and we can clear the
537778c52d9eSJosef Bacik 		 * ->restarted flag.
537878c52d9eSJosef Bacik 		 */
537978c52d9eSJosef Bacik 		if (wc->restarted) {
538078c52d9eSJosef Bacik 			ret = check_ref_exists(trans, root, bytenr, parent,
538178c52d9eSJosef Bacik 					       level - 1);
538278c52d9eSJosef Bacik 			if (ret < 0)
538378c52d9eSJosef Bacik 				goto out_unlock;
538478c52d9eSJosef Bacik 			if (ret == 0)
538578c52d9eSJosef Bacik 				goto no_delete;
538678c52d9eSJosef Bacik 			ret = 0;
538778c52d9eSJosef Bacik 			wc->restarted = 0;
538878c52d9eSJosef Bacik 		}
538978c52d9eSJosef Bacik 
539078c52d9eSJosef Bacik 		/*
53912cd86d30SQu Wenruo 		 * Reloc tree doesn't contribute to qgroup numbers, and we have
53922cd86d30SQu Wenruo 		 * already accounted them at merge time (replace_path),
53932cd86d30SQu Wenruo 		 * thus we could skip expensive subtree trace here.
53942cd86d30SQu Wenruo 		 */
53952cd86d30SQu Wenruo 		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
53962cd86d30SQu Wenruo 		    need_account) {
5397deb40627SLu Fengqi 			ret = btrfs_qgroup_trace_subtree(trans, next,
53981152651aSMark Fasheh 							 generation, level - 1);
53991152651aSMark Fasheh 			if (ret) {
54000b246afaSJeff Mahoney 				btrfs_err_rl(fs_info,
54015d163e0eSJeff Mahoney 					     "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
540294647322SDavid Sterba 					     ret);
54031152651aSMark Fasheh 			}
54041152651aSMark Fasheh 		}
5405aea6f028SJosef Bacik 
5406aea6f028SJosef Bacik 		/*
5407aea6f028SJosef Bacik 		 * We need to update the next key in our walk control so we can
5408aea6f028SJosef Bacik 		 * update the drop_progress key accordingly.  We don't care if
5409aea6f028SJosef Bacik 		 * find_next_key doesn't find a key because that means we're at
5410aea6f028SJosef Bacik 		 * the end and are going to clean up now.
5411aea6f028SJosef Bacik 		 */
5412aea6f028SJosef Bacik 		wc->drop_level = level;
5413aea6f028SJosef Bacik 		find_next_key(path, level, &wc->drop_progress);
5414aea6f028SJosef Bacik 
5415ffd4bb2aSQu Wenruo 		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
5416ffd4bb2aSQu Wenruo 				       fs_info->nodesize, parent);
5417f42c5da6SNikolay Borisov 		btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
5418f42c5da6SNikolay Borisov 				    0, false);
5419ffd4bb2aSQu Wenruo 		ret = btrfs_free_extent(trans, &ref);
54204867268cSJosef Bacik 		if (ret)
54214867268cSJosef Bacik 			goto out_unlock;
542294fcca9fSYan, Zheng 	}
542378c52d9eSJosef Bacik no_delete:
54244867268cSJosef Bacik 	*lookup_info = 1;
54254867268cSJosef Bacik 	ret = 1;
54264867268cSJosef Bacik 
54274867268cSJosef Bacik out_unlock:
54281c4850e2SYan, Zheng 	btrfs_tree_unlock(next);
54291c4850e2SYan, Zheng 	free_extent_buffer(next);
54304867268cSJosef Bacik 
54314867268cSJosef Bacik 	return ret;
54321c4850e2SYan, Zheng }
54331c4850e2SYan, Zheng 
54341c4850e2SYan, Zheng /*
54352c016dc2SLiu Bo  * helper to process tree block while walking up the tree.
54362c47e605SYan Zheng  *
54372c47e605SYan Zheng  * when wc->stage == DROP_REFERENCE, this function drops
54382c47e605SYan Zheng  * reference count on the block.
54392c47e605SYan Zheng  *
54402c47e605SYan Zheng  * when wc->stage == UPDATE_BACKREF, this function changes
54412c47e605SYan Zheng  * wc->stage back to DROP_REFERENCE if we changed wc->stage
54422c47e605SYan Zheng  * to UPDATE_BACKREF previously while processing the block.
54432c47e605SYan Zheng  *
54442c47e605SYan Zheng  * NOTE: return value 1 means we should stop walking up.
54452c47e605SYan Zheng  */
walk_up_proc(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc)54462c47e605SYan Zheng static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
54472c47e605SYan Zheng 				 struct btrfs_root *root,
54482c47e605SYan Zheng 				 struct btrfs_path *path,
54492c47e605SYan Zheng 				 struct walk_control *wc)
54502c47e605SYan Zheng {
54510b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
5452f0486c68SYan, Zheng 	int ret;
54532c47e605SYan Zheng 	int level = wc->level;
54542c47e605SYan Zheng 	struct extent_buffer *eb = path->nodes[level];
54552c47e605SYan Zheng 	u64 parent = 0;
54562c47e605SYan Zheng 
54572c47e605SYan Zheng 	if (wc->stage == UPDATE_BACKREF) {
54582c47e605SYan Zheng 		BUG_ON(wc->shared_level < level);
54592c47e605SYan Zheng 		if (level < wc->shared_level)
54602c47e605SYan Zheng 			goto out;
54612c47e605SYan Zheng 
54622c47e605SYan Zheng 		ret = find_next_key(path, level + 1, &wc->update_progress);
54632c47e605SYan Zheng 		if (ret > 0)
54642c47e605SYan Zheng 			wc->update_ref = 0;
54652c47e605SYan Zheng 
54662c47e605SYan Zheng 		wc->stage = DROP_REFERENCE;
54672c47e605SYan Zheng 		wc->shared_level = -1;
54682c47e605SYan Zheng 		path->slots[level] = 0;
54692c47e605SYan Zheng 
54702c47e605SYan Zheng 		/*
54712c47e605SYan Zheng 		 * check reference count again if the block isn't locked.
54722c47e605SYan Zheng 		 * we should start walking down the tree again if reference
54732c47e605SYan Zheng 		 * count is one.
54742c47e605SYan Zheng 		 */
54752c47e605SYan Zheng 		if (!path->locks[level]) {
54762c47e605SYan Zheng 			BUG_ON(level == 0);
54772c47e605SYan Zheng 			btrfs_tree_lock(eb);
5478ac5887c8SJosef Bacik 			path->locks[level] = BTRFS_WRITE_LOCK;
54792c47e605SYan Zheng 
54802ff7e61eSJeff Mahoney 			ret = btrfs_lookup_extent_info(trans, fs_info,
54813173a18fSJosef Bacik 						       eb->start, level, 1,
54822c47e605SYan Zheng 						       &wc->refs[level],
54832c47e605SYan Zheng 						       &wc->flags[level]);
548479787eaaSJeff Mahoney 			if (ret < 0) {
548579787eaaSJeff Mahoney 				btrfs_tree_unlock_rw(eb, path->locks[level]);
54863268a246SLiu Bo 				path->locks[level] = 0;
548779787eaaSJeff Mahoney 				return ret;
548879787eaaSJeff Mahoney 			}
54892c47e605SYan Zheng 			BUG_ON(wc->refs[level] == 0);
54902c47e605SYan Zheng 			if (wc->refs[level] == 1) {
5491bd681513SChris Mason 				btrfs_tree_unlock_rw(eb, path->locks[level]);
54923268a246SLiu Bo 				path->locks[level] = 0;
54932c47e605SYan Zheng 				return 1;
54942c47e605SYan Zheng 			}
54952c47e605SYan Zheng 		}
54962c47e605SYan Zheng 	}
54972c47e605SYan Zheng 
54982c47e605SYan Zheng 	/* wc->stage == DROP_REFERENCE */
54992c47e605SYan Zheng 	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
55002c47e605SYan Zheng 
55012c47e605SYan Zheng 	if (wc->refs[level] == 1) {
55022c47e605SYan Zheng 		if (level == 0) {
55032c47e605SYan Zheng 			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5504e339a6b0SJosef Bacik 				ret = btrfs_dec_ref(trans, root, eb, 1);
55052c47e605SYan Zheng 			else
5506e339a6b0SJosef Bacik 				ret = btrfs_dec_ref(trans, root, eb, 0);
550779787eaaSJeff Mahoney 			BUG_ON(ret); /* -ENOMEM */
5508c4140cbfSQu Wenruo 			if (is_fstree(root->root_key.objectid)) {
55098d38d7ebSLu Fengqi 				ret = btrfs_qgroup_trace_leaf_items(trans, eb);
55101152651aSMark Fasheh 				if (ret) {
55110b246afaSJeff Mahoney 					btrfs_err_rl(fs_info,
5512c4140cbfSQu Wenruo 	"error %d accounting leaf items, quota is out of sync, rescan required",
551394647322SDavid Sterba 					     ret);
55141152651aSMark Fasheh 				}
55152c47e605SYan Zheng 			}
5516c4140cbfSQu Wenruo 		}
5517190a8339SJosef Bacik 		/* Make block locked assertion in btrfs_clear_buffer_dirty happy. */
5518d3fb6615SJosef Bacik 		if (!path->locks[level]) {
55192c47e605SYan Zheng 			btrfs_tree_lock(eb);
5520ac5887c8SJosef Bacik 			path->locks[level] = BTRFS_WRITE_LOCK;
55212c47e605SYan Zheng 		}
5522190a8339SJosef Bacik 		btrfs_clear_buffer_dirty(trans, eb);
55232c47e605SYan Zheng 	}
55242c47e605SYan Zheng 
55252c47e605SYan Zheng 	if (eb == root->node) {
55262c47e605SYan Zheng 		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
55272c47e605SYan Zheng 			parent = eb->start;
552865c6e82bSQu Wenruo 		else if (root->root_key.objectid != btrfs_header_owner(eb))
552965c6e82bSQu Wenruo 			goto owner_mismatch;
55302c47e605SYan Zheng 	} else {
55312c47e605SYan Zheng 		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
55322c47e605SYan Zheng 			parent = path->nodes[level + 1]->start;
553365c6e82bSQu Wenruo 		else if (root->root_key.objectid !=
553465c6e82bSQu Wenruo 			 btrfs_header_owner(path->nodes[level + 1]))
553565c6e82bSQu Wenruo 			goto owner_mismatch;
55362c47e605SYan Zheng 	}
55372c47e605SYan Zheng 
55387a163608SFilipe Manana 	btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent,
55397a163608SFilipe Manana 			      wc->refs[level] == 1);
55402c47e605SYan Zheng out:
55412c47e605SYan Zheng 	wc->refs[level] = 0;
55422c47e605SYan Zheng 	wc->flags[level] = 0;
5543f0486c68SYan, Zheng 	return 0;
554465c6e82bSQu Wenruo 
554565c6e82bSQu Wenruo owner_mismatch:
554665c6e82bSQu Wenruo 	btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
554765c6e82bSQu Wenruo 		     btrfs_header_owner(eb), root->root_key.objectid);
554865c6e82bSQu Wenruo 	return -EUCLEAN;
55492c47e605SYan Zheng }
55502c47e605SYan Zheng 
walk_down_tree(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc)55515d4f98a2SYan Zheng static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5552f82d02d9SYan Zheng 				   struct btrfs_root *root,
55532c47e605SYan Zheng 				   struct btrfs_path *path,
55542c47e605SYan Zheng 				   struct walk_control *wc)
5555f82d02d9SYan Zheng {
55562c47e605SYan Zheng 	int level = wc->level;
555794fcca9fSYan, Zheng 	int lookup_info = 1;
55584e194384SJosef Bacik 	int ret = 0;
5559f82d02d9SYan Zheng 
55602c47e605SYan Zheng 	while (level >= 0) {
556194fcca9fSYan, Zheng 		ret = walk_down_proc(trans, root, path, wc, lookup_info);
55624e194384SJosef Bacik 		if (ret)
5563f82d02d9SYan Zheng 			break;
5564f82d02d9SYan Zheng 
55652c47e605SYan Zheng 		if (level == 0)
55662c47e605SYan Zheng 			break;
55672c47e605SYan Zheng 
55687a7965f8SYan, Zheng 		if (path->slots[level] >=
55697a7965f8SYan, Zheng 		    btrfs_header_nritems(path->nodes[level]))
55707a7965f8SYan, Zheng 			break;
55717a7965f8SYan, Zheng 
557294fcca9fSYan, Zheng 		ret = do_walk_down(trans, root, path, wc, &lookup_info);
55731c4850e2SYan, Zheng 		if (ret > 0) {
55741c4850e2SYan, Zheng 			path->slots[level]++;
55751c4850e2SYan, Zheng 			continue;
557690d2c51dSMiao Xie 		} else if (ret < 0)
55774e194384SJosef Bacik 			break;
55781c4850e2SYan, Zheng 		level = wc->level;
5579f82d02d9SYan Zheng 	}
55804e194384SJosef Bacik 	return (ret == 1) ? 0 : ret;
5581f82d02d9SYan Zheng }
5582f82d02d9SYan Zheng 
walk_up_tree(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc,int max_level)5583d397712bSChris Mason static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
558498ed5174SChris Mason 				 struct btrfs_root *root,
5585f82d02d9SYan Zheng 				 struct btrfs_path *path,
55862c47e605SYan Zheng 				 struct walk_control *wc, int max_level)
558720524f02SChris Mason {
55882c47e605SYan Zheng 	int level = wc->level;
558920524f02SChris Mason 	int ret;
55909f3a7427SChris Mason 
55912c47e605SYan Zheng 	path->slots[level] = btrfs_header_nritems(path->nodes[level]);
55922c47e605SYan Zheng 	while (level < max_level && path->nodes[level]) {
55932c47e605SYan Zheng 		wc->level = level;
55942c47e605SYan Zheng 		if (path->slots[level] + 1 <
55952c47e605SYan Zheng 		    btrfs_header_nritems(path->nodes[level])) {
55962c47e605SYan Zheng 			path->slots[level]++;
559720524f02SChris Mason 			return 0;
559820524f02SChris Mason 		} else {
55992c47e605SYan Zheng 			ret = walk_up_proc(trans, root, path, wc);
56002c47e605SYan Zheng 			if (ret > 0)
56012c47e605SYan Zheng 				return 0;
560265c6e82bSQu Wenruo 			if (ret < 0)
560365c6e82bSQu Wenruo 				return ret;
5604bd56b302SChris Mason 
56052c47e605SYan Zheng 			if (path->locks[level]) {
5606bd681513SChris Mason 				btrfs_tree_unlock_rw(path->nodes[level],
5607bd681513SChris Mason 						     path->locks[level]);
56082c47e605SYan Zheng 				path->locks[level] = 0;
5609f82d02d9SYan Zheng 			}
56102c47e605SYan Zheng 			free_extent_buffer(path->nodes[level]);
56112c47e605SYan Zheng 			path->nodes[level] = NULL;
56122c47e605SYan Zheng 			level++;
561320524f02SChris Mason 		}
561420524f02SChris Mason 	}
561520524f02SChris Mason 	return 1;
561620524f02SChris Mason }
561720524f02SChris Mason 
56189aca1d51SChris Mason /*
56192c47e605SYan Zheng  * drop a subvolume tree.
56202c47e605SYan Zheng  *
56212c47e605SYan Zheng  * this function traverses the tree freeing any blocks that only
56222c47e605SYan Zheng  * referenced by the tree.
56232c47e605SYan Zheng  *
56242c47e605SYan Zheng  * when a shared tree block is found. this function decreases its
56252c47e605SYan Zheng  * reference count by one. if update_ref is true, this function
56262c47e605SYan Zheng  * also make sure backrefs for the shared block and all lower level
56272c47e605SYan Zheng  * blocks are properly updated.
56289d1a2a3aSDavid Sterba  *
56299d1a2a3aSDavid Sterba  * If called with for_reloc == 0, may exit early with -EAGAIN
56309aca1d51SChris Mason  */
btrfs_drop_snapshot(struct btrfs_root * root,int update_ref,int for_reloc)56310078a9f9SNikolay Borisov int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
563220524f02SChris Mason {
563312a824dcSFilipe Manana 	const bool is_reloc_root = (root->root_key.objectid ==
563412a824dcSFilipe Manana 				    BTRFS_TREE_RELOC_OBJECTID);
5635ab8d0fc4SJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
56365caf2a00SChris Mason 	struct btrfs_path *path;
56372c47e605SYan Zheng 	struct btrfs_trans_handle *trans;
5638ab8d0fc4SJeff Mahoney 	struct btrfs_root *tree_root = fs_info->tree_root;
56399f3a7427SChris Mason 	struct btrfs_root_item *root_item = &root->root_item;
56402c47e605SYan Zheng 	struct walk_control *wc;
56412c47e605SYan Zheng 	struct btrfs_key key;
56422c47e605SYan Zheng 	int err = 0;
56432c47e605SYan Zheng 	int ret;
56442c47e605SYan Zheng 	int level;
5645d29a9f62SJosef Bacik 	bool root_dropped = false;
5646b4be6aefSJosef Bacik 	bool unfinished_drop = false;
564720524f02SChris Mason 
56484fd786e6SMisono Tomohiro 	btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
56491152651aSMark Fasheh 
56505caf2a00SChris Mason 	path = btrfs_alloc_path();
5651cb1b69f4STsutomu Itoh 	if (!path) {
5652cb1b69f4STsutomu Itoh 		err = -ENOMEM;
5653cb1b69f4STsutomu Itoh 		goto out;
5654cb1b69f4STsutomu Itoh 	}
565520524f02SChris Mason 
56562c47e605SYan Zheng 	wc = kzalloc(sizeof(*wc), GFP_NOFS);
565738a1a919SMark Fasheh 	if (!wc) {
565838a1a919SMark Fasheh 		btrfs_free_path(path);
5659cb1b69f4STsutomu Itoh 		err = -ENOMEM;
5660cb1b69f4STsutomu Itoh 		goto out;
566138a1a919SMark Fasheh 	}
56622c47e605SYan Zheng 
5663f3e3d9ccSQu Wenruo 	/*
5664f3e3d9ccSQu Wenruo 	 * Use join to avoid potential EINTR from transaction start. See
5665f3e3d9ccSQu Wenruo 	 * wait_reserve_ticket and the whole reservation callchain.
5666f3e3d9ccSQu Wenruo 	 */
5667f3e3d9ccSQu Wenruo 	if (for_reloc)
5668f3e3d9ccSQu Wenruo 		trans = btrfs_join_transaction(tree_root);
5669f3e3d9ccSQu Wenruo 	else
5670a22285a6SYan, Zheng 		trans = btrfs_start_transaction(tree_root, 0);
567179787eaaSJeff Mahoney 	if (IS_ERR(trans)) {
567279787eaaSJeff Mahoney 		err = PTR_ERR(trans);
567379787eaaSJeff Mahoney 		goto out_free;
567479787eaaSJeff Mahoney 	}
567598d5dc13STsutomu Itoh 
56760568e82dSJosef Bacik 	err = btrfs_run_delayed_items(trans);
56770568e82dSJosef Bacik 	if (err)
56780568e82dSJosef Bacik 		goto out_end_trans;
56790568e82dSJosef Bacik 
568083354f07SJosef Bacik 	/*
568183354f07SJosef Bacik 	 * This will help us catch people modifying the fs tree while we're
568283354f07SJosef Bacik 	 * dropping it.  It is unsafe to mess with the fs tree while it's being
568383354f07SJosef Bacik 	 * dropped as we unlock the root node and parent nodes as we walk down
568483354f07SJosef Bacik 	 * the tree, assuming nothing will change.  If something does change
568583354f07SJosef Bacik 	 * then we'll have stale information and drop references to blocks we've
568683354f07SJosef Bacik 	 * already dropped.
568783354f07SJosef Bacik 	 */
568883354f07SJosef Bacik 	set_bit(BTRFS_ROOT_DELETING, &root->state);
5689b4be6aefSJosef Bacik 	unfinished_drop = test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
5690b4be6aefSJosef Bacik 
56919f3a7427SChris Mason 	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
56922c47e605SYan Zheng 		level = btrfs_header_level(root->node);
56935d4f98a2SYan Zheng 		path->nodes[level] = btrfs_lock_root_node(root);
56945caf2a00SChris Mason 		path->slots[level] = 0;
5695ac5887c8SJosef Bacik 		path->locks[level] = BTRFS_WRITE_LOCK;
56962c47e605SYan Zheng 		memset(&wc->update_progress, 0,
56972c47e605SYan Zheng 		       sizeof(wc->update_progress));
56989f3a7427SChris Mason 	} else {
56999f3a7427SChris Mason 		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
57002c47e605SYan Zheng 		memcpy(&wc->update_progress, &key,
57012c47e605SYan Zheng 		       sizeof(wc->update_progress));
57022c47e605SYan Zheng 
5703c8422684SDavid Sterba 		level = btrfs_root_drop_level(root_item);
57042c47e605SYan Zheng 		BUG_ON(level == 0);
57056702ed49SChris Mason 		path->lowest_level = level;
57062c47e605SYan Zheng 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
57072c47e605SYan Zheng 		path->lowest_level = 0;
57082c47e605SYan Zheng 		if (ret < 0) {
57092c47e605SYan Zheng 			err = ret;
571079787eaaSJeff Mahoney 			goto out_end_trans;
57119f3a7427SChris Mason 		}
57121c4850e2SYan, Zheng 		WARN_ON(ret > 0);
57132c47e605SYan Zheng 
57147d9eb12cSChris Mason 		/*
57157d9eb12cSChris Mason 		 * unlock our path, this is safe because only this
57167d9eb12cSChris Mason 		 * function is allowed to delete this snapshot
57177d9eb12cSChris Mason 		 */
57185d4f98a2SYan Zheng 		btrfs_unlock_up_safe(path, 0);
57199aca1d51SChris Mason 
57202c47e605SYan Zheng 		level = btrfs_header_level(root->node);
57212c47e605SYan Zheng 		while (1) {
57222c47e605SYan Zheng 			btrfs_tree_lock(path->nodes[level]);
5723ac5887c8SJosef Bacik 			path->locks[level] = BTRFS_WRITE_LOCK;
57242c47e605SYan Zheng 
57252ff7e61eSJeff Mahoney 			ret = btrfs_lookup_extent_info(trans, fs_info,
57262c47e605SYan Zheng 						path->nodes[level]->start,
57273173a18fSJosef Bacik 						level, 1, &wc->refs[level],
57282c47e605SYan Zheng 						&wc->flags[level]);
572979787eaaSJeff Mahoney 			if (ret < 0) {
573079787eaaSJeff Mahoney 				err = ret;
573179787eaaSJeff Mahoney 				goto out_end_trans;
573279787eaaSJeff Mahoney 			}
57332c47e605SYan Zheng 			BUG_ON(wc->refs[level] == 0);
57342c47e605SYan Zheng 
5735c8422684SDavid Sterba 			if (level == btrfs_root_drop_level(root_item))
573620524f02SChris Mason 				break;
57372c47e605SYan Zheng 
57382c47e605SYan Zheng 			btrfs_tree_unlock(path->nodes[level]);
5739fec386acSJosef Bacik 			path->locks[level] = 0;
57402c47e605SYan Zheng 			WARN_ON(wc->refs[level] != 1);
57412c47e605SYan Zheng 			level--;
57422c47e605SYan Zheng 		}
57432c47e605SYan Zheng 	}
57442c47e605SYan Zheng 
574578c52d9eSJosef Bacik 	wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
57462c47e605SYan Zheng 	wc->level = level;
57472c47e605SYan Zheng 	wc->shared_level = -1;
57482c47e605SYan Zheng 	wc->stage = DROP_REFERENCE;
57492c47e605SYan Zheng 	wc->update_ref = update_ref;
57502c47e605SYan Zheng 	wc->keep_locks = 0;
57510b246afaSJeff Mahoney 	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
57522c47e605SYan Zheng 
57532c47e605SYan Zheng 	while (1) {
57549d1a2a3aSDavid Sterba 
57552c47e605SYan Zheng 		ret = walk_down_tree(trans, root, path, wc);
57562c47e605SYan Zheng 		if (ret < 0) {
57579a93b5a3SJosef Bacik 			btrfs_abort_transaction(trans, ret);
57582c47e605SYan Zheng 			err = ret;
57592c47e605SYan Zheng 			break;
57602c47e605SYan Zheng 		}
57612c47e605SYan Zheng 
57622c47e605SYan Zheng 		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
57632c47e605SYan Zheng 		if (ret < 0) {
57649a93b5a3SJosef Bacik 			btrfs_abort_transaction(trans, ret);
57652c47e605SYan Zheng 			err = ret;
57662c47e605SYan Zheng 			break;
57672c47e605SYan Zheng 		}
57682c47e605SYan Zheng 
57692c47e605SYan Zheng 		if (ret > 0) {
57702c47e605SYan Zheng 			BUG_ON(wc->stage != DROP_REFERENCE);
57712c47e605SYan Zheng 			break;
57722c47e605SYan Zheng 		}
57732c47e605SYan Zheng 
57742c47e605SYan Zheng 		if (wc->stage == DROP_REFERENCE) {
5775aea6f028SJosef Bacik 			wc->drop_level = wc->level;
5776aea6f028SJosef Bacik 			btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
5777aea6f028SJosef Bacik 					      &wc->drop_progress,
5778aea6f028SJosef Bacik 					      path->slots[wc->drop_level]);
57792c47e605SYan Zheng 		}
5780aea6f028SJosef Bacik 		btrfs_cpu_key_to_disk(&root_item->drop_progress,
5781aea6f028SJosef Bacik 				      &wc->drop_progress);
5782c8422684SDavid Sterba 		btrfs_set_root_drop_level(root_item, wc->drop_level);
57832c47e605SYan Zheng 
57842c47e605SYan Zheng 		BUG_ON(wc->level == 0);
57853a45bb20SJeff Mahoney 		if (btrfs_should_end_transaction(trans) ||
57862ff7e61eSJeff Mahoney 		    (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
57872c47e605SYan Zheng 			ret = btrfs_update_root(trans, tree_root,
57882c47e605SYan Zheng 						&root->root_key,
57892c47e605SYan Zheng 						root_item);
579079787eaaSJeff Mahoney 			if (ret) {
579166642832SJeff Mahoney 				btrfs_abort_transaction(trans, ret);
579279787eaaSJeff Mahoney 				err = ret;
579379787eaaSJeff Mahoney 				goto out_end_trans;
579479787eaaSJeff Mahoney 			}
57952c47e605SYan Zheng 
579612a824dcSFilipe Manana 			if (!is_reloc_root)
579712a824dcSFilipe Manana 				btrfs_set_last_root_drop_gen(fs_info, trans->transid);
579812a824dcSFilipe Manana 
57993a45bb20SJeff Mahoney 			btrfs_end_transaction_throttle(trans);
58002ff7e61eSJeff Mahoney 			if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
5801ab8d0fc4SJeff Mahoney 				btrfs_debug(fs_info,
5802ab8d0fc4SJeff Mahoney 					    "drop snapshot early exit");
58033c8f2422SJosef Bacik 				err = -EAGAIN;
58043c8f2422SJosef Bacik 				goto out_free;
58053c8f2422SJosef Bacik 			}
58063c8f2422SJosef Bacik 
580718d3bff4SJosef Bacik 		       /*
580818d3bff4SJosef Bacik 			* Use join to avoid potential EINTR from transaction
580918d3bff4SJosef Bacik 			* start. See wait_reserve_ticket and the whole
581018d3bff4SJosef Bacik 			* reservation callchain.
581118d3bff4SJosef Bacik 			*/
581218d3bff4SJosef Bacik 			if (for_reloc)
581318d3bff4SJosef Bacik 				trans = btrfs_join_transaction(tree_root);
581418d3bff4SJosef Bacik 			else
5815a22285a6SYan, Zheng 				trans = btrfs_start_transaction(tree_root, 0);
581679787eaaSJeff Mahoney 			if (IS_ERR(trans)) {
581779787eaaSJeff Mahoney 				err = PTR_ERR(trans);
581879787eaaSJeff Mahoney 				goto out_free;
581979787eaaSJeff Mahoney 			}
5820c3e69d58SChris Mason 		}
58219f3a7427SChris Mason 	}
5822b3b4aa74SDavid Sterba 	btrfs_release_path(path);
582379787eaaSJeff Mahoney 	if (err)
582479787eaaSJeff Mahoney 		goto out_end_trans;
58252c47e605SYan Zheng 
5826ab9ce7d4SLu Fengqi 	ret = btrfs_del_root(trans, &root->root_key);
582779787eaaSJeff Mahoney 	if (ret) {
582866642832SJeff Mahoney 		btrfs_abort_transaction(trans, ret);
5829e19182c0SJeff Mahoney 		err = ret;
583079787eaaSJeff Mahoney 		goto out_end_trans;
583179787eaaSJeff Mahoney 	}
58322c47e605SYan Zheng 
583312a824dcSFilipe Manana 	if (!is_reloc_root) {
5834cb517eabSMiao Xie 		ret = btrfs_find_root(tree_root, &root->root_key, path,
583576dda93cSYan, Zheng 				      NULL, NULL);
583679787eaaSJeff Mahoney 		if (ret < 0) {
583766642832SJeff Mahoney 			btrfs_abort_transaction(trans, ret);
583879787eaaSJeff Mahoney 			err = ret;
583979787eaaSJeff Mahoney 			goto out_end_trans;
584079787eaaSJeff Mahoney 		} else if (ret > 0) {
584184cd948cSJosef Bacik 			/* if we fail to delete the orphan item this time
584284cd948cSJosef Bacik 			 * around, it'll get picked up the next time.
584384cd948cSJosef Bacik 			 *
584484cd948cSJosef Bacik 			 * The most common failure here is just -ENOENT.
584584cd948cSJosef Bacik 			 */
584684cd948cSJosef Bacik 			btrfs_del_orphan_item(trans, tree_root,
584776dda93cSYan, Zheng 					      root->root_key.objectid);
584876dda93cSYan, Zheng 		}
584976dda93cSYan, Zheng 	}
585076dda93cSYan, Zheng 
5851a3cf0e43SQu Wenruo 	/*
5852a3cf0e43SQu Wenruo 	 * This subvolume is going to be completely dropped, and won't be
5853a3cf0e43SQu Wenruo 	 * recorded as dirty roots, thus pertrans meta rsv will not be freed at
5854a3cf0e43SQu Wenruo 	 * commit transaction time.  So free it here manually.
5855a3cf0e43SQu Wenruo 	 */
5856a3cf0e43SQu Wenruo 	btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
5857a3cf0e43SQu Wenruo 	btrfs_qgroup_free_meta_all_pertrans(root);
5858a3cf0e43SQu Wenruo 
5859fc7cbcd4SDavid Sterba 	if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
58602b9dbef2SJosef Bacik 		btrfs_add_dropped_root(trans, root);
58618c38938cSJosef Bacik 	else
586200246528SJosef Bacik 		btrfs_put_root(root);
5863d29a9f62SJosef Bacik 	root_dropped = true;
586479787eaaSJeff Mahoney out_end_trans:
586512a824dcSFilipe Manana 	if (!is_reloc_root)
586612a824dcSFilipe Manana 		btrfs_set_last_root_drop_gen(fs_info, trans->transid);
586712a824dcSFilipe Manana 
58683a45bb20SJeff Mahoney 	btrfs_end_transaction_throttle(trans);
586979787eaaSJeff Mahoney out_free:
58702c47e605SYan Zheng 	kfree(wc);
58715caf2a00SChris Mason 	btrfs_free_path(path);
5872cb1b69f4STsutomu Itoh out:
5873d29a9f62SJosef Bacik 	/*
5874b4be6aefSJosef Bacik 	 * We were an unfinished drop root, check to see if there are any
5875b4be6aefSJosef Bacik 	 * pending, and if not clear and wake up any waiters.
5876b4be6aefSJosef Bacik 	 */
5877b4be6aefSJosef Bacik 	if (!err && unfinished_drop)
5878b4be6aefSJosef Bacik 		btrfs_maybe_wake_unfinished_drop(fs_info);
5879b4be6aefSJosef Bacik 
5880b4be6aefSJosef Bacik 	/*
5881d29a9f62SJosef Bacik 	 * So if we need to stop dropping the snapshot for whatever reason we
5882d29a9f62SJosef Bacik 	 * need to make sure to add it back to the dead root list so that we
5883d29a9f62SJosef Bacik 	 * keep trying to do the work later.  This also cleans up roots if we
5884d29a9f62SJosef Bacik 	 * don't have it in the radix (like when we recover after a power fail
5885d29a9f62SJosef Bacik 	 * or unmount) so we don't leak memory.
5886d29a9f62SJosef Bacik 	 */
5887897ca819SThomas Meyer 	if (!for_reloc && !root_dropped)
5888d29a9f62SJosef Bacik 		btrfs_add_dead_root(root);
58892c536799SJeff Mahoney 	return err;
589020524f02SChris Mason }
58919078a3e1SChris Mason 
58922c47e605SYan Zheng /*
58932c47e605SYan Zheng  * drop subtree rooted at tree block 'node'.
58942c47e605SYan Zheng  *
58952c47e605SYan Zheng  * NOTE: this function will unlock and release tree block 'node'
589666d7e7f0SArne Jansen  * only used by relocation code
58972c47e605SYan Zheng  */
btrfs_drop_subtree(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * node,struct extent_buffer * parent)5898f82d02d9SYan Zheng int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5899f82d02d9SYan Zheng 			struct btrfs_root *root,
5900f82d02d9SYan Zheng 			struct extent_buffer *node,
5901f82d02d9SYan Zheng 			struct extent_buffer *parent)
5902f82d02d9SYan Zheng {
59030b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
5904f82d02d9SYan Zheng 	struct btrfs_path *path;
59052c47e605SYan Zheng 	struct walk_control *wc;
5906f82d02d9SYan Zheng 	int level;
5907f82d02d9SYan Zheng 	int parent_level;
5908f82d02d9SYan Zheng 	int ret = 0;
5909f82d02d9SYan Zheng 	int wret;
5910f82d02d9SYan Zheng 
59112c47e605SYan Zheng 	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
59122c47e605SYan Zheng 
5913f82d02d9SYan Zheng 	path = btrfs_alloc_path();
5914db5b493aSTsutomu Itoh 	if (!path)
5915db5b493aSTsutomu Itoh 		return -ENOMEM;
5916f82d02d9SYan Zheng 
59172c47e605SYan Zheng 	wc = kzalloc(sizeof(*wc), GFP_NOFS);
5918db5b493aSTsutomu Itoh 	if (!wc) {
5919db5b493aSTsutomu Itoh 		btrfs_free_path(path);
5920db5b493aSTsutomu Itoh 		return -ENOMEM;
5921db5b493aSTsutomu Itoh 	}
59222c47e605SYan Zheng 
592349d0c642SFilipe Manana 	btrfs_assert_tree_write_locked(parent);
5924f82d02d9SYan Zheng 	parent_level = btrfs_header_level(parent);
592567439dadSDavid Sterba 	atomic_inc(&parent->refs);
5926f82d02d9SYan Zheng 	path->nodes[parent_level] = parent;
5927f82d02d9SYan Zheng 	path->slots[parent_level] = btrfs_header_nritems(parent);
5928f82d02d9SYan Zheng 
592949d0c642SFilipe Manana 	btrfs_assert_tree_write_locked(node);
5930f82d02d9SYan Zheng 	level = btrfs_header_level(node);
5931f82d02d9SYan Zheng 	path->nodes[level] = node;
5932f82d02d9SYan Zheng 	path->slots[level] = 0;
5933ac5887c8SJosef Bacik 	path->locks[level] = BTRFS_WRITE_LOCK;
59342c47e605SYan Zheng 
59352c47e605SYan Zheng 	wc->refs[parent_level] = 1;
59362c47e605SYan Zheng 	wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
59372c47e605SYan Zheng 	wc->level = level;
59382c47e605SYan Zheng 	wc->shared_level = -1;
59392c47e605SYan Zheng 	wc->stage = DROP_REFERENCE;
59402c47e605SYan Zheng 	wc->update_ref = 0;
59412c47e605SYan Zheng 	wc->keep_locks = 1;
59420b246afaSJeff Mahoney 	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5943f82d02d9SYan Zheng 
5944f82d02d9SYan Zheng 	while (1) {
59452c47e605SYan Zheng 		wret = walk_down_tree(trans, root, path, wc);
59462c47e605SYan Zheng 		if (wret < 0) {
5947f82d02d9SYan Zheng 			ret = wret;
5948f82d02d9SYan Zheng 			break;
59492c47e605SYan Zheng 		}
5950f82d02d9SYan Zheng 
59512c47e605SYan Zheng 		wret = walk_up_tree(trans, root, path, wc, parent_level);
5952f82d02d9SYan Zheng 		if (wret < 0)
5953f82d02d9SYan Zheng 			ret = wret;
5954f82d02d9SYan Zheng 		if (wret != 0)
5955f82d02d9SYan Zheng 			break;
5956f82d02d9SYan Zheng 	}
5957f82d02d9SYan Zheng 
59582c47e605SYan Zheng 	kfree(wc);
5959f82d02d9SYan Zheng 	btrfs_free_path(path);
5960f82d02d9SYan Zheng 	return ret;
5961f82d02d9SYan Zheng }
5962f82d02d9SYan Zheng 
/*
 * Exported wrapper around unpin_extent_range(): forwards 'fs_info', 'start'
 * and 'end' unchanged and always passes false as the final argument.
 *
 * Returns whatever unpin_extent_range() returns.
 */
int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
				   u64 start, u64 end)
{
	int ret;

	ret = unpin_extent_range(fs_info, start, end, false);

	return ret;
}
5968acce952bSliubo 
5969499f377fSJeff Mahoney /*
5970499f377fSJeff Mahoney  * It used to be that old block groups would be left around forever.
5971499f377fSJeff Mahoney  * Iterating over them would be enough to trim unused space.  Since we
5972499f377fSJeff Mahoney  * now automatically remove them, we also need to iterate over unallocated
5973499f377fSJeff Mahoney  * space.
5974499f377fSJeff Mahoney  *
5975499f377fSJeff Mahoney  * We don't want a transaction for this since the discard may take a
5976499f377fSJeff Mahoney  * substantial amount of time.  We don't require that a transaction be
5977499f377fSJeff Mahoney  * running, but we do need to take a running transaction into account
5978fee7acc3SJeff Mahoney  * to ensure that we're not discarding chunks that were released or
5979fee7acc3SJeff Mahoney  * allocated in the current transaction.
5980499f377fSJeff Mahoney  *
5981499f377fSJeff Mahoney  * Holding the chunks lock will prevent other threads from allocating
5982499f377fSJeff Mahoney  * or releasing chunks, but it won't prevent a running transaction
5983499f377fSJeff Mahoney  * from committing and releasing the memory that the pending chunks
5984499f377fSJeff Mahoney  * list head uses.  For that, we need to take a reference to the
5985fee7acc3SJeff Mahoney  * transaction and hold the commit root sem.  We only need to hold
5986fee7acc3SJeff Mahoney  * it while performing the free space search since we have already
5987fee7acc3SJeff Mahoney  * held back allocations.
5988499f377fSJeff Mahoney  */
btrfs_trim_free_extents(struct btrfs_device * device,u64 * trimmed)59898103d10bSNikolay Borisov static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
5990499f377fSJeff Mahoney {
599137f85ec3SQu Wenruo 	u64 start = BTRFS_DEVICE_RANGE_RESERVED, len = 0, end = 0;
5992499f377fSJeff Mahoney 	int ret;
5993499f377fSJeff Mahoney 
5994499f377fSJeff Mahoney 	*trimmed = 0;
5995499f377fSJeff Mahoney 
59960be88e36SJeff Mahoney 	/* Discard not supported = nothing to do. */
599770200574SChristoph Hellwig 	if (!bdev_max_discard_sectors(device->bdev))
59980be88e36SJeff Mahoney 		return 0;
59990be88e36SJeff Mahoney 
600052042d8eSAndrea Gelmini 	/* Not writable = nothing to do. */
6001ebbede42SAnand Jain 	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
6002499f377fSJeff Mahoney 		return 0;
6003499f377fSJeff Mahoney 
6004499f377fSJeff Mahoney 	/* No free space = nothing to do. */
6005499f377fSJeff Mahoney 	if (device->total_bytes <= device->bytes_used)
6006499f377fSJeff Mahoney 		return 0;
6007499f377fSJeff Mahoney 
6008499f377fSJeff Mahoney 	ret = 0;
6009499f377fSJeff Mahoney 
6010499f377fSJeff Mahoney 	while (1) {
6011fb456252SJeff Mahoney 		struct btrfs_fs_info *fs_info = device->fs_info;
6012499f377fSJeff Mahoney 		u64 bytes;
6013499f377fSJeff Mahoney 
6014499f377fSJeff Mahoney 		ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
6015499f377fSJeff Mahoney 		if (ret)
6016fee7acc3SJeff Mahoney 			break;
6017499f377fSJeff Mahoney 
6018929be17aSNikolay Borisov 		find_first_clear_extent_bit(&device->alloc_state, start,
6019929be17aSNikolay Borisov 					    &start, &end,
6020929be17aSNikolay Borisov 					    CHUNK_TRIMMED | CHUNK_ALLOCATED);
602153460a45SNikolay Borisov 
6022c57dd1f2SQu Wenruo 		/* Check if there are any CHUNK_* bits left */
6023c57dd1f2SQu Wenruo 		if (start > device->total_bytes) {
6024c57dd1f2SQu Wenruo 			WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
6025c57dd1f2SQu Wenruo 			btrfs_warn_in_rcu(fs_info,
6026c57dd1f2SQu Wenruo "ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
6027c57dd1f2SQu Wenruo 					  start, end - start + 1,
6028cb3e217bSQu Wenruo 					  btrfs_dev_name(device),
6029c57dd1f2SQu Wenruo 					  device->total_bytes);
6030c57dd1f2SQu Wenruo 			mutex_unlock(&fs_info->chunk_mutex);
6031c57dd1f2SQu Wenruo 			ret = 0;
6032c57dd1f2SQu Wenruo 			break;
6033c57dd1f2SQu Wenruo 		}
6034c57dd1f2SQu Wenruo 
603537f85ec3SQu Wenruo 		/* Ensure we skip the reserved space on each device. */
603637f85ec3SQu Wenruo 		start = max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
603753460a45SNikolay Borisov 
6038929be17aSNikolay Borisov 		/*
6039929be17aSNikolay Borisov 		 * If find_first_clear_extent_bit find a range that spans the
6040929be17aSNikolay Borisov 		 * end of the device it will set end to -1, in this case it's up
6041929be17aSNikolay Borisov 		 * to the caller to trim the value to the size of the device.
6042929be17aSNikolay Borisov 		 */
6043929be17aSNikolay Borisov 		end = min(end, device->total_bytes - 1);
604453460a45SNikolay Borisov 
6045929be17aSNikolay Borisov 		len = end - start + 1;
6046499f377fSJeff Mahoney 
6047929be17aSNikolay Borisov 		/* We didn't find any extents */
6048929be17aSNikolay Borisov 		if (!len) {
6049499f377fSJeff Mahoney 			mutex_unlock(&fs_info->chunk_mutex);
6050499f377fSJeff Mahoney 			ret = 0;
6051499f377fSJeff Mahoney 			break;
6052499f377fSJeff Mahoney 		}
6053499f377fSJeff Mahoney 
60548811133dSNikolay Borisov 		ret = btrfs_issue_discard(device->bdev, start, len,
60558811133dSNikolay Borisov 					  &bytes);
60568811133dSNikolay Borisov 		if (!ret)
60570acd32c2SDavid Sterba 			set_extent_bit(&device->alloc_state, start,
60581d126800SDavid Sterba 				       start + bytes - 1, CHUNK_TRIMMED, NULL);
6059499f377fSJeff Mahoney 		mutex_unlock(&fs_info->chunk_mutex);
6060499f377fSJeff Mahoney 
6061499f377fSJeff Mahoney 		if (ret)
6062499f377fSJeff Mahoney 			break;
6063499f377fSJeff Mahoney 
6064499f377fSJeff Mahoney 		start += len;
6065499f377fSJeff Mahoney 		*trimmed += bytes;
6066499f377fSJeff Mahoney 
6067499f377fSJeff Mahoney 		if (fatal_signal_pending(current)) {
6068499f377fSJeff Mahoney 			ret = -ERESTARTSYS;
6069499f377fSJeff Mahoney 			break;
6070499f377fSJeff Mahoney 		}
6071499f377fSJeff Mahoney 
6072499f377fSJeff Mahoney 		cond_resched();
6073499f377fSJeff Mahoney 	}
6074499f377fSJeff Mahoney 
6075499f377fSJeff Mahoney 	return ret;
6076499f377fSJeff Mahoney }
6077499f377fSJeff Mahoney 
607893bba24dSQu Wenruo /*
607993bba24dSQu Wenruo  * Trim the whole filesystem by:
608093bba24dSQu Wenruo  * 1) trimming the free space in each block group
608193bba24dSQu Wenruo  * 2) trimming the unallocated space on each device
608293bba24dSQu Wenruo  *
608393bba24dSQu Wenruo  * This will also continue trimming even if a block group or device encounters
608493bba24dSQu Wenruo  * an error.  The return value will be the last error, or 0 if nothing bad
608593bba24dSQu Wenruo  * happens.
608693bba24dSQu Wenruo  */
btrfs_trim_fs(struct btrfs_fs_info * fs_info,struct fstrim_range * range)60872ff7e61eSJeff Mahoney int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
6088f7039b1dSLi Dongyang {
608923608d51SAnand Jain 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
609032da5386SDavid Sterba 	struct btrfs_block_group *cache = NULL;
6091499f377fSJeff Mahoney 	struct btrfs_device *device;
6092f7039b1dSLi Dongyang 	u64 group_trimmed;
609307301df7SQu Wenruo 	u64 range_end = U64_MAX;
6094f7039b1dSLi Dongyang 	u64 start;
6095f7039b1dSLi Dongyang 	u64 end;
6096f7039b1dSLi Dongyang 	u64 trimmed = 0;
609793bba24dSQu Wenruo 	u64 bg_failed = 0;
609893bba24dSQu Wenruo 	u64 dev_failed = 0;
609993bba24dSQu Wenruo 	int bg_ret = 0;
610093bba24dSQu Wenruo 	int dev_ret = 0;
6101f7039b1dSLi Dongyang 	int ret = 0;
6102f7039b1dSLi Dongyang 
6103f981fec1SJosef Bacik 	if (range->start == U64_MAX)
6104f981fec1SJosef Bacik 		return -EINVAL;
6105f981fec1SJosef Bacik 
610607301df7SQu Wenruo 	/*
610707301df7SQu Wenruo 	 * Check range overflow if range->len is set.
610807301df7SQu Wenruo 	 * The default range->len is U64_MAX.
610907301df7SQu Wenruo 	 */
611007301df7SQu Wenruo 	if (range->len != U64_MAX &&
611107301df7SQu Wenruo 	    check_add_overflow(range->start, range->len, &range_end))
611207301df7SQu Wenruo 		return -EINVAL;
611307301df7SQu Wenruo 
61142cac13e4SLiu Bo 	cache = btrfs_lookup_first_block_group(fs_info, range->start);
61152e405ad8SJosef Bacik 	for (; cache; cache = btrfs_next_block_group(cache)) {
6116b3470b5dSDavid Sterba 		if (cache->start >= range_end) {
6117f7039b1dSLi Dongyang 			btrfs_put_block_group(cache);
6118f7039b1dSLi Dongyang 			break;
6119f7039b1dSLi Dongyang 		}
6120f7039b1dSLi Dongyang 
6121b3470b5dSDavid Sterba 		start = max(range->start, cache->start);
6122b3470b5dSDavid Sterba 		end = min(range_end, cache->start + cache->length);
6123f7039b1dSLi Dongyang 
6124f7039b1dSLi Dongyang 		if (end - start >= range->minlen) {
612532da5386SDavid Sterba 			if (!btrfs_block_group_done(cache)) {
6126ced8ecf0SOmar Sandoval 				ret = btrfs_cache_block_group(cache, true);
61271be41b78SJosef Bacik 				if (ret) {
612893bba24dSQu Wenruo 					bg_failed++;
612993bba24dSQu Wenruo 					bg_ret = ret;
613093bba24dSQu Wenruo 					continue;
61311be41b78SJosef Bacik 				}
6132f7039b1dSLi Dongyang 			}
6133f7039b1dSLi Dongyang 			ret = btrfs_trim_block_group(cache,
6134f7039b1dSLi Dongyang 						     &group_trimmed,
6135f7039b1dSLi Dongyang 						     start,
6136f7039b1dSLi Dongyang 						     end,
6137f7039b1dSLi Dongyang 						     range->minlen);
6138f7039b1dSLi Dongyang 
6139f7039b1dSLi Dongyang 			trimmed += group_trimmed;
6140f7039b1dSLi Dongyang 			if (ret) {
614193bba24dSQu Wenruo 				bg_failed++;
614293bba24dSQu Wenruo 				bg_ret = ret;
614393bba24dSQu Wenruo 				continue;
614493bba24dSQu Wenruo 			}
6145f7039b1dSLi Dongyang 		}
6146f7039b1dSLi Dongyang 	}
6147f7039b1dSLi Dongyang 
614893bba24dSQu Wenruo 	if (bg_failed)
614993bba24dSQu Wenruo 		btrfs_warn(fs_info,
615093bba24dSQu Wenruo 			"failed to trim %llu block group(s), last error %d",
615193bba24dSQu Wenruo 			bg_failed, bg_ret);
615223608d51SAnand Jain 
615323608d51SAnand Jain 	mutex_lock(&fs_devices->device_list_mutex);
615423608d51SAnand Jain 	list_for_each_entry(device, &fs_devices->devices, dev_list) {
615516a200f6SAnand Jain 		if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
615616a200f6SAnand Jain 			continue;
615716a200f6SAnand Jain 
61588103d10bSNikolay Borisov 		ret = btrfs_trim_free_extents(device, &group_trimmed);
615993bba24dSQu Wenruo 		if (ret) {
616093bba24dSQu Wenruo 			dev_failed++;
616193bba24dSQu Wenruo 			dev_ret = ret;
6162499f377fSJeff Mahoney 			break;
616393bba24dSQu Wenruo 		}
6164499f377fSJeff Mahoney 
6165499f377fSJeff Mahoney 		trimmed += group_trimmed;
6166499f377fSJeff Mahoney 	}
616723608d51SAnand Jain 	mutex_unlock(&fs_devices->device_list_mutex);
6168499f377fSJeff Mahoney 
616993bba24dSQu Wenruo 	if (dev_failed)
617093bba24dSQu Wenruo 		btrfs_warn(fs_info,
617193bba24dSQu Wenruo 			"failed to trim %llu device(s), last error %d",
617293bba24dSQu Wenruo 			dev_failed, dev_ret);
6173f7039b1dSLi Dongyang 	range->len = trimmed;
617493bba24dSQu Wenruo 	if (bg_ret)
617593bba24dSQu Wenruo 		return bg_ret;
617693bba24dSQu Wenruo 	return dev_ret;
6177f7039b1dSLi Dongyang }
6178