1c1d7c514SDavid Sterba // SPDX-License-Identifier: GPL-2.0
26cbd5570SChris Mason /*
36cbd5570SChris Mason * Copyright (C) 2007 Oracle. All rights reserved.
46cbd5570SChris Mason */
5c1d7c514SDavid Sterba
6ec6b910fSZach Brown #include <linux/sched.h>
7f361bf4aSIngo Molnar #include <linux/sched/signal.h>
8edbd8d4eSChris Mason #include <linux/pagemap.h>
9ec44a35cSChris Mason #include <linux/writeback.h>
1021af804cSDavid Woodhouse #include <linux/blkdev.h>
11b7a9f29fSChris Mason #include <linux/sort.h>
124184ea7fSChris Mason #include <linux/rcupdate.h>
13817d52f8SJosef Bacik #include <linux/kthread.h>
145a0e3ad6STejun Heo #include <linux/slab.h>
15dff51cd1SDavid Sterba #include <linux/ratelimit.h>
16b150a4f1SJosef Bacik #include <linux/percpu_counter.h>
1769fe2d75SJosef Bacik #include <linux/lockdep.h>
189678c543SNikolay Borisov #include <linux/crc32c.h>
19cfc2de0fSBoris Burkov #include "ctree.h"
20cfc2de0fSBoris Burkov #include "extent-tree.h"
21995946ddSMiao Xie #include "tree-log.h"
22fec577fbSChris Mason #include "disk-io.h"
23fec577fbSChris Mason #include "print-tree.h"
240b86a832SChris Mason #include "volumes.h"
2553b381b3SDavid Woodhouse #include "raid56.h"
26925baeddSChris Mason #include "locking.h"
27fa9c0d79SChris Mason #include "free-space-cache.h"
281e144fb8SOmar Sandoval #include "free-space-tree.h"
296ab0a202SJeff Mahoney #include "sysfs.h"
30fcebe456SJosef Bacik #include "qgroup.h"
31fd708b81SJosef Bacik #include "ref-verify.h"
328719aaaeSJosef Bacik #include "space-info.h"
33d12ffdd1SJosef Bacik #include "block-rsv.h"
3486736342SJosef Bacik #include "delalloc-space.h"
35b0643e59SDennis Zhou #include "discard.h"
36c57dd1f2SQu Wenruo #include "rcu-string.h"
37169e0da9SNaohiro Aota #include "zoned.h"
386143c23cSNaohiro Aota #include "dev-replace.h"
39c7f13d42SJosef Bacik #include "fs.h"
4007e81dc9SJosef Bacik #include "accessors.h"
4145c40c8fSJosef Bacik #include "root-tree.h"
427c8ede16SJosef Bacik #include "file-item.h"
43aa5d3003SJosef Bacik #include "orphan.h"
44103c1972SChristoph Hellwig #include "tree-checker.h"
45fec577fbSChris Mason
46709c0486SArne Jansen #undef SCRAMBLE_DELAYED_REFS
47709c0486SArne Jansen
489f9b8e8dSQu Wenruo
495d4f98a2SYan Zheng static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
50c682f9b3SQu Wenruo struct btrfs_delayed_ref_node *node, u64 parent,
515d4f98a2SYan Zheng u64 root_objectid, u64 owner_objectid,
525d4f98a2SYan Zheng u64 owner_offset, int refs_to_drop,
53c682f9b3SQu Wenruo struct btrfs_delayed_extent_op *extra_op);
545d4f98a2SYan Zheng static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
555d4f98a2SYan Zheng struct extent_buffer *leaf,
565d4f98a2SYan Zheng struct btrfs_extent_item *ei);
575d4f98a2SYan Zheng static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
585d4f98a2SYan Zheng u64 parent, u64 root_objectid,
595d4f98a2SYan Zheng u64 flags, u64 owner, u64 offset,
605d4f98a2SYan Zheng struct btrfs_key *ins, int ref_mod);
615d4f98a2SYan Zheng static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
624e6bd4e0SNikolay Borisov struct btrfs_delayed_ref_node *node,
6321ebfbe7SNikolay Borisov struct btrfs_delayed_extent_op *extent_op);
6411833d66SYan Zheng static int find_next_key(struct btrfs_path *path, int level,
6511833d66SYan Zheng struct btrfs_key *key);
666a63209fSJosef Bacik
/*
 * Check whether a block group carries every requested flag bit.
 *
 * Returns 1 when all bits set in @bits are also set in cache->flags,
 * 0 otherwise.
 */
static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}
710f9dd46cSJosef Bacik
721a4ed8fdSFilipe Manana /* simple helper to search for an existing data extent at a given offset */
btrfs_lookup_data_extent(struct btrfs_fs_info * fs_info,u64 start,u64 len)732ff7e61eSJeff Mahoney int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
74e02119d5SChris Mason {
7529cbcf40SJosef Bacik struct btrfs_root *root = btrfs_extent_root(fs_info, start);
76e02119d5SChris Mason int ret;
77e02119d5SChris Mason struct btrfs_key key;
7831840ae1SZheng Yan struct btrfs_path *path;
79e02119d5SChris Mason
8031840ae1SZheng Yan path = btrfs_alloc_path();
81d8926bb3SMark Fasheh if (!path)
82d8926bb3SMark Fasheh return -ENOMEM;
83d8926bb3SMark Fasheh
84e02119d5SChris Mason key.objectid = start;
85e02119d5SChris Mason key.offset = len;
863173a18fSJosef Bacik key.type = BTRFS_EXTENT_ITEM_KEY;
8729cbcf40SJosef Bacik ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8831840ae1SZheng Yan btrfs_free_path(path);
897bb86316SChris Mason return ret;
907bb86316SChris Mason }
917bb86316SChris Mason
92d8d5f3e1SChris Mason /*
933173a18fSJosef Bacik * helper function to lookup reference count and flags of a tree block.
94a22285a6SYan, Zheng *
95a22285a6SYan, Zheng * the head node for delayed ref is used to store the sum of all the
96a22285a6SYan, Zheng * reference count modifications queued up in the rbtree. the head
97a22285a6SYan, Zheng * node may also store the extent flags to set. This way you can check
98a22285a6SYan, Zheng * to see what the reference count and extent flags would be if all of
99a22285a6SYan, Zheng * the delayed refs are not processed.
100a22285a6SYan, Zheng */
btrfs_lookup_extent_info(struct btrfs_trans_handle * trans,struct btrfs_fs_info * fs_info,u64 bytenr,u64 offset,int metadata,u64 * refs,u64 * flags)101a22285a6SYan, Zheng int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
1022ff7e61eSJeff Mahoney struct btrfs_fs_info *fs_info, u64 bytenr,
1033173a18fSJosef Bacik u64 offset, int metadata, u64 *refs, u64 *flags)
104a22285a6SYan, Zheng {
10529cbcf40SJosef Bacik struct btrfs_root *extent_root;
106a22285a6SYan, Zheng struct btrfs_delayed_ref_head *head;
107a22285a6SYan, Zheng struct btrfs_delayed_ref_root *delayed_refs;
108a22285a6SYan, Zheng struct btrfs_path *path;
109a22285a6SYan, Zheng struct btrfs_extent_item *ei;
110a22285a6SYan, Zheng struct extent_buffer *leaf;
111a22285a6SYan, Zheng struct btrfs_key key;
112a22285a6SYan, Zheng u32 item_size;
113a22285a6SYan, Zheng u64 num_refs;
114a22285a6SYan, Zheng u64 extent_flags;
115a22285a6SYan, Zheng int ret;
116a22285a6SYan, Zheng
1173173a18fSJosef Bacik /*
1183173a18fSJosef Bacik * If we don't have skinny metadata, don't bother doing anything
1193173a18fSJosef Bacik * different
1203173a18fSJosef Bacik */
1210b246afaSJeff Mahoney if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
1220b246afaSJeff Mahoney offset = fs_info->nodesize;
1233173a18fSJosef Bacik metadata = 0;
1243173a18fSJosef Bacik }
1253173a18fSJosef Bacik
126a22285a6SYan, Zheng path = btrfs_alloc_path();
127a22285a6SYan, Zheng if (!path)
128a22285a6SYan, Zheng return -ENOMEM;
129a22285a6SYan, Zheng
130a22285a6SYan, Zheng if (!trans) {
131a22285a6SYan, Zheng path->skip_locking = 1;
132a22285a6SYan, Zheng path->search_commit_root = 1;
133a22285a6SYan, Zheng }
134639eefc8SFilipe David Borba Manana
135639eefc8SFilipe David Borba Manana search_again:
136639eefc8SFilipe David Borba Manana key.objectid = bytenr;
137639eefc8SFilipe David Borba Manana key.offset = offset;
138639eefc8SFilipe David Borba Manana if (metadata)
139639eefc8SFilipe David Borba Manana key.type = BTRFS_METADATA_ITEM_KEY;
140639eefc8SFilipe David Borba Manana else
141639eefc8SFilipe David Borba Manana key.type = BTRFS_EXTENT_ITEM_KEY;
142639eefc8SFilipe David Borba Manana
14329cbcf40SJosef Bacik extent_root = btrfs_extent_root(fs_info, bytenr);
14429cbcf40SJosef Bacik ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
145a22285a6SYan, Zheng if (ret < 0)
146a22285a6SYan, Zheng goto out_free;
147a22285a6SYan, Zheng
1483173a18fSJosef Bacik if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
14974be9510SFilipe David Borba Manana if (path->slots[0]) {
15074be9510SFilipe David Borba Manana path->slots[0]--;
15174be9510SFilipe David Borba Manana btrfs_item_key_to_cpu(path->nodes[0], &key,
15274be9510SFilipe David Borba Manana path->slots[0]);
15374be9510SFilipe David Borba Manana if (key.objectid == bytenr &&
15474be9510SFilipe David Borba Manana key.type == BTRFS_EXTENT_ITEM_KEY &&
1550b246afaSJeff Mahoney key.offset == fs_info->nodesize)
15674be9510SFilipe David Borba Manana ret = 0;
15774be9510SFilipe David Borba Manana }
15874be9510SFilipe David Borba Manana }
1593173a18fSJosef Bacik
160a22285a6SYan, Zheng if (ret == 0) {
161a22285a6SYan, Zheng leaf = path->nodes[0];
1623212fa14SJosef Bacik item_size = btrfs_item_size(leaf, path->slots[0]);
163a22285a6SYan, Zheng if (item_size >= sizeof(*ei)) {
164a22285a6SYan, Zheng ei = btrfs_item_ptr(leaf, path->slots[0],
165a22285a6SYan, Zheng struct btrfs_extent_item);
166a22285a6SYan, Zheng num_refs = btrfs_extent_refs(leaf, ei);
167a22285a6SYan, Zheng extent_flags = btrfs_extent_flags(leaf, ei);
168a22285a6SYan, Zheng } else {
169182741d2SQu Wenruo ret = -EUCLEAN;
170182741d2SQu Wenruo btrfs_err(fs_info,
171182741d2SQu Wenruo "unexpected extent item size, has %u expect >= %zu",
172182741d2SQu Wenruo item_size, sizeof(*ei));
173ba3c2b19SNikolay Borisov if (trans)
174ba3c2b19SNikolay Borisov btrfs_abort_transaction(trans, ret);
175ba3c2b19SNikolay Borisov else
176ba3c2b19SNikolay Borisov btrfs_handle_fs_error(fs_info, ret, NULL);
177ba3c2b19SNikolay Borisov
178ba3c2b19SNikolay Borisov goto out_free;
179a22285a6SYan, Zheng }
180ba3c2b19SNikolay Borisov
181a22285a6SYan, Zheng BUG_ON(num_refs == 0);
182a22285a6SYan, Zheng } else {
183a22285a6SYan, Zheng num_refs = 0;
184a22285a6SYan, Zheng extent_flags = 0;
185a22285a6SYan, Zheng ret = 0;
186a22285a6SYan, Zheng }
187a22285a6SYan, Zheng
188a22285a6SYan, Zheng if (!trans)
189a22285a6SYan, Zheng goto out;
190a22285a6SYan, Zheng
191a22285a6SYan, Zheng delayed_refs = &trans->transaction->delayed_refs;
192a22285a6SYan, Zheng spin_lock(&delayed_refs->lock);
193f72ad18eSLiu Bo head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
194a22285a6SYan, Zheng if (head) {
195a22285a6SYan, Zheng if (!mutex_trylock(&head->mutex)) {
196d278850eSJosef Bacik refcount_inc(&head->refs);
197a22285a6SYan, Zheng spin_unlock(&delayed_refs->lock);
198a22285a6SYan, Zheng
199b3b4aa74SDavid Sterba btrfs_release_path(path);
200a22285a6SYan, Zheng
2018cc33e5cSDavid Sterba /*
2028cc33e5cSDavid Sterba * Mutex was contended, block until it's released and try
2038cc33e5cSDavid Sterba * again
2048cc33e5cSDavid Sterba */
205a22285a6SYan, Zheng mutex_lock(&head->mutex);
206a22285a6SYan, Zheng mutex_unlock(&head->mutex);
207d278850eSJosef Bacik btrfs_put_delayed_ref_head(head);
208639eefc8SFilipe David Borba Manana goto search_again;
209a22285a6SYan, Zheng }
210d7df2c79SJosef Bacik spin_lock(&head->lock);
211a22285a6SYan, Zheng if (head->extent_op && head->extent_op->update_flags)
212a22285a6SYan, Zheng extent_flags |= head->extent_op->flags_to_set;
213a22285a6SYan, Zheng else
214a22285a6SYan, Zheng BUG_ON(num_refs == 0);
215a22285a6SYan, Zheng
216d278850eSJosef Bacik num_refs += head->ref_mod;
217d7df2c79SJosef Bacik spin_unlock(&head->lock);
218a22285a6SYan, Zheng mutex_unlock(&head->mutex);
219a22285a6SYan, Zheng }
220a22285a6SYan, Zheng spin_unlock(&delayed_refs->lock);
221a22285a6SYan, Zheng out:
222a22285a6SYan, Zheng WARN_ON(num_refs == 0);
223a22285a6SYan, Zheng if (refs)
224a22285a6SYan, Zheng *refs = num_refs;
225a22285a6SYan, Zheng if (flags)
226a22285a6SYan, Zheng *flags = extent_flags;
227a22285a6SYan, Zheng out_free:
228a22285a6SYan, Zheng btrfs_free_path(path);
229a22285a6SYan, Zheng return ret;
230a22285a6SYan, Zheng }
231a22285a6SYan, Zheng
232a22285a6SYan, Zheng /*
233d8d5f3e1SChris Mason * Back reference rules. Back refs have three main goals:
234d8d5f3e1SChris Mason *
235d8d5f3e1SChris Mason * 1) differentiate between all holders of references to an extent so that
236d8d5f3e1SChris Mason * when a reference is dropped we can make sure it was a valid reference
237d8d5f3e1SChris Mason * before freeing the extent.
238d8d5f3e1SChris Mason *
239d8d5f3e1SChris Mason * 2) Provide enough information to quickly find the holders of an extent
240d8d5f3e1SChris Mason * if we notice a given block is corrupted or bad.
241d8d5f3e1SChris Mason *
242d8d5f3e1SChris Mason * 3) Make it easy to migrate blocks for FS shrinking or storage pool
243d8d5f3e1SChris Mason * maintenance. This is actually the same as #2, but with a slightly
244d8d5f3e1SChris Mason * different use case.
245d8d5f3e1SChris Mason *
2465d4f98a2SYan Zheng * There are two kinds of back refs. The implicit back refs is optimized
2475d4f98a2SYan Zheng * for pointers in non-shared tree blocks. For a given pointer in a block,
2485d4f98a2SYan Zheng * back refs of this kind provide information about the block's owner tree
 * and the pointer's key. This information allows us to find the block by
2505d4f98a2SYan Zheng * b-tree searching. The full back refs is for pointers in tree blocks not
2515d4f98a2SYan Zheng * referenced by their owner trees. The location of tree block is recorded
2525d4f98a2SYan Zheng * in the back refs. Actually the full back refs is generic, and can be
2535d4f98a2SYan Zheng * used in all cases the implicit back refs is used. The major shortcoming
2545d4f98a2SYan Zheng * of the full back refs is its overhead. Every time a tree block gets
2555d4f98a2SYan Zheng * COWed, we have to update back refs entry for all pointers in it.
2565d4f98a2SYan Zheng *
2575d4f98a2SYan Zheng * For a newly allocated tree block, we use implicit back refs for
2585d4f98a2SYan Zheng * pointers in it. This means most tree related operations only involve
2595d4f98a2SYan Zheng * implicit back refs. For a tree block created in old transaction, the
2605d4f98a2SYan Zheng * only way to drop a reference to it is COW it. So we can detect the
2615d4f98a2SYan Zheng * event that tree block loses its owner tree's reference and do the
2625d4f98a2SYan Zheng * back refs conversion.
2635d4f98a2SYan Zheng *
26401327610SNicholas D Steeves * When a tree block is COWed through a tree, there are four cases:
2655d4f98a2SYan Zheng *
2665d4f98a2SYan Zheng * The reference count of the block is one and the tree is the block's
2675d4f98a2SYan Zheng * owner tree. Nothing to do in this case.
2685d4f98a2SYan Zheng *
2695d4f98a2SYan Zheng * The reference count of the block is one and the tree is not the
2705d4f98a2SYan Zheng * block's owner tree. In this case, full back refs is used for pointers
2715d4f98a2SYan Zheng * in the block. Remove these full back refs, add implicit back refs for
 * every pointer in the new block.
2735d4f98a2SYan Zheng *
2745d4f98a2SYan Zheng * The reference count of the block is greater than one and the tree is
2755d4f98a2SYan Zheng * the block's owner tree. In this case, implicit back refs is used for
 * pointers in the block. Add full back refs for every pointer in the
2775d4f98a2SYan Zheng * block, increase lower level extents' reference counts. The original
2785d4f98a2SYan Zheng * implicit back refs are entailed to the new block.
2795d4f98a2SYan Zheng *
2805d4f98a2SYan Zheng * The reference count of the block is greater than one and the tree is
2815d4f98a2SYan Zheng * not the block's owner tree. Add implicit back refs for every pointer in
2825d4f98a2SYan Zheng * the new block, increase lower level extents' reference count.
2835d4f98a2SYan Zheng *
2845d4f98a2SYan Zheng * Back Reference Key composing:
2855d4f98a2SYan Zheng *
2865d4f98a2SYan Zheng * The key objectid corresponds to the first byte in the extent,
2875d4f98a2SYan Zheng * The key type is used to differentiate between types of back refs.
2885d4f98a2SYan Zheng * There are different meanings of the key offset for different types
2895d4f98a2SYan Zheng * of back refs.
2905d4f98a2SYan Zheng *
291d8d5f3e1SChris Mason * File extents can be referenced by:
292d8d5f3e1SChris Mason *
293d8d5f3e1SChris Mason * - multiple snapshots, subvolumes, or different generations in one subvol
29431840ae1SZheng Yan * - different files inside a single subvolume
295d8d5f3e1SChris Mason * - different offsets inside a file (bookend extents in file.c)
296d8d5f3e1SChris Mason *
2975d4f98a2SYan Zheng * The extent ref structure for the implicit back refs has fields for:
298d8d5f3e1SChris Mason *
299d8d5f3e1SChris Mason * - Objectid of the subvolume root
300d8d5f3e1SChris Mason * - objectid of the file holding the reference
3015d4f98a2SYan Zheng * - original offset in the file
3025d4f98a2SYan Zheng * - how many bookend extents
30331840ae1SZheng Yan *
3045d4f98a2SYan Zheng * The key offset for the implicit back refs is hash of the first
3055d4f98a2SYan Zheng * three fields.
306d8d5f3e1SChris Mason *
3075d4f98a2SYan Zheng * The extent ref structure for the full back refs has field for:
308d8d5f3e1SChris Mason *
3095d4f98a2SYan Zheng * - number of pointers in the tree leaf
310d8d5f3e1SChris Mason *
 * The key offset for the full back refs is the first byte of
3125d4f98a2SYan Zheng * the tree leaf
313d8d5f3e1SChris Mason *
3145d4f98a2SYan Zheng * When a file extent is allocated, The implicit back refs is used.
3155d4f98a2SYan Zheng * the fields are filled in:
316d8d5f3e1SChris Mason *
3175d4f98a2SYan Zheng * (root_key.objectid, inode objectid, offset in file, 1)
3185d4f98a2SYan Zheng *
 * When a file extent is removed by file truncation, we find the
3205d4f98a2SYan Zheng * corresponding implicit back refs and check the following fields:
3215d4f98a2SYan Zheng *
3225d4f98a2SYan Zheng * (btrfs_header_owner(leaf), inode objectid, offset in file)
323d8d5f3e1SChris Mason *
324d8d5f3e1SChris Mason * Btree extents can be referenced by:
325d8d5f3e1SChris Mason *
326d8d5f3e1SChris Mason * - Different subvolumes
327d8d5f3e1SChris Mason *
3285d4f98a2SYan Zheng * Both the implicit back refs and the full back refs for tree blocks
3295d4f98a2SYan Zheng * only consist of key. The key offset for the implicit back refs is
3305d4f98a2SYan Zheng * objectid of block's owner tree. The key offset for the full back refs
3315d4f98a2SYan Zheng * is the first byte of parent block.
332d8d5f3e1SChris Mason *
3335d4f98a2SYan Zheng * When implicit back refs is used, information about the lowest key and
 * level of the tree block are required. This information is stored in
3355d4f98a2SYan Zheng * tree block info structure.
336d8d5f3e1SChris Mason */
33731840ae1SZheng Yan
338167ce953SLiu Bo /*
339167ce953SLiu Bo * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
34052042d8eSAndrea Gelmini * is_data == BTRFS_REF_TYPE_DATA, data type is requiried,
341167ce953SLiu Bo * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
342167ce953SLiu Bo */
btrfs_get_extent_inline_ref_type(const struct extent_buffer * eb,struct btrfs_extent_inline_ref * iref,enum btrfs_inline_ref_type is_data)343167ce953SLiu Bo int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
344167ce953SLiu Bo struct btrfs_extent_inline_ref *iref,
345167ce953SLiu Bo enum btrfs_inline_ref_type is_data)
346167ce953SLiu Bo {
347167ce953SLiu Bo int type = btrfs_extent_inline_ref_type(eb, iref);
34864ecdb64SLiu Bo u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
349167ce953SLiu Bo
350167ce953SLiu Bo if (type == BTRFS_TREE_BLOCK_REF_KEY ||
351167ce953SLiu Bo type == BTRFS_SHARED_BLOCK_REF_KEY ||
352167ce953SLiu Bo type == BTRFS_SHARED_DATA_REF_KEY ||
353167ce953SLiu Bo type == BTRFS_EXTENT_DATA_REF_KEY) {
354167ce953SLiu Bo if (is_data == BTRFS_REF_TYPE_BLOCK) {
35564ecdb64SLiu Bo if (type == BTRFS_TREE_BLOCK_REF_KEY)
356167ce953SLiu Bo return type;
35764ecdb64SLiu Bo if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
35864ecdb64SLiu Bo ASSERT(eb->fs_info);
35964ecdb64SLiu Bo /*
360ea57788eSQu Wenruo * Every shared one has parent tree block,
361ea57788eSQu Wenruo * which must be aligned to sector size.
36264ecdb64SLiu Bo */
36364ecdb64SLiu Bo if (offset &&
364ea57788eSQu Wenruo IS_ALIGNED(offset, eb->fs_info->sectorsize))
36564ecdb64SLiu Bo return type;
36664ecdb64SLiu Bo }
367167ce953SLiu Bo } else if (is_data == BTRFS_REF_TYPE_DATA) {
36864ecdb64SLiu Bo if (type == BTRFS_EXTENT_DATA_REF_KEY)
369167ce953SLiu Bo return type;
37064ecdb64SLiu Bo if (type == BTRFS_SHARED_DATA_REF_KEY) {
37164ecdb64SLiu Bo ASSERT(eb->fs_info);
37264ecdb64SLiu Bo /*
373ea57788eSQu Wenruo * Every shared one has parent tree block,
374ea57788eSQu Wenruo * which must be aligned to sector size.
37564ecdb64SLiu Bo */
37664ecdb64SLiu Bo if (offset &&
377ea57788eSQu Wenruo IS_ALIGNED(offset, eb->fs_info->sectorsize))
37864ecdb64SLiu Bo return type;
37964ecdb64SLiu Bo }
380167ce953SLiu Bo } else {
381167ce953SLiu Bo ASSERT(is_data == BTRFS_REF_TYPE_ANY);
382167ce953SLiu Bo return type;
383167ce953SLiu Bo }
384167ce953SLiu Bo }
385167ce953SLiu Bo
38625761430SQu Wenruo WARN_ON(1);
3876c75a589SQu Wenruo btrfs_print_leaf(eb);
388ea57788eSQu Wenruo btrfs_err(eb->fs_info,
389ea57788eSQu Wenruo "eb %llu iref 0x%lx invalid extent inline ref type %d",
390ea57788eSQu Wenruo eb->start, (unsigned long)iref, type);
391167ce953SLiu Bo
392167ce953SLiu Bo return BTRFS_REF_TYPE_INVALID;
393167ce953SLiu Bo }
394167ce953SLiu Bo
/*
 * Compute the hash used as key offset for BTRFS_EXTENT_DATA_REF_KEY items.
 *
 * Two crc32c accumulators are seeded with ~0: the high one covers the
 * little-endian @root_objectid, the low one covers the little-endian @owner
 * followed by the little-endian @offset.
 *
 * Returns (high_crc << 31) ^ low_crc. Note the shift is 31, not 32, so the
 * top bit of the low crc overlaps the lowest bit of the high crc.
 * NOTE(review): the result is stored as a key offset by the extent data ref
 * lookup/insert helpers, so presumably the formula must stay as is - confirm
 * before touching it.
 */
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}
4105d4f98a2SYan Zheng
hash_extent_data_ref_item(struct extent_buffer * leaf,struct btrfs_extent_data_ref * ref)4115d4f98a2SYan Zheng static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
4125d4f98a2SYan Zheng struct btrfs_extent_data_ref *ref)
4135d4f98a2SYan Zheng {
4145d4f98a2SYan Zheng return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
4155d4f98a2SYan Zheng btrfs_extent_data_ref_objectid(leaf, ref),
4165d4f98a2SYan Zheng btrfs_extent_data_ref_offset(leaf, ref));
4175d4f98a2SYan Zheng }
4185d4f98a2SYan Zheng
/*
 * Compare an extent data ref item against the expected owner triple.
 *
 * Returns 1 if the root, objectid and offset stored in @ref all equal
 * @root_objectid, @owner and @offset respectively, 0 otherwise.
 */
static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}
4295d4f98a2SYan Zheng
lookup_extent_data_ref(struct btrfs_trans_handle * trans,struct btrfs_path * path,u64 bytenr,u64 parent,u64 root_objectid,u64 owner,u64 offset)4305d4f98a2SYan Zheng static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
4313bb1a1bcSYan Zheng struct btrfs_path *path,
4323bb1a1bcSYan Zheng u64 bytenr, u64 parent,
4335d4f98a2SYan Zheng u64 root_objectid,
4345d4f98a2SYan Zheng u64 owner, u64 offset)
43574493f7aSChris Mason {
43629cbcf40SJosef Bacik struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
43774493f7aSChris Mason struct btrfs_key key;
4385d4f98a2SYan Zheng struct btrfs_extent_data_ref *ref;
43931840ae1SZheng Yan struct extent_buffer *leaf;
4405d4f98a2SYan Zheng u32 nritems;
44174493f7aSChris Mason int ret;
4425d4f98a2SYan Zheng int recow;
4435d4f98a2SYan Zheng int err = -ENOENT;
44474493f7aSChris Mason
44574493f7aSChris Mason key.objectid = bytenr;
4465d4f98a2SYan Zheng if (parent) {
4475d4f98a2SYan Zheng key.type = BTRFS_SHARED_DATA_REF_KEY;
44831840ae1SZheng Yan key.offset = parent;
4495d4f98a2SYan Zheng } else {
4505d4f98a2SYan Zheng key.type = BTRFS_EXTENT_DATA_REF_KEY;
4515d4f98a2SYan Zheng key.offset = hash_extent_data_ref(root_objectid,
4525d4f98a2SYan Zheng owner, offset);
4535d4f98a2SYan Zheng }
4545d4f98a2SYan Zheng again:
4555d4f98a2SYan Zheng recow = 0;
4565d4f98a2SYan Zheng ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
4575d4f98a2SYan Zheng if (ret < 0) {
4585d4f98a2SYan Zheng err = ret;
4595d4f98a2SYan Zheng goto fail;
4605d4f98a2SYan Zheng }
46174493f7aSChris Mason
4625d4f98a2SYan Zheng if (parent) {
4635d4f98a2SYan Zheng if (!ret)
4645d4f98a2SYan Zheng return 0;
4655d4f98a2SYan Zheng goto fail;
46674493f7aSChris Mason }
46731840ae1SZheng Yan
46831840ae1SZheng Yan leaf = path->nodes[0];
4695d4f98a2SYan Zheng nritems = btrfs_header_nritems(leaf);
4705d4f98a2SYan Zheng while (1) {
4715d4f98a2SYan Zheng if (path->slots[0] >= nritems) {
4725d4f98a2SYan Zheng ret = btrfs_next_leaf(root, path);
4735d4f98a2SYan Zheng if (ret < 0)
4745d4f98a2SYan Zheng err = ret;
4755d4f98a2SYan Zheng if (ret)
4765d4f98a2SYan Zheng goto fail;
4775d4f98a2SYan Zheng
4785d4f98a2SYan Zheng leaf = path->nodes[0];
4795d4f98a2SYan Zheng nritems = btrfs_header_nritems(leaf);
4805d4f98a2SYan Zheng recow = 1;
48131840ae1SZheng Yan }
48231840ae1SZheng Yan
4835d4f98a2SYan Zheng btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4845d4f98a2SYan Zheng if (key.objectid != bytenr ||
4855d4f98a2SYan Zheng key.type != BTRFS_EXTENT_DATA_REF_KEY)
4865d4f98a2SYan Zheng goto fail;
4875d4f98a2SYan Zheng
4885d4f98a2SYan Zheng ref = btrfs_item_ptr(leaf, path->slots[0],
4895d4f98a2SYan Zheng struct btrfs_extent_data_ref);
4905d4f98a2SYan Zheng
4915d4f98a2SYan Zheng if (match_extent_data_ref(leaf, ref, root_objectid,
4925d4f98a2SYan Zheng owner, offset)) {
4935d4f98a2SYan Zheng if (recow) {
494b3b4aa74SDavid Sterba btrfs_release_path(path);
4955d4f98a2SYan Zheng goto again;
4965d4f98a2SYan Zheng }
4975d4f98a2SYan Zheng err = 0;
4985d4f98a2SYan Zheng break;
4995d4f98a2SYan Zheng }
5005d4f98a2SYan Zheng path->slots[0]++;
5015d4f98a2SYan Zheng }
5025d4f98a2SYan Zheng fail:
5035d4f98a2SYan Zheng return err;
5045d4f98a2SYan Zheng }
5055d4f98a2SYan Zheng
insert_extent_data_ref(struct btrfs_trans_handle * trans,struct btrfs_path * path,u64 bytenr,u64 parent,u64 root_objectid,u64 owner,u64 offset,int refs_to_add)5065d4f98a2SYan Zheng static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
50731840ae1SZheng Yan struct btrfs_path *path,
50831840ae1SZheng Yan u64 bytenr, u64 parent,
5095d4f98a2SYan Zheng u64 root_objectid, u64 owner,
5105d4f98a2SYan Zheng u64 offset, int refs_to_add)
51131840ae1SZheng Yan {
51229cbcf40SJosef Bacik struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
51331840ae1SZheng Yan struct btrfs_key key;
51431840ae1SZheng Yan struct extent_buffer *leaf;
5155d4f98a2SYan Zheng u32 size;
51631840ae1SZheng Yan u32 num_refs;
51731840ae1SZheng Yan int ret;
51831840ae1SZheng Yan
51931840ae1SZheng Yan key.objectid = bytenr;
5205d4f98a2SYan Zheng if (parent) {
5215d4f98a2SYan Zheng key.type = BTRFS_SHARED_DATA_REF_KEY;
52231840ae1SZheng Yan key.offset = parent;
5235d4f98a2SYan Zheng size = sizeof(struct btrfs_shared_data_ref);
52431840ae1SZheng Yan } else {
5255d4f98a2SYan Zheng key.type = BTRFS_EXTENT_DATA_REF_KEY;
5265d4f98a2SYan Zheng key.offset = hash_extent_data_ref(root_objectid,
5275d4f98a2SYan Zheng owner, offset);
5285d4f98a2SYan Zheng size = sizeof(struct btrfs_extent_data_ref);
52931840ae1SZheng Yan }
5305d4f98a2SYan Zheng
5315d4f98a2SYan Zheng ret = btrfs_insert_empty_item(trans, root, path, &key, size);
5325d4f98a2SYan Zheng if (ret && ret != -EEXIST)
5335d4f98a2SYan Zheng goto fail;
5345d4f98a2SYan Zheng
5355d4f98a2SYan Zheng leaf = path->nodes[0];
5365d4f98a2SYan Zheng if (parent) {
5375d4f98a2SYan Zheng struct btrfs_shared_data_ref *ref;
5385d4f98a2SYan Zheng ref = btrfs_item_ptr(leaf, path->slots[0],
5395d4f98a2SYan Zheng struct btrfs_shared_data_ref);
5405d4f98a2SYan Zheng if (ret == 0) {
5415d4f98a2SYan Zheng btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
5425d4f98a2SYan Zheng } else {
5435d4f98a2SYan Zheng num_refs = btrfs_shared_data_ref_count(leaf, ref);
5445d4f98a2SYan Zheng num_refs += refs_to_add;
5455d4f98a2SYan Zheng btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
5465d4f98a2SYan Zheng }
5475d4f98a2SYan Zheng } else {
5485d4f98a2SYan Zheng struct btrfs_extent_data_ref *ref;
5495d4f98a2SYan Zheng while (ret == -EEXIST) {
5505d4f98a2SYan Zheng ref = btrfs_item_ptr(leaf, path->slots[0],
5515d4f98a2SYan Zheng struct btrfs_extent_data_ref);
5525d4f98a2SYan Zheng if (match_extent_data_ref(leaf, ref, root_objectid,
5535d4f98a2SYan Zheng owner, offset))
5545d4f98a2SYan Zheng break;
555b3b4aa74SDavid Sterba btrfs_release_path(path);
5565d4f98a2SYan Zheng key.offset++;
5575d4f98a2SYan Zheng ret = btrfs_insert_empty_item(trans, root, path, &key,
5585d4f98a2SYan Zheng size);
5595d4f98a2SYan Zheng if (ret && ret != -EEXIST)
5605d4f98a2SYan Zheng goto fail;
5615d4f98a2SYan Zheng
5625d4f98a2SYan Zheng leaf = path->nodes[0];
5635d4f98a2SYan Zheng }
5645d4f98a2SYan Zheng ref = btrfs_item_ptr(leaf, path->slots[0],
5655d4f98a2SYan Zheng struct btrfs_extent_data_ref);
5665d4f98a2SYan Zheng if (ret == 0) {
5675d4f98a2SYan Zheng btrfs_set_extent_data_ref_root(leaf, ref,
5685d4f98a2SYan Zheng root_objectid);
5695d4f98a2SYan Zheng btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
5705d4f98a2SYan Zheng btrfs_set_extent_data_ref_offset(leaf, ref, offset);
5715d4f98a2SYan Zheng btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
5725d4f98a2SYan Zheng } else {
5735d4f98a2SYan Zheng num_refs = btrfs_extent_data_ref_count(leaf, ref);
5745d4f98a2SYan Zheng num_refs += refs_to_add;
5755d4f98a2SYan Zheng btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
5765d4f98a2SYan Zheng }
5775d4f98a2SYan Zheng }
578d5e09e38SFilipe Manana btrfs_mark_buffer_dirty(trans, leaf);
5795d4f98a2SYan Zheng ret = 0;
5805d4f98a2SYan Zheng fail:
581b3b4aa74SDavid Sterba btrfs_release_path(path);
5827bb86316SChris Mason return ret;
58374493f7aSChris Mason }
58474493f7aSChris Mason
remove_extent_data_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,int refs_to_drop)5855d4f98a2SYan Zheng static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
58676d76e78SJosef Bacik struct btrfs_root *root,
58756bec294SChris Mason struct btrfs_path *path,
5885b2a54bbSJosef Bacik int refs_to_drop)
58931840ae1SZheng Yan {
5905d4f98a2SYan Zheng struct btrfs_key key;
5915d4f98a2SYan Zheng struct btrfs_extent_data_ref *ref1 = NULL;
5925d4f98a2SYan Zheng struct btrfs_shared_data_ref *ref2 = NULL;
59331840ae1SZheng Yan struct extent_buffer *leaf;
5945d4f98a2SYan Zheng u32 num_refs = 0;
59531840ae1SZheng Yan int ret = 0;
59631840ae1SZheng Yan
59731840ae1SZheng Yan leaf = path->nodes[0];
5985d4f98a2SYan Zheng btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5995d4f98a2SYan Zheng
6005d4f98a2SYan Zheng if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6015d4f98a2SYan Zheng ref1 = btrfs_item_ptr(leaf, path->slots[0],
6025d4f98a2SYan Zheng struct btrfs_extent_data_ref);
6035d4f98a2SYan Zheng num_refs = btrfs_extent_data_ref_count(leaf, ref1);
6045d4f98a2SYan Zheng } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6055d4f98a2SYan Zheng ref2 = btrfs_item_ptr(leaf, path->slots[0],
6065d4f98a2SYan Zheng struct btrfs_shared_data_ref);
6075d4f98a2SYan Zheng num_refs = btrfs_shared_data_ref_count(leaf, ref2);
6085d4f98a2SYan Zheng } else {
609182741d2SQu Wenruo btrfs_err(trans->fs_info,
610182741d2SQu Wenruo "unrecognized backref key (%llu %u %llu)",
611182741d2SQu Wenruo key.objectid, key.type, key.offset);
612182741d2SQu Wenruo btrfs_abort_transaction(trans, -EUCLEAN);
613182741d2SQu Wenruo return -EUCLEAN;
6145d4f98a2SYan Zheng }
6155d4f98a2SYan Zheng
61656bec294SChris Mason BUG_ON(num_refs < refs_to_drop);
61756bec294SChris Mason num_refs -= refs_to_drop;
6185d4f98a2SYan Zheng
61931840ae1SZheng Yan if (num_refs == 0) {
62076d76e78SJosef Bacik ret = btrfs_del_item(trans, root, path);
62131840ae1SZheng Yan } else {
6225d4f98a2SYan Zheng if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
6235d4f98a2SYan Zheng btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
6245d4f98a2SYan Zheng else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
6255d4f98a2SYan Zheng btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
626d5e09e38SFilipe Manana btrfs_mark_buffer_dirty(trans, leaf);
62731840ae1SZheng Yan }
6285d4f98a2SYan Zheng return ret;
6295d4f98a2SYan Zheng }
6305d4f98a2SYan Zheng
extent_data_ref_count(struct btrfs_path * path,struct btrfs_extent_inline_ref * iref)6319ed0dea0SZhaolei static noinline u32 extent_data_ref_count(struct btrfs_path *path,
6325d4f98a2SYan Zheng struct btrfs_extent_inline_ref *iref)
6335d4f98a2SYan Zheng {
6345d4f98a2SYan Zheng struct btrfs_key key;
6355d4f98a2SYan Zheng struct extent_buffer *leaf;
6365d4f98a2SYan Zheng struct btrfs_extent_data_ref *ref1;
6375d4f98a2SYan Zheng struct btrfs_shared_data_ref *ref2;
6385d4f98a2SYan Zheng u32 num_refs = 0;
6393de28d57SLiu Bo int type;
6405d4f98a2SYan Zheng
6415d4f98a2SYan Zheng leaf = path->nodes[0];
6425d4f98a2SYan Zheng btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
643ba3c2b19SNikolay Borisov
6445d4f98a2SYan Zheng if (iref) {
6453de28d57SLiu Bo /*
6463de28d57SLiu Bo * If type is invalid, we should have bailed out earlier than
6473de28d57SLiu Bo * this call.
6483de28d57SLiu Bo */
6493de28d57SLiu Bo type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
6503de28d57SLiu Bo ASSERT(type != BTRFS_REF_TYPE_INVALID);
6513de28d57SLiu Bo if (type == BTRFS_EXTENT_DATA_REF_KEY) {
6525d4f98a2SYan Zheng ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
6535d4f98a2SYan Zheng num_refs = btrfs_extent_data_ref_count(leaf, ref1);
6545d4f98a2SYan Zheng } else {
6555d4f98a2SYan Zheng ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
6565d4f98a2SYan Zheng num_refs = btrfs_shared_data_ref_count(leaf, ref2);
6575d4f98a2SYan Zheng }
6585d4f98a2SYan Zheng } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
6595d4f98a2SYan Zheng ref1 = btrfs_item_ptr(leaf, path->slots[0],
6605d4f98a2SYan Zheng struct btrfs_extent_data_ref);
6615d4f98a2SYan Zheng num_refs = btrfs_extent_data_ref_count(leaf, ref1);
6625d4f98a2SYan Zheng } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
6635d4f98a2SYan Zheng ref2 = btrfs_item_ptr(leaf, path->slots[0],
6645d4f98a2SYan Zheng struct btrfs_shared_data_ref);
6655d4f98a2SYan Zheng num_refs = btrfs_shared_data_ref_count(leaf, ref2);
6665d4f98a2SYan Zheng } else {
6675d4f98a2SYan Zheng WARN_ON(1);
6685d4f98a2SYan Zheng }
6695d4f98a2SYan Zheng return num_refs;
6705d4f98a2SYan Zheng }
6715d4f98a2SYan Zheng
lookup_tree_block_ref(struct btrfs_trans_handle * trans,struct btrfs_path * path,u64 bytenr,u64 parent,u64 root_objectid)6725d4f98a2SYan Zheng static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
6735d4f98a2SYan Zheng struct btrfs_path *path,
6745d4f98a2SYan Zheng u64 bytenr, u64 parent,
6755d4f98a2SYan Zheng u64 root_objectid)
6765d4f98a2SYan Zheng {
67729cbcf40SJosef Bacik struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
6785d4f98a2SYan Zheng struct btrfs_key key;
6795d4f98a2SYan Zheng int ret;
6805d4f98a2SYan Zheng
6815d4f98a2SYan Zheng key.objectid = bytenr;
6825d4f98a2SYan Zheng if (parent) {
6835d4f98a2SYan Zheng key.type = BTRFS_SHARED_BLOCK_REF_KEY;
6845d4f98a2SYan Zheng key.offset = parent;
6855d4f98a2SYan Zheng } else {
6865d4f98a2SYan Zheng key.type = BTRFS_TREE_BLOCK_REF_KEY;
6875d4f98a2SYan Zheng key.offset = root_objectid;
6885d4f98a2SYan Zheng }
6895d4f98a2SYan Zheng
6905d4f98a2SYan Zheng ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
6915d4f98a2SYan Zheng if (ret > 0)
6925d4f98a2SYan Zheng ret = -ENOENT;
6935d4f98a2SYan Zheng return ret;
6945d4f98a2SYan Zheng }
6955d4f98a2SYan Zheng
insert_tree_block_ref(struct btrfs_trans_handle * trans,struct btrfs_path * path,u64 bytenr,u64 parent,u64 root_objectid)6965d4f98a2SYan Zheng static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
6975d4f98a2SYan Zheng struct btrfs_path *path,
6985d4f98a2SYan Zheng u64 bytenr, u64 parent,
6995d4f98a2SYan Zheng u64 root_objectid)
7005d4f98a2SYan Zheng {
70129cbcf40SJosef Bacik struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
7025d4f98a2SYan Zheng struct btrfs_key key;
7035d4f98a2SYan Zheng int ret;
7045d4f98a2SYan Zheng
7055d4f98a2SYan Zheng key.objectid = bytenr;
7065d4f98a2SYan Zheng if (parent) {
7075d4f98a2SYan Zheng key.type = BTRFS_SHARED_BLOCK_REF_KEY;
7085d4f98a2SYan Zheng key.offset = parent;
7095d4f98a2SYan Zheng } else {
7105d4f98a2SYan Zheng key.type = BTRFS_TREE_BLOCK_REF_KEY;
7115d4f98a2SYan Zheng key.offset = root_objectid;
7125d4f98a2SYan Zheng }
7135d4f98a2SYan Zheng
71429cbcf40SJosef Bacik ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
715b3b4aa74SDavid Sterba btrfs_release_path(path);
7165d4f98a2SYan Zheng return ret;
7175d4f98a2SYan Zheng }
7185d4f98a2SYan Zheng
/*
 * Map a (parent, owner) pair to the backref key type that describes it.
 *
 * Owners below BTRFS_FIRST_FREE_OBJECTID get a block ref type, all other
 * owners get a data ref type.  A non-zero @parent selects the "shared"
 * variant of either.
 */
static inline int extent_ref_type(u64 parent, u64 owner)
{
	const bool block_ref = owner < BTRFS_FIRST_FREE_OBJECTID;

	if (parent > 0)
		return block_ref ? BTRFS_SHARED_BLOCK_REF_KEY :
				   BTRFS_SHARED_DATA_REF_KEY;

	return block_ref ? BTRFS_TREE_BLOCK_REF_KEY :
			   BTRFS_EXTENT_DATA_REF_KEY;
}
7355d4f98a2SYan Zheng
find_next_key(struct btrfs_path * path,int level,struct btrfs_key * key)7362c47e605SYan Zheng static int find_next_key(struct btrfs_path *path, int level,
7372c47e605SYan Zheng struct btrfs_key *key)
7385d4f98a2SYan Zheng
7395d4f98a2SYan Zheng {
7402c47e605SYan Zheng for (; level < BTRFS_MAX_LEVEL; level++) {
7415d4f98a2SYan Zheng if (!path->nodes[level])
7425d4f98a2SYan Zheng break;
7435d4f98a2SYan Zheng if (path->slots[level] + 1 >=
7445d4f98a2SYan Zheng btrfs_header_nritems(path->nodes[level]))
7455d4f98a2SYan Zheng continue;
7465d4f98a2SYan Zheng if (level == 0)
7475d4f98a2SYan Zheng btrfs_item_key_to_cpu(path->nodes[level], key,
7485d4f98a2SYan Zheng path->slots[level] + 1);
7495d4f98a2SYan Zheng else
7505d4f98a2SYan Zheng btrfs_node_key_to_cpu(path->nodes[level], key,
7515d4f98a2SYan Zheng path->slots[level] + 1);
7525d4f98a2SYan Zheng return 0;
7535d4f98a2SYan Zheng }
7545d4f98a2SYan Zheng return 1;
7555d4f98a2SYan Zheng }
7565d4f98a2SYan Zheng
7575d4f98a2SYan Zheng /*
7585d4f98a2SYan Zheng * look for inline back ref. if back ref is found, *ref_ret is set
7595d4f98a2SYan Zheng * to the address of inline back ref, and 0 is returned.
7605d4f98a2SYan Zheng *
7615d4f98a2SYan Zheng * if back ref isn't found, *ref_ret is set to the address where it
7625d4f98a2SYan Zheng * should be inserted, and -ENOENT is returned.
7635d4f98a2SYan Zheng *
7645d4f98a2SYan Zheng * if insert is true and there are too many inline back refs, the path
7655d4f98a2SYan Zheng * points to the extent item, and -EAGAIN is returned.
7665d4f98a2SYan Zheng *
7675d4f98a2SYan Zheng * NOTE: inline back refs are ordered in the same way that back ref
7685d4f98a2SYan Zheng * items in the tree are ordered.
7695d4f98a2SYan Zheng */
7705d4f98a2SYan Zheng static noinline_for_stack
lookup_inline_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_path * path,struct btrfs_extent_inline_ref ** ref_ret,u64 bytenr,u64 num_bytes,u64 parent,u64 root_objectid,u64 owner,u64 offset,int insert)7715d4f98a2SYan Zheng int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
7725d4f98a2SYan Zheng struct btrfs_path *path,
7735d4f98a2SYan Zheng struct btrfs_extent_inline_ref **ref_ret,
7745d4f98a2SYan Zheng u64 bytenr, u64 num_bytes,
7755d4f98a2SYan Zheng u64 parent, u64 root_objectid,
7765d4f98a2SYan Zheng u64 owner, u64 offset, int insert)
7775d4f98a2SYan Zheng {
778867cc1fbSNikolay Borisov struct btrfs_fs_info *fs_info = trans->fs_info;
77929cbcf40SJosef Bacik struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr);
7805d4f98a2SYan Zheng struct btrfs_key key;
7815d4f98a2SYan Zheng struct extent_buffer *leaf;
7825d4f98a2SYan Zheng struct btrfs_extent_item *ei;
7835d4f98a2SYan Zheng struct btrfs_extent_inline_ref *iref;
7845d4f98a2SYan Zheng u64 flags;
7855d4f98a2SYan Zheng u64 item_size;
7865d4f98a2SYan Zheng unsigned long ptr;
7875d4f98a2SYan Zheng unsigned long end;
7885d4f98a2SYan Zheng int extra_size;
7895d4f98a2SYan Zheng int type;
7905d4f98a2SYan Zheng int want;
7915d4f98a2SYan Zheng int ret;
7925d4f98a2SYan Zheng int err = 0;
7930b246afaSJeff Mahoney bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
7943de28d57SLiu Bo int needed;
7955d4f98a2SYan Zheng
7965d4f98a2SYan Zheng key.objectid = bytenr;
7975d4f98a2SYan Zheng key.type = BTRFS_EXTENT_ITEM_KEY;
7985d4f98a2SYan Zheng key.offset = num_bytes;
7995d4f98a2SYan Zheng
8005d4f98a2SYan Zheng want = extent_ref_type(parent, owner);
8015d4f98a2SYan Zheng if (insert) {
8025d4f98a2SYan Zheng extra_size = btrfs_extent_inline_ref_size(want);
8039a664971Sethanwu path->search_for_extension = 1;
8045d4f98a2SYan Zheng path->keep_locks = 1;
8055d4f98a2SYan Zheng } else
8065d4f98a2SYan Zheng extra_size = -1;
8073173a18fSJosef Bacik
8083173a18fSJosef Bacik /*
80916d1c062SNikolay Borisov * Owner is our level, so we can just add one to get the level for the
81016d1c062SNikolay Borisov * block we are interested in.
8113173a18fSJosef Bacik */
8123173a18fSJosef Bacik if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
8133173a18fSJosef Bacik key.type = BTRFS_METADATA_ITEM_KEY;
8143173a18fSJosef Bacik key.offset = owner;
8153173a18fSJosef Bacik }
8163173a18fSJosef Bacik
8173173a18fSJosef Bacik again:
8185d4f98a2SYan Zheng ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
8195d4f98a2SYan Zheng if (ret < 0) {
8205d4f98a2SYan Zheng err = ret;
8215d4f98a2SYan Zheng goto out;
8225d4f98a2SYan Zheng }
8233173a18fSJosef Bacik
8243173a18fSJosef Bacik /*
8253173a18fSJosef Bacik * We may be a newly converted file system which still has the old fat
8263173a18fSJosef Bacik * extent entries for metadata, so try and see if we have one of those.
8273173a18fSJosef Bacik */
8283173a18fSJosef Bacik if (ret > 0 && skinny_metadata) {
8293173a18fSJosef Bacik skinny_metadata = false;
8303173a18fSJosef Bacik if (path->slots[0]) {
8313173a18fSJosef Bacik path->slots[0]--;
8323173a18fSJosef Bacik btrfs_item_key_to_cpu(path->nodes[0], &key,
8333173a18fSJosef Bacik path->slots[0]);
8343173a18fSJosef Bacik if (key.objectid == bytenr &&
8353173a18fSJosef Bacik key.type == BTRFS_EXTENT_ITEM_KEY &&
8363173a18fSJosef Bacik key.offset == num_bytes)
8373173a18fSJosef Bacik ret = 0;
8383173a18fSJosef Bacik }
8393173a18fSJosef Bacik if (ret) {
8409ce49a0bSFilipe Manana key.objectid = bytenr;
8413173a18fSJosef Bacik key.type = BTRFS_EXTENT_ITEM_KEY;
8423173a18fSJosef Bacik key.offset = num_bytes;
8433173a18fSJosef Bacik btrfs_release_path(path);
8443173a18fSJosef Bacik goto again;
8453173a18fSJosef Bacik }
8463173a18fSJosef Bacik }
8473173a18fSJosef Bacik
84879787eaaSJeff Mahoney if (ret && !insert) {
84979787eaaSJeff Mahoney err = -ENOENT;
85079787eaaSJeff Mahoney goto out;
851fae7f21cSDulshani Gunawardhana } else if (WARN_ON(ret)) {
8527f72f505SQu Wenruo btrfs_print_leaf(path->nodes[0]);
8537f72f505SQu Wenruo btrfs_err(fs_info,
8547f72f505SQu Wenruo "extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu",
8557f72f505SQu Wenruo bytenr, num_bytes, parent, root_objectid, owner,
8567f72f505SQu Wenruo offset);
857492104c8SJosef Bacik err = -EIO;
858492104c8SJosef Bacik goto out;
85979787eaaSJeff Mahoney }
8605d4f98a2SYan Zheng
8615d4f98a2SYan Zheng leaf = path->nodes[0];
8623212fa14SJosef Bacik item_size = btrfs_item_size(leaf, path->slots[0]);
8636d8ff4e4SDavid Sterba if (unlikely(item_size < sizeof(*ei))) {
864182741d2SQu Wenruo err = -EUCLEAN;
865182741d2SQu Wenruo btrfs_err(fs_info,
866182741d2SQu Wenruo "unexpected extent item size, has %llu expect >= %zu",
867182741d2SQu Wenruo item_size, sizeof(*ei));
868ba3c2b19SNikolay Borisov btrfs_abort_transaction(trans, err);
869ba3c2b19SNikolay Borisov goto out;
870ba3c2b19SNikolay Borisov }
8715d4f98a2SYan Zheng
8725d4f98a2SYan Zheng ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
8735d4f98a2SYan Zheng flags = btrfs_extent_flags(leaf, ei);
8745d4f98a2SYan Zheng
8755d4f98a2SYan Zheng ptr = (unsigned long)(ei + 1);
8765d4f98a2SYan Zheng end = (unsigned long)ei + item_size;
8775d4f98a2SYan Zheng
8783173a18fSJosef Bacik if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
8795d4f98a2SYan Zheng ptr += sizeof(struct btrfs_tree_block_info);
8805d4f98a2SYan Zheng BUG_ON(ptr > end);
8815d4f98a2SYan Zheng }
8825d4f98a2SYan Zheng
8833de28d57SLiu Bo if (owner >= BTRFS_FIRST_FREE_OBJECTID)
8843de28d57SLiu Bo needed = BTRFS_REF_TYPE_DATA;
8853de28d57SLiu Bo else
8863de28d57SLiu Bo needed = BTRFS_REF_TYPE_BLOCK;
8873de28d57SLiu Bo
8885d4f98a2SYan Zheng err = -ENOENT;
8895d4f98a2SYan Zheng while (1) {
8905d4f98a2SYan Zheng if (ptr >= end) {
891cf4f03c3SNikolay Borisov if (ptr > end) {
892cf4f03c3SNikolay Borisov err = -EUCLEAN;
893cf4f03c3SNikolay Borisov btrfs_print_leaf(path->nodes[0]);
894cf4f03c3SNikolay Borisov btrfs_crit(fs_info,
895cf4f03c3SNikolay Borisov "overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu",
896cf4f03c3SNikolay Borisov path->slots[0], root_objectid, owner, offset, parent);
897cf4f03c3SNikolay Borisov }
8985d4f98a2SYan Zheng break;
8995d4f98a2SYan Zheng }
9005d4f98a2SYan Zheng iref = (struct btrfs_extent_inline_ref *)ptr;
9013de28d57SLiu Bo type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
9023de28d57SLiu Bo if (type == BTRFS_REF_TYPE_INVALID) {
903af431dcbSSu Yue err = -EUCLEAN;
9043de28d57SLiu Bo goto out;
9053de28d57SLiu Bo }
9063de28d57SLiu Bo
9075d4f98a2SYan Zheng if (want < type)
9085d4f98a2SYan Zheng break;
9095d4f98a2SYan Zheng if (want > type) {
9105d4f98a2SYan Zheng ptr += btrfs_extent_inline_ref_size(type);
9115d4f98a2SYan Zheng continue;
9125d4f98a2SYan Zheng }
9135d4f98a2SYan Zheng
9145d4f98a2SYan Zheng if (type == BTRFS_EXTENT_DATA_REF_KEY) {
9155d4f98a2SYan Zheng struct btrfs_extent_data_ref *dref;
9165d4f98a2SYan Zheng dref = (struct btrfs_extent_data_ref *)(&iref->offset);
9175d4f98a2SYan Zheng if (match_extent_data_ref(leaf, dref, root_objectid,
9185d4f98a2SYan Zheng owner, offset)) {
9195d4f98a2SYan Zheng err = 0;
9205d4f98a2SYan Zheng break;
9215d4f98a2SYan Zheng }
9225d4f98a2SYan Zheng if (hash_extent_data_ref_item(leaf, dref) <
9235d4f98a2SYan Zheng hash_extent_data_ref(root_objectid, owner, offset))
9245d4f98a2SYan Zheng break;
9255d4f98a2SYan Zheng } else {
9265d4f98a2SYan Zheng u64 ref_offset;
9275d4f98a2SYan Zheng ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
9285d4f98a2SYan Zheng if (parent > 0) {
9295d4f98a2SYan Zheng if (parent == ref_offset) {
9305d4f98a2SYan Zheng err = 0;
9315d4f98a2SYan Zheng break;
9325d4f98a2SYan Zheng }
9335d4f98a2SYan Zheng if (ref_offset < parent)
9345d4f98a2SYan Zheng break;
9355d4f98a2SYan Zheng } else {
9365d4f98a2SYan Zheng if (root_objectid == ref_offset) {
9375d4f98a2SYan Zheng err = 0;
9385d4f98a2SYan Zheng break;
9395d4f98a2SYan Zheng }
9405d4f98a2SYan Zheng if (ref_offset < root_objectid)
9415d4f98a2SYan Zheng break;
9425d4f98a2SYan Zheng }
9435d4f98a2SYan Zheng }
9445d4f98a2SYan Zheng ptr += btrfs_extent_inline_ref_size(type);
9455d4f98a2SYan Zheng }
9465d4f98a2SYan Zheng if (err == -ENOENT && insert) {
9475d4f98a2SYan Zheng if (item_size + extra_size >=
9485d4f98a2SYan Zheng BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
9495d4f98a2SYan Zheng err = -EAGAIN;
9505d4f98a2SYan Zheng goto out;
9515d4f98a2SYan Zheng }
9525d4f98a2SYan Zheng /*
9535d4f98a2SYan Zheng * To add new inline back ref, we have to make sure
9545d4f98a2SYan Zheng * there is no corresponding back ref item.
9555d4f98a2SYan Zheng * For simplicity, we just do not add new inline back
9565d4f98a2SYan Zheng * ref if there is any kind of item for this block
9575d4f98a2SYan Zheng */
9582c47e605SYan Zheng if (find_next_key(path, 0, &key) == 0 &&
9592c47e605SYan Zheng key.objectid == bytenr &&
96085d4198eSYan Zheng key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
9615d4f98a2SYan Zheng err = -EAGAIN;
9625d4f98a2SYan Zheng goto out;
9635d4f98a2SYan Zheng }
9645d4f98a2SYan Zheng }
9655d4f98a2SYan Zheng *ref_ret = (struct btrfs_extent_inline_ref *)ptr;
9665d4f98a2SYan Zheng out:
96785d4198eSYan Zheng if (insert) {
9685d4f98a2SYan Zheng path->keep_locks = 0;
9699a664971Sethanwu path->search_for_extension = 0;
9705d4f98a2SYan Zheng btrfs_unlock_up_safe(path, 1);
9715d4f98a2SYan Zheng }
9725d4f98a2SYan Zheng return err;
9735d4f98a2SYan Zheng }
9745d4f98a2SYan Zheng
9755d4f98a2SYan Zheng /*
9765d4f98a2SYan Zheng * helper to add new inline back ref
9775d4f98a2SYan Zheng */
9785d4f98a2SYan Zheng static noinline_for_stack
setup_inline_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_path * path,struct btrfs_extent_inline_ref * iref,u64 parent,u64 root_objectid,u64 owner,u64 offset,int refs_to_add,struct btrfs_delayed_extent_op * extent_op)979d5e09e38SFilipe Manana void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
9805d4f98a2SYan Zheng struct btrfs_path *path,
9815d4f98a2SYan Zheng struct btrfs_extent_inline_ref *iref,
9825d4f98a2SYan Zheng u64 parent, u64 root_objectid,
9835d4f98a2SYan Zheng u64 owner, u64 offset, int refs_to_add,
9845d4f98a2SYan Zheng struct btrfs_delayed_extent_op *extent_op)
9855d4f98a2SYan Zheng {
9865d4f98a2SYan Zheng struct extent_buffer *leaf;
9875d4f98a2SYan Zheng struct btrfs_extent_item *ei;
9885d4f98a2SYan Zheng unsigned long ptr;
9895d4f98a2SYan Zheng unsigned long end;
9905d4f98a2SYan Zheng unsigned long item_offset;
9915d4f98a2SYan Zheng u64 refs;
9925d4f98a2SYan Zheng int size;
9935d4f98a2SYan Zheng int type;
9945d4f98a2SYan Zheng
9955d4f98a2SYan Zheng leaf = path->nodes[0];
9965d4f98a2SYan Zheng ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
9975d4f98a2SYan Zheng item_offset = (unsigned long)iref - (unsigned long)ei;
9985d4f98a2SYan Zheng
9995d4f98a2SYan Zheng type = extent_ref_type(parent, owner);
10005d4f98a2SYan Zheng size = btrfs_extent_inline_ref_size(type);
10015d4f98a2SYan Zheng
1002d5e09e38SFilipe Manana btrfs_extend_item(trans, path, size);
10035d4f98a2SYan Zheng
10045d4f98a2SYan Zheng ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
10055d4f98a2SYan Zheng refs = btrfs_extent_refs(leaf, ei);
10065d4f98a2SYan Zheng refs += refs_to_add;
10075d4f98a2SYan Zheng btrfs_set_extent_refs(leaf, ei, refs);
10085d4f98a2SYan Zheng if (extent_op)
10095d4f98a2SYan Zheng __run_delayed_extent_op(extent_op, leaf, ei);
10105d4f98a2SYan Zheng
10115d4f98a2SYan Zheng ptr = (unsigned long)ei + item_offset;
10123212fa14SJosef Bacik end = (unsigned long)ei + btrfs_item_size(leaf, path->slots[0]);
10135d4f98a2SYan Zheng if (ptr < end - size)
10145d4f98a2SYan Zheng memmove_extent_buffer(leaf, ptr + size, ptr,
10155d4f98a2SYan Zheng end - size - ptr);
10165d4f98a2SYan Zheng
10175d4f98a2SYan Zheng iref = (struct btrfs_extent_inline_ref *)ptr;
10185d4f98a2SYan Zheng btrfs_set_extent_inline_ref_type(leaf, iref, type);
10195d4f98a2SYan Zheng if (type == BTRFS_EXTENT_DATA_REF_KEY) {
10205d4f98a2SYan Zheng struct btrfs_extent_data_ref *dref;
10215d4f98a2SYan Zheng dref = (struct btrfs_extent_data_ref *)(&iref->offset);
10225d4f98a2SYan Zheng btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
10235d4f98a2SYan Zheng btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
10245d4f98a2SYan Zheng btrfs_set_extent_data_ref_offset(leaf, dref, offset);
10255d4f98a2SYan Zheng btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
10265d4f98a2SYan Zheng } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
10275d4f98a2SYan Zheng struct btrfs_shared_data_ref *sref;
10285d4f98a2SYan Zheng sref = (struct btrfs_shared_data_ref *)(iref + 1);
10295d4f98a2SYan Zheng btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
10305d4f98a2SYan Zheng btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
10315d4f98a2SYan Zheng } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
10325d4f98a2SYan Zheng btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
10335d4f98a2SYan Zheng } else {
10345d4f98a2SYan Zheng btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
10355d4f98a2SYan Zheng }
1036d5e09e38SFilipe Manana btrfs_mark_buffer_dirty(trans, leaf);
10375d4f98a2SYan Zheng }
10385d4f98a2SYan Zheng
/*
 * Find the back ref of an extent, inline or keyed.
 *
 * The inline refs of the extent item are searched first.  Any result other
 * than -ENOENT is returned as is, with *ref_ret as left by that lookup.
 *
 * On -ENOENT the path is released, *ref_ret is set to NULL and the keyed
 * back ref item is looked up instead: a tree block ref when @owner is below
 * BTRFS_FIRST_FREE_OBJECTID, an extent data ref otherwise.  The result of
 * that second lookup is returned.
 */
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int inline_ret;

	inline_ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
						  num_bytes, parent,
						  root_objectid, owner, offset,
						  0);
	if (inline_ret != -ENOENT)
		return inline_ret;

	/* No inline ref: fall back to the separate back ref items. */
	btrfs_release_path(path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		return lookup_tree_block_ref(trans, path, bytenr, parent,
					     root_objectid);

	return lookup_extent_data_ref(trans, path, bytenr, parent,
				      root_objectid, owner, offset);
}
10655d4f98a2SYan Zheng
10665d4f98a2SYan Zheng /*
10675d4f98a2SYan Zheng * helper to update/remove inline back ref
10685d4f98a2SYan Zheng */
update_inline_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_path * path,struct btrfs_extent_inline_ref * iref,int refs_to_mod,struct btrfs_delayed_extent_op * extent_op)1069d5e09e38SFilipe Manana static noinline_for_stack int update_inline_extent_backref(
1070d5e09e38SFilipe Manana struct btrfs_trans_handle *trans,
1071d5e09e38SFilipe Manana struct btrfs_path *path,
10725d4f98a2SYan Zheng struct btrfs_extent_inline_ref *iref,
10735d4f98a2SYan Zheng int refs_to_mod,
10745b2a54bbSJosef Bacik struct btrfs_delayed_extent_op *extent_op)
10755d4f98a2SYan Zheng {
107661a18f1cSNikolay Borisov struct extent_buffer *leaf = path->nodes[0];
107725761430SQu Wenruo struct btrfs_fs_info *fs_info = leaf->fs_info;
10785d4f98a2SYan Zheng struct btrfs_extent_item *ei;
10795d4f98a2SYan Zheng struct btrfs_extent_data_ref *dref = NULL;
10805d4f98a2SYan Zheng struct btrfs_shared_data_ref *sref = NULL;
10815d4f98a2SYan Zheng unsigned long ptr;
10825d4f98a2SYan Zheng unsigned long end;
10835d4f98a2SYan Zheng u32 item_size;
10845d4f98a2SYan Zheng int size;
10855d4f98a2SYan Zheng int type;
10865d4f98a2SYan Zheng u64 refs;
10875d4f98a2SYan Zheng
10885d4f98a2SYan Zheng ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
10895d4f98a2SYan Zheng refs = btrfs_extent_refs(leaf, ei);
109025761430SQu Wenruo if (unlikely(refs_to_mod < 0 && refs + refs_to_mod <= 0)) {
109125761430SQu Wenruo struct btrfs_key key;
109225761430SQu Wenruo u32 extent_size;
109325761430SQu Wenruo
109425761430SQu Wenruo btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
109525761430SQu Wenruo if (key.type == BTRFS_METADATA_ITEM_KEY)
109625761430SQu Wenruo extent_size = fs_info->nodesize;
109725761430SQu Wenruo else
109825761430SQu Wenruo extent_size = key.offset;
109925761430SQu Wenruo btrfs_print_leaf(leaf);
110025761430SQu Wenruo btrfs_err(fs_info,
110125761430SQu Wenruo "invalid refs_to_mod for extent %llu num_bytes %u, has %d expect >= -%llu",
110225761430SQu Wenruo key.objectid, extent_size, refs_to_mod, refs);
110325761430SQu Wenruo return -EUCLEAN;
110425761430SQu Wenruo }
11055d4f98a2SYan Zheng refs += refs_to_mod;
11065d4f98a2SYan Zheng btrfs_set_extent_refs(leaf, ei, refs);
11075d4f98a2SYan Zheng if (extent_op)
11085d4f98a2SYan Zheng __run_delayed_extent_op(extent_op, leaf, ei);
11095d4f98a2SYan Zheng
11103de28d57SLiu Bo type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
111125761430SQu Wenruo /*
111225761430SQu Wenruo * Function btrfs_get_extent_inline_ref_type() has already printed
111325761430SQu Wenruo * error messages.
111425761430SQu Wenruo */
111525761430SQu Wenruo if (unlikely(type == BTRFS_REF_TYPE_INVALID))
111625761430SQu Wenruo return -EUCLEAN;
11175d4f98a2SYan Zheng
11185d4f98a2SYan Zheng if (type == BTRFS_EXTENT_DATA_REF_KEY) {
11195d4f98a2SYan Zheng dref = (struct btrfs_extent_data_ref *)(&iref->offset);
11205d4f98a2SYan Zheng refs = btrfs_extent_data_ref_count(leaf, dref);
11215d4f98a2SYan Zheng } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
11225d4f98a2SYan Zheng sref = (struct btrfs_shared_data_ref *)(iref + 1);
11235d4f98a2SYan Zheng refs = btrfs_shared_data_ref_count(leaf, sref);
11245d4f98a2SYan Zheng } else {
11255d4f98a2SYan Zheng refs = 1;
112625761430SQu Wenruo /*
112725761430SQu Wenruo * For tree blocks we can only drop one ref for it, and tree
112825761430SQu Wenruo * blocks should not have refs > 1.
112925761430SQu Wenruo *
113025761430SQu Wenruo * Furthermore if we're inserting a new inline backref, we
113125761430SQu Wenruo * won't reach this path either. That would be
113225761430SQu Wenruo * setup_inline_extent_backref().
113325761430SQu Wenruo */
113425761430SQu Wenruo if (unlikely(refs_to_mod != -1)) {
113525761430SQu Wenruo struct btrfs_key key;
113625761430SQu Wenruo
113725761430SQu Wenruo btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
113825761430SQu Wenruo
113925761430SQu Wenruo btrfs_print_leaf(leaf);
114025761430SQu Wenruo btrfs_err(fs_info,
114125761430SQu Wenruo "invalid refs_to_mod for tree block %llu, has %d expect -1",
114225761430SQu Wenruo key.objectid, refs_to_mod);
114325761430SQu Wenruo return -EUCLEAN;
114425761430SQu Wenruo }
11455d4f98a2SYan Zheng }
11465d4f98a2SYan Zheng
114725761430SQu Wenruo if (unlikely(refs_to_mod < 0 && refs < -refs_to_mod)) {
114825761430SQu Wenruo struct btrfs_key key;
114925761430SQu Wenruo u32 extent_size;
115025761430SQu Wenruo
115125761430SQu Wenruo btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
115225761430SQu Wenruo if (key.type == BTRFS_METADATA_ITEM_KEY)
115325761430SQu Wenruo extent_size = fs_info->nodesize;
115425761430SQu Wenruo else
115525761430SQu Wenruo extent_size = key.offset;
115625761430SQu Wenruo btrfs_print_leaf(leaf);
115725761430SQu Wenruo btrfs_err(fs_info,
115825761430SQu Wenruo "invalid refs_to_mod for backref entry, iref %lu extent %llu num_bytes %u, has %d expect >= -%llu",
115925761430SQu Wenruo (unsigned long)iref, key.objectid, extent_size,
116025761430SQu Wenruo refs_to_mod, refs);
116125761430SQu Wenruo return -EUCLEAN;
116225761430SQu Wenruo }
11635d4f98a2SYan Zheng refs += refs_to_mod;
11645d4f98a2SYan Zheng
11655d4f98a2SYan Zheng if (refs > 0) {
11665d4f98a2SYan Zheng if (type == BTRFS_EXTENT_DATA_REF_KEY)
11675d4f98a2SYan Zheng btrfs_set_extent_data_ref_count(leaf, dref, refs);
11685d4f98a2SYan Zheng else
11695d4f98a2SYan Zheng btrfs_set_shared_data_ref_count(leaf, sref, refs);
11705d4f98a2SYan Zheng } else {
11715d4f98a2SYan Zheng size = btrfs_extent_inline_ref_size(type);
11723212fa14SJosef Bacik item_size = btrfs_item_size(leaf, path->slots[0]);
11735d4f98a2SYan Zheng ptr = (unsigned long)iref;
11745d4f98a2SYan Zheng end = (unsigned long)ei + item_size;
11755d4f98a2SYan Zheng if (ptr + size < end)
11765d4f98a2SYan Zheng memmove_extent_buffer(leaf, ptr, ptr + size,
11775d4f98a2SYan Zheng end - ptr - size);
11785d4f98a2SYan Zheng item_size -= size;
1179d5e09e38SFilipe Manana btrfs_truncate_item(trans, path, item_size, 1);
11805d4f98a2SYan Zheng }
1181d5e09e38SFilipe Manana btrfs_mark_buffer_dirty(trans, leaf);
118225761430SQu Wenruo return 0;
11835d4f98a2SYan Zheng }
11845d4f98a2SYan Zheng
11855d4f98a2SYan Zheng static noinline_for_stack
insert_inline_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_path * path,u64 bytenr,u64 num_bytes,u64 parent,u64 root_objectid,u64 owner,u64 offset,int refs_to_add,struct btrfs_delayed_extent_op * extent_op)11865d4f98a2SYan Zheng int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
11875d4f98a2SYan Zheng struct btrfs_path *path,
11885d4f98a2SYan Zheng u64 bytenr, u64 num_bytes, u64 parent,
11895d4f98a2SYan Zheng u64 root_objectid, u64 owner,
11905d4f98a2SYan Zheng u64 offset, int refs_to_add,
11915d4f98a2SYan Zheng struct btrfs_delayed_extent_op *extent_op)
11925d4f98a2SYan Zheng {
11935d4f98a2SYan Zheng struct btrfs_extent_inline_ref *iref;
11945d4f98a2SYan Zheng int ret;
11955d4f98a2SYan Zheng
1196867cc1fbSNikolay Borisov ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
1197867cc1fbSNikolay Borisov num_bytes, parent, root_objectid,
1198867cc1fbSNikolay Borisov owner, offset, 1);
11995d4f98a2SYan Zheng if (ret == 0) {
120007cce5cfSQu Wenruo /*
120107cce5cfSQu Wenruo * We're adding refs to a tree block we already own, this
120207cce5cfSQu Wenruo * should not happen at all.
120307cce5cfSQu Wenruo */
120407cce5cfSQu Wenruo if (owner < BTRFS_FIRST_FREE_OBJECTID) {
120507cce5cfSQu Wenruo btrfs_print_leaf(path->nodes[0]);
1206eee3b811SQu Wenruo btrfs_crit(trans->fs_info,
1207eee3b811SQu Wenruo "adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu slot %u",
1208eee3b811SQu Wenruo bytenr, num_bytes, root_objectid, path->slots[0]);
120907cce5cfSQu Wenruo return -EUCLEAN;
121007cce5cfSQu Wenruo }
1211d5e09e38SFilipe Manana ret = update_inline_extent_backref(trans, path, iref,
1212d5e09e38SFilipe Manana refs_to_add, extent_op);
12135d4f98a2SYan Zheng } else if (ret == -ENOENT) {
1214d5e09e38SFilipe Manana setup_inline_extent_backref(trans, path, iref, parent,
1215143bede5SJeff Mahoney root_objectid, owner, offset,
1216143bede5SJeff Mahoney refs_to_add, extent_op);
1217143bede5SJeff Mahoney ret = 0;
12185d4f98a2SYan Zheng }
12195d4f98a2SYan Zheng return ret;
12205d4f98a2SYan Zheng }
12215d4f98a2SYan Zheng
remove_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_extent_inline_ref * iref,int refs_to_drop,int is_data)12225d4f98a2SYan Zheng static int remove_extent_backref(struct btrfs_trans_handle *trans,
122376d76e78SJosef Bacik struct btrfs_root *root,
12245d4f98a2SYan Zheng struct btrfs_path *path,
12255d4f98a2SYan Zheng struct btrfs_extent_inline_ref *iref,
12265b2a54bbSJosef Bacik int refs_to_drop, int is_data)
12275d4f98a2SYan Zheng {
1228143bede5SJeff Mahoney int ret = 0;
12295d4f98a2SYan Zheng
12305d4f98a2SYan Zheng BUG_ON(!is_data && refs_to_drop != 1);
12315b2a54bbSJosef Bacik if (iref)
1232d5e09e38SFilipe Manana ret = update_inline_extent_backref(trans, path, iref,
1233d5e09e38SFilipe Manana -refs_to_drop, NULL);
12345b2a54bbSJosef Bacik else if (is_data)
12355b2a54bbSJosef Bacik ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
12365b2a54bbSJosef Bacik else
123776d76e78SJosef Bacik ret = btrfs_del_item(trans, root, path);
123831840ae1SZheng Yan return ret;
123931840ae1SZheng Yan }
124031840ae1SZheng Yan
/*
 * Discard the byte range [@start, @start + @len) on @bdev while leaving every
 * superblock copy that overlaps the range untouched.
 *
 * An unaligned @start is rounded up to the next 512B sector; the length is
 * shortened by the skipped bytes and rounded down to a sector multiple.
 *
 * @discarded_bytes is always written and holds the number of bytes for which
 * blkdev_issue_discard() reported success.
 *
 * Returns 0 when there is nothing to discard, a negative errno as soon as a
 * discard in front of a superblock fails with anything but -EOPNOTSUPP, and
 * otherwise the result of the last blkdev_issue_discard() call.
 */
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
			       u64 *discarded_bytes)
{
	int j, ret = 0;
	u64 bytes_left, end;
	u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);

	*discarded_bytes = 0;

	/* Adjust the range to be aligned to 512B sectors if necessary. */
	if (start != aligned_start) {
		u64 skipped = aligned_start - start;

		/*
		 * The range ends at or before the next sector boundary, so
		 * there is nothing left to discard. Bail out instead of
		 * letting the unsigned subtraction below wrap around.
		 */
		if (len <= skipped)
			return 0;

		len -= skipped;
		len = round_down(len, 1 << SECTOR_SHIFT);
		start = aligned_start;
	}

	if (!len)
		return 0;

	end = start + len;
	bytes_left = len;

	/* Skip any superblocks on this device. */
	for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
		u64 sb_start = btrfs_sb_offset(j);
		u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
		u64 size = sb_start - start;

		if (!in_range(sb_start, start, bytes_left) &&
		    !in_range(sb_end, start, bytes_left) &&
		    !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
			continue;

		/*
		 * Superblock spans beginning of range. Adjust start and
		 * try again.
		 */
		if (sb_start <= start) {
			start = sb_end;
			if (start > end) {
				bytes_left = 0;
				break;
			}
			bytes_left = end - start;
			continue;
		}

		/* Discard the part of the range in front of the superblock. */
		if (size) {
			ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
						   size >> SECTOR_SHIFT,
						   GFP_NOFS);
			if (!ret)
				*discarded_bytes += size;
			else if (ret != -EOPNOTSUPP)
				return ret;
		}

		/* Continue behind the superblock. */
		start = sb_end;
		if (start > end) {
			bytes_left = 0;
			break;
		}
		bytes_left = end - start;
	}

	/* Whatever remains behind the last overlapping superblock. */
	if (bytes_left) {
		ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
					   bytes_left >> SECTOR_SHIFT,
					   GFP_NOFS);
		if (!ret)
			*discarded_bytes += bytes_left;
	}
	return ret;
}
131515916de8SChris Mason
/*
 * Discard (or zone-reset) the physical range described by @stripe.
 *
 * If btrfs_can_zone_reset() accepts the range, the zone is reset on the
 * stripe's device and, while a device replace is ongoing with that device as
 * the source, on the replace target as well. Otherwise a regular discard is
 * issued when the block device reports a non-zero maximum discard size. If
 * neither applies nothing is done.
 *
 * @bytes is always written and receives the number of bytes reported as
 * discarded (0 when nothing was done).
 *
 * Returns 0 or the error of the failing helper.
 */
static int do_discard_extent(struct btrfs_discard_stripe *stripe, u64 *bytes)
{
	struct btrfs_device *dev = stripe->dev;
	struct btrfs_fs_info *fs_info = dev->fs_info;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	u64 phys = stripe->physical;
	u64 len = stripe->length;
	u64 discarded = 0;
	int ret = 0;

	/* Zone reset on a zoned filesystem */
	if (btrfs_can_zone_reset(dev, phys, len)) {
		u64 src_disc;

		ret = btrfs_reset_device_zone(dev, phys, len, &discarded);
		if (ret)
			goto out;

		if (!btrfs_dev_replace_is_ongoing(dev_replace) ||
		    dev != dev_replace->srcdev)
			goto out;

		src_disc = discarded;

		/* Send to replace target as well */
		ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
					      &discarded);
		discarded += src_disc;
	} else if (bdev_max_discard_sectors(dev->bdev)) {
		ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
	}
	/* Otherwise nothing was discarded: ret and discarded are still 0. */

out:
	*bytes = discarded;
	return ret;
}
13556143c23cSNaohiro Aota
/*
 * Discard the logical range [@bytenr, @bytenr + @num_bytes) on every stripe
 * that btrfs_map_discard() maps it to.
 *
 * Stripes whose device has no block device or is not writeable are skipped.
 * A stripe failing with -EOPNOTSUPP is ignored; any other error ends the
 * walk.
 *
 * If @actual_bytes is not NULL it receives the sum of the byte counts
 * reported by the stripes that were discarded successfully.
 *
 * Returns 0 or the first error other than -EOPNOTSUPP.
 */
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 num_bytes, u64 *actual_bytes)
{
	u64 range_end = bytenr + num_bytes;
	u64 pos = bytenr;
	u64 total = 0;
	int ret = 0;

	/*
	 * Avoid races with device replace and make sure the devices in the
	 * stripes don't go away while we are discarding.
	 */
	btrfs_bio_counter_inc_blocked(fs_info);
	while (pos < range_end) {
		struct btrfs_discard_stripe *stripes;
		unsigned int num_stripes;
		int idx;

		/* btrfs_map_discard() gets the remaining length via num_bytes. */
		num_bytes = range_end - pos;
		stripes = btrfs_map_discard(fs_info, pos, &num_bytes, &num_stripes);
		if (IS_ERR(stripes)) {
			ret = PTR_ERR(stripes);
			if (ret == -EOPNOTSUPP)
				ret = 0;
			break;
		}

		for (idx = 0; idx < num_stripes; idx++) {
			struct btrfs_discard_stripe *stripe = &stripes[idx];
			u64 bytes;

			if (!stripe->dev->bdev) {
				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
				continue;
			}

			if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
				      &stripe->dev->dev_state))
				continue;

			ret = do_discard_extent(stripe, &bytes);
			if (!ret) {
				total += bytes;
				continue;
			}

			/*
			 * Keep going if discard is not supported by the
			 * device.
			 */
			if (ret != -EOPNOTSUPP)
				break;
			ret = 0;
		}
		kfree(stripes);
		if (ret)
			break;
		pos += num_bytes;
	}
	btrfs_bio_counter_dec(fs_info);

	if (actual_bytes)
		*actual_bytes = total;
	return ret;
}
14191f3c79a2SLiu Hui
142079787eaaSJeff Mahoney /* Can return -ENOMEM */
btrfs_inc_extent_ref(struct btrfs_trans_handle * trans,struct btrfs_ref * generic_ref)14215d4f98a2SYan Zheng int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
142282fa113fSQu Wenruo struct btrfs_ref *generic_ref)
142331840ae1SZheng Yan {
142482fa113fSQu Wenruo struct btrfs_fs_info *fs_info = trans->fs_info;
142531840ae1SZheng Yan int ret;
142666d7e7f0SArne Jansen
142782fa113fSQu Wenruo ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
142882fa113fSQu Wenruo generic_ref->action);
142982fa113fSQu Wenruo BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
1430113479d5SNikolay Borisov generic_ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID);
143131840ae1SZheng Yan
143282fa113fSQu Wenruo if (generic_ref->type == BTRFS_REF_METADATA)
14332187374fSJosef Bacik ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
143482fa113fSQu Wenruo else
14352187374fSJosef Bacik ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0);
1436d7eae340SOmar Sandoval
143782fa113fSQu Wenruo btrfs_ref_tree_mod(fs_info, generic_ref);
14388a5040f7SQu Wenruo
143931840ae1SZheng Yan return ret;
144031840ae1SZheng Yan }
144131840ae1SZheng Yan
1442bd3c685eSNikolay Borisov /*
1443bd3c685eSNikolay Borisov * __btrfs_inc_extent_ref - insert backreference for a given extent
1444bd3c685eSNikolay Borisov *
144507cce5cfSQu Wenruo * The counterpart is in __btrfs_free_extent(), with examples and more details
144607cce5cfSQu Wenruo * how it works.
144707cce5cfSQu Wenruo *
1448bd3c685eSNikolay Borisov * @trans: Handle of transaction
1449bd3c685eSNikolay Borisov *
1450bd3c685eSNikolay Borisov * @node: The delayed ref node used to get the bytenr/length for
1451bd3c685eSNikolay Borisov * extent whose references are incremented.
1452bd3c685eSNikolay Borisov *
1453bd3c685eSNikolay Borisov * @parent: If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
1454bd3c685eSNikolay Borisov * BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
1455bd3c685eSNikolay Borisov * bytenr of the parent block. Since new extents are always
1456bd3c685eSNikolay Borisov * created with indirect references, this will only be the case
1457bd3c685eSNikolay Borisov * when relocating a shared extent. In that case, root_objectid
14581a9fd417SDavid Sterba * will be BTRFS_TREE_RELOC_OBJECTID. Otherwise, parent must
1459bd3c685eSNikolay Borisov * be 0
1460bd3c685eSNikolay Borisov *
1461bd3c685eSNikolay Borisov * @root_objectid: The id of the root where this modification has originated,
1462bd3c685eSNikolay Borisov * this can be either one of the well-known metadata trees or
1463bd3c685eSNikolay Borisov * the subvolume id which references this extent.
1464bd3c685eSNikolay Borisov *
1465bd3c685eSNikolay Borisov * @owner: For data extents it is the inode number of the owning file.
1466bd3c685eSNikolay Borisov * For metadata extents this parameter holds the level in the
1467bd3c685eSNikolay Borisov * tree of the extent.
1468bd3c685eSNikolay Borisov *
1469bd3c685eSNikolay Borisov * @offset: For metadata extents the offset is ignored and is currently
1470bd3c685eSNikolay Borisov * always passed as 0. For data extents it is the fileoffset
1471bd3c685eSNikolay Borisov * this extent belongs to.
1472bd3c685eSNikolay Borisov *
1473bd3c685eSNikolay Borisov * @refs_to_add Number of references to add
1474bd3c685eSNikolay Borisov *
1475bd3c685eSNikolay Borisov * @extent_op Pointer to a structure, holding information necessary when
1476bd3c685eSNikolay Borisov * updating a tree block's flags
1477bd3c685eSNikolay Borisov *
1478bd3c685eSNikolay Borisov */
__btrfs_inc_extent_ref(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_node * node,u64 parent,u64 root_objectid,u64 owner,u64 offset,int refs_to_add,struct btrfs_delayed_extent_op * extent_op)147931840ae1SZheng Yan static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1480c682f9b3SQu Wenruo struct btrfs_delayed_ref_node *node,
14815d4f98a2SYan Zheng u64 parent, u64 root_objectid,
14825d4f98a2SYan Zheng u64 owner, u64 offset, int refs_to_add,
14835d4f98a2SYan Zheng struct btrfs_delayed_extent_op *extent_op)
148456bec294SChris Mason {
14855caf2a00SChris Mason struct btrfs_path *path;
14865d4f98a2SYan Zheng struct extent_buffer *leaf;
1487234b63a0SChris Mason struct btrfs_extent_item *item;
1488fcebe456SJosef Bacik struct btrfs_key key;
1489c682f9b3SQu Wenruo u64 bytenr = node->bytenr;
1490c682f9b3SQu Wenruo u64 num_bytes = node->num_bytes;
14915d4f98a2SYan Zheng u64 refs;
14925d4f98a2SYan Zheng int ret;
1493037e6390SChris Mason
14945caf2a00SChris Mason path = btrfs_alloc_path();
149554aa1f4dSChris Mason if (!path)
149654aa1f4dSChris Mason return -ENOMEM;
149726b8003fSChris Mason
14985d4f98a2SYan Zheng /* this will setup the path even if it fails to insert the back ref */
1499a639cdebSNikolay Borisov ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
1500a639cdebSNikolay Borisov parent, root_objectid, owner,
1501a639cdebSNikolay Borisov offset, refs_to_add, extent_op);
15020ed4792aSQu Wenruo if ((ret < 0 && ret != -EAGAIN) || !ret)
15035d4f98a2SYan Zheng goto out;
1504fcebe456SJosef Bacik
1505fcebe456SJosef Bacik /*
1506fcebe456SJosef Bacik * Ok we had -EAGAIN which means we didn't have space to insert and
1507fcebe456SJosef Bacik * inline extent ref, so just update the reference count and add a
1508fcebe456SJosef Bacik * normal backref.
1509fcebe456SJosef Bacik */
1510fcebe456SJosef Bacik leaf = path->nodes[0];
1511fcebe456SJosef Bacik btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
15125d4f98a2SYan Zheng item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
15135d4f98a2SYan Zheng refs = btrfs_extent_refs(leaf, item);
15145d4f98a2SYan Zheng btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
15155d4f98a2SYan Zheng if (extent_op)
15165d4f98a2SYan Zheng __run_delayed_extent_op(extent_op, leaf, item);
151731840ae1SZheng Yan
1518d5e09e38SFilipe Manana btrfs_mark_buffer_dirty(trans, leaf);
1519b3b4aa74SDavid Sterba btrfs_release_path(path);
15207bb86316SChris Mason
152156bec294SChris Mason /* now insert the actual backref */
1522d2f79e63SFilipe Manana if (owner < BTRFS_FIRST_FREE_OBJECTID)
152365cd6d9eSNikolay Borisov ret = insert_tree_block_ref(trans, path, bytenr, parent,
152465cd6d9eSNikolay Borisov root_objectid);
1525d2f79e63SFilipe Manana else
152665cd6d9eSNikolay Borisov ret = insert_extent_data_ref(trans, path, bytenr, parent,
152765cd6d9eSNikolay Borisov root_objectid, owner, offset,
152865cd6d9eSNikolay Borisov refs_to_add);
1529d2f79e63SFilipe Manana
153079787eaaSJeff Mahoney if (ret)
153166642832SJeff Mahoney btrfs_abort_transaction(trans, ret);
15325d4f98a2SYan Zheng out:
153374493f7aSChris Mason btrfs_free_path(path);
153430d133fcSLiu Bo return ret;
153502217ed2SChris Mason }
153602217ed2SChris Mason
/*
 * Run one delayed data ref.
 *
 * Depending on @node->action and @insert_reserved this inserts the extent
 * item for a reserved file extent, adds references to an existing extent, or
 * drops references from it. Any other action hits BUG().
 *
 * Returns the result of the helper that handled the action.
 */
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				bool insert_reserved)
{
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root;

	ins.objectid = node->bytenr;
	ins.type = BTRFS_EXTENT_ITEM_KEY;
	ins.offset = node->num_bytes;

	ref = btrfs_delayed_node_to_data_ref(node);
	trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);

	/* Only shared data refs carry a parent; otherwise it stays 0. */
	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF) {
		u64 flags = 0;

		if (!insert_reserved)
			return __btrfs_inc_extent_ref(trans, node, parent,
						      ref_root, ref->objectid,
						      ref->offset,
						      node->ref_mod, extent_op);

		if (extent_op)
			flags |= extent_op->flags_to_set;
		return alloc_reserved_file_extent(trans, parent, ref_root,
						  flags, ref->objectid,
						  ref->offset, &ins,
						  node->ref_mod);
	}

	if (node->action == BTRFS_DROP_DELAYED_REF)
		return __btrfs_free_extent(trans, node, parent,
					   ref_root, ref->objectid,
					   ref->offset, node->ref_mod,
					   extent_op);

	BUG();
	return 0;
}
15815d4f98a2SYan Zheng
__run_delayed_extent_op(struct btrfs_delayed_extent_op * extent_op,struct extent_buffer * leaf,struct btrfs_extent_item * ei)15825d4f98a2SYan Zheng static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
15835d4f98a2SYan Zheng struct extent_buffer *leaf,
15845d4f98a2SYan Zheng struct btrfs_extent_item *ei)
15855d4f98a2SYan Zheng {
15865d4f98a2SYan Zheng u64 flags = btrfs_extent_flags(leaf, ei);
15875d4f98a2SYan Zheng if (extent_op->update_flags) {
15885d4f98a2SYan Zheng flags |= extent_op->flags_to_set;
15895d4f98a2SYan Zheng btrfs_set_extent_flags(leaf, ei, flags);
15905d4f98a2SYan Zheng }
15915d4f98a2SYan Zheng
15925d4f98a2SYan Zheng if (extent_op->update_key) {
15935d4f98a2SYan Zheng struct btrfs_tree_block_info *bi;
15945d4f98a2SYan Zheng BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
15955d4f98a2SYan Zheng bi = (struct btrfs_tree_block_info *)(ei + 1);
15965d4f98a2SYan Zheng btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
15975d4f98a2SYan Zheng }
15985d4f98a2SYan Zheng }
15995d4f98a2SYan Zheng
run_delayed_extent_op(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_head * head,struct btrfs_delayed_extent_op * extent_op)16005d4f98a2SYan Zheng static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1601d278850eSJosef Bacik struct btrfs_delayed_ref_head *head,
16025d4f98a2SYan Zheng struct btrfs_delayed_extent_op *extent_op)
16035d4f98a2SYan Zheng {
160420b9a2d6SNikolay Borisov struct btrfs_fs_info *fs_info = trans->fs_info;
160529cbcf40SJosef Bacik struct btrfs_root *root;
16065d4f98a2SYan Zheng struct btrfs_key key;
16075d4f98a2SYan Zheng struct btrfs_path *path;
16085d4f98a2SYan Zheng struct btrfs_extent_item *ei;
16095d4f98a2SYan Zheng struct extent_buffer *leaf;
16105d4f98a2SYan Zheng u32 item_size;
16115d4f98a2SYan Zheng int ret;
16125d4f98a2SYan Zheng int err = 0;
16130e3696f8SDavid Sterba int metadata = 1;
16145d4f98a2SYan Zheng
1615bf31f87fSDavid Sterba if (TRANS_ABORTED(trans))
161679787eaaSJeff Mahoney return 0;
161779787eaaSJeff Mahoney
16180e3696f8SDavid Sterba if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
16193173a18fSJosef Bacik metadata = 0;
16203173a18fSJosef Bacik
16215d4f98a2SYan Zheng path = btrfs_alloc_path();
16225d4f98a2SYan Zheng if (!path)
16235d4f98a2SYan Zheng return -ENOMEM;
16245d4f98a2SYan Zheng
1625d278850eSJosef Bacik key.objectid = head->bytenr;
16263173a18fSJosef Bacik
16273173a18fSJosef Bacik if (metadata) {
16283173a18fSJosef Bacik key.type = BTRFS_METADATA_ITEM_KEY;
1629b1c79e09SJosef Bacik key.offset = extent_op->level;
16303173a18fSJosef Bacik } else {
16315d4f98a2SYan Zheng key.type = BTRFS_EXTENT_ITEM_KEY;
1632d278850eSJosef Bacik key.offset = head->num_bytes;
16333173a18fSJosef Bacik }
16345d4f98a2SYan Zheng
163529cbcf40SJosef Bacik root = btrfs_extent_root(fs_info, key.objectid);
16363173a18fSJosef Bacik again:
163729cbcf40SJosef Bacik ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
16385d4f98a2SYan Zheng if (ret < 0) {
16395d4f98a2SYan Zheng err = ret;
16405d4f98a2SYan Zheng goto out;
16415d4f98a2SYan Zheng }
16425d4f98a2SYan Zheng if (ret > 0) {
16433173a18fSJosef Bacik if (metadata) {
164455994887SFilipe David Borba Manana if (path->slots[0] > 0) {
164555994887SFilipe David Borba Manana path->slots[0]--;
164655994887SFilipe David Borba Manana btrfs_item_key_to_cpu(path->nodes[0], &key,
164755994887SFilipe David Borba Manana path->slots[0]);
1648d278850eSJosef Bacik if (key.objectid == head->bytenr &&
164955994887SFilipe David Borba Manana key.type == BTRFS_EXTENT_ITEM_KEY &&
1650d278850eSJosef Bacik key.offset == head->num_bytes)
165155994887SFilipe David Borba Manana ret = 0;
165255994887SFilipe David Borba Manana }
165355994887SFilipe David Borba Manana if (ret > 0) {
16543173a18fSJosef Bacik btrfs_release_path(path);
16553173a18fSJosef Bacik metadata = 0;
16563173a18fSJosef Bacik
1657d278850eSJosef Bacik key.objectid = head->bytenr;
1658d278850eSJosef Bacik key.offset = head->num_bytes;
16593173a18fSJosef Bacik key.type = BTRFS_EXTENT_ITEM_KEY;
16603173a18fSJosef Bacik goto again;
16613173a18fSJosef Bacik }
166255994887SFilipe David Borba Manana } else {
16638ec0a4a5SFilipe Manana err = -EUCLEAN;
16648ec0a4a5SFilipe Manana btrfs_err(fs_info,
16658ec0a4a5SFilipe Manana "missing extent item for extent %llu num_bytes %llu level %d",
16668ec0a4a5SFilipe Manana head->bytenr, head->num_bytes, extent_op->level);
16675d4f98a2SYan Zheng goto out;
16685d4f98a2SYan Zheng }
166955994887SFilipe David Borba Manana }
16705d4f98a2SYan Zheng
16715d4f98a2SYan Zheng leaf = path->nodes[0];
16723212fa14SJosef Bacik item_size = btrfs_item_size(leaf, path->slots[0]);
1673ba3c2b19SNikolay Borisov
16746d8ff4e4SDavid Sterba if (unlikely(item_size < sizeof(*ei))) {
1675182741d2SQu Wenruo err = -EUCLEAN;
1676182741d2SQu Wenruo btrfs_err(fs_info,
1677182741d2SQu Wenruo "unexpected extent item size, has %u expect >= %zu",
1678182741d2SQu Wenruo item_size, sizeof(*ei));
1679ba3c2b19SNikolay Borisov btrfs_abort_transaction(trans, err);
1680ba3c2b19SNikolay Borisov goto out;
1681ba3c2b19SNikolay Borisov }
1682ba3c2b19SNikolay Borisov
16835d4f98a2SYan Zheng ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
16845d4f98a2SYan Zheng __run_delayed_extent_op(extent_op, leaf, ei);
16855d4f98a2SYan Zheng
1686d5e09e38SFilipe Manana btrfs_mark_buffer_dirty(trans, leaf);
16875d4f98a2SYan Zheng out:
16885d4f98a2SYan Zheng btrfs_free_path(path);
16895d4f98a2SYan Zheng return err;
16905d4f98a2SYan Zheng }
16915d4f98a2SYan Zheng
/*
 * Run one delayed tree block ref.
 *
 * A tree ref must have a ref_mod of exactly 1; anything else is reported and
 * rejected with -EUCLEAN. Depending on @node->action and @insert_reserved
 * this inserts the item for a reserved tree block, adds a reference, or
 * drops one. Any other action hits BUG().
 *
 * Returns the result of the helper that handled the action.
 */
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				bool insert_reserved)
{
	struct btrfs_delayed_tree_ref *ref;
	u64 parent = 0;
	u64 ref_root;

	ref = btrfs_delayed_node_to_tree_ref(node);
	trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);

	/* Only shared block refs carry a parent; otherwise it stays 0. */
	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	if (unlikely(node->ref_mod != 1)) {
		btrfs_err(trans->fs_info,
	"btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu",
			  node->bytenr, node->ref_mod, node->action, ref_root,
			  parent);
		return -EUCLEAN;
	}

	if (node->action == BTRFS_ADD_DELAYED_REF) {
		if (!insert_reserved)
			return __btrfs_inc_extent_ref(trans, node, parent,
						      ref_root, ref->level, 0,
						      1, extent_op);

		BUG_ON(!extent_op || !extent_op->update_flags);
		return alloc_reserved_tree_block(trans, node, extent_op);
	}

	if (node->action == BTRFS_DROP_DELAYED_REF)
		return __btrfs_free_extent(trans, node, parent, ref_root,
					   ref->level, 0, 1, extent_op);

	BUG();
	return 0;
}
17305d4f98a2SYan Zheng
17315d4f98a2SYan Zheng /* helper function to actually process a single delayed ref entry */
run_one_delayed_ref(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_node * node,struct btrfs_delayed_extent_op * extent_op,bool insert_reserved)17325d4f98a2SYan Zheng static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
17335d4f98a2SYan Zheng struct btrfs_delayed_ref_node *node,
17345d4f98a2SYan Zheng struct btrfs_delayed_extent_op *extent_op,
173561c681feSFilipe Manana bool insert_reserved)
173656bec294SChris Mason {
173779787eaaSJeff Mahoney int ret = 0;
173879787eaaSJeff Mahoney
1739bf31f87fSDavid Sterba if (TRANS_ABORTED(trans)) {
1740857cc2fcSJosef Bacik if (insert_reserved)
1741b25c36f8SNikolay Borisov btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
174279787eaaSJeff Mahoney return 0;
1743857cc2fcSJosef Bacik }
174479787eaaSJeff Mahoney
17455d4f98a2SYan Zheng if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
17465d4f98a2SYan Zheng node->type == BTRFS_SHARED_BLOCK_REF_KEY)
1747f97806f2SNikolay Borisov ret = run_delayed_tree_ref(trans, node, extent_op,
17485d4f98a2SYan Zheng insert_reserved);
17495d4f98a2SYan Zheng else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
17505d4f98a2SYan Zheng node->type == BTRFS_SHARED_DATA_REF_KEY)
17512bf98ef3SNikolay Borisov ret = run_delayed_data_ref(trans, node, extent_op,
17525d4f98a2SYan Zheng insert_reserved);
17535d4f98a2SYan Zheng else
17545d4f98a2SYan Zheng BUG();
175580ee54bfSJosef Bacik if (ret && insert_reserved)
1756b25c36f8SNikolay Borisov btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
175739f501d6SQu Wenruo if (ret < 0)
175839f501d6SQu Wenruo btrfs_err(trans->fs_info,
175939f501d6SQu Wenruo "failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d",
176039f501d6SQu Wenruo node->bytenr, node->num_bytes, node->type,
176139f501d6SQu Wenruo node->action, node->ref_mod, ret);
17625d4f98a2SYan Zheng return ret;
1763e9d0b13bSChris Mason }
1764e9d0b13bSChris Mason
1765c6fc2454SQu Wenruo static inline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head * head)176656bec294SChris Mason select_delayed_ref(struct btrfs_delayed_ref_head *head)
1767a28ec197SChris Mason {
1768cffc3374SFilipe Manana struct btrfs_delayed_ref_node *ref;
1769cffc3374SFilipe Manana
1770e3d03965SLiu Bo if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
1771c6fc2454SQu Wenruo return NULL;
1772d7df2c79SJosef Bacik
1773cffc3374SFilipe Manana /*
1774cffc3374SFilipe Manana * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
1775cffc3374SFilipe Manana * This is to prevent a ref count from going down to zero, which deletes
1776cffc3374SFilipe Manana * the extent item from the extent tree, when there still are references
1777cffc3374SFilipe Manana * to add, which would fail because they would not find the extent item.
1778cffc3374SFilipe Manana */
17791d57ee94SWang Xiaoguang if (!list_empty(&head->ref_add_list))
17801d57ee94SWang Xiaoguang return list_first_entry(&head->ref_add_list,
17811d57ee94SWang Xiaoguang struct btrfs_delayed_ref_node, add_list);
1782cffc3374SFilipe Manana
1783e3d03965SLiu Bo ref = rb_entry(rb_first_cached(&head->ref_tree),
17840e0adbcfSJosef Bacik struct btrfs_delayed_ref_node, ref_node);
17851d57ee94SWang Xiaoguang ASSERT(list_empty(&ref->add_list));
17861d57ee94SWang Xiaoguang return ref;
178756bec294SChris Mason }
178856bec294SChris Mason
unselect_delayed_ref_head(struct btrfs_delayed_ref_root * delayed_refs,struct btrfs_delayed_ref_head * head)17892eadaa22SJosef Bacik static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
17902eadaa22SJosef Bacik struct btrfs_delayed_ref_head *head)
17912eadaa22SJosef Bacik {
17922eadaa22SJosef Bacik spin_lock(&delayed_refs->lock);
179361c681feSFilipe Manana head->processing = false;
17942eadaa22SJosef Bacik delayed_refs->num_heads_ready++;
17952eadaa22SJosef Bacik spin_unlock(&delayed_refs->lock);
17962eadaa22SJosef Bacik btrfs_delayed_ref_unlock(head);
17972eadaa22SJosef Bacik }
17982eadaa22SJosef Bacik
cleanup_extent_op(struct btrfs_delayed_ref_head * head)1799bedc6617SJosef Bacik static struct btrfs_delayed_extent_op *cleanup_extent_op(
1800b00e6250SJosef Bacik struct btrfs_delayed_ref_head *head)
1801b00e6250SJosef Bacik {
1802b00e6250SJosef Bacik struct btrfs_delayed_extent_op *extent_op = head->extent_op;
1803bedc6617SJosef Bacik
1804bedc6617SJosef Bacik if (!extent_op)
1805bedc6617SJosef Bacik return NULL;
1806bedc6617SJosef Bacik
1807bedc6617SJosef Bacik if (head->must_insert_reserved) {
1808bedc6617SJosef Bacik head->extent_op = NULL;
1809bedc6617SJosef Bacik btrfs_free_delayed_extent_op(extent_op);
1810bedc6617SJosef Bacik return NULL;
1811bedc6617SJosef Bacik }
1812bedc6617SJosef Bacik return extent_op;
1813bedc6617SJosef Bacik }
1814bedc6617SJosef Bacik
run_and_cleanup_extent_op(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_head * head)1815bedc6617SJosef Bacik static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
1816bedc6617SJosef Bacik struct btrfs_delayed_ref_head *head)
1817bedc6617SJosef Bacik {
1818bedc6617SJosef Bacik struct btrfs_delayed_extent_op *extent_op;
1819b00e6250SJosef Bacik int ret;
1820b00e6250SJosef Bacik
1821bedc6617SJosef Bacik extent_op = cleanup_extent_op(head);
1822b00e6250SJosef Bacik if (!extent_op)
1823b00e6250SJosef Bacik return 0;
1824b00e6250SJosef Bacik head->extent_op = NULL;
1825b00e6250SJosef Bacik spin_unlock(&head->lock);
182620b9a2d6SNikolay Borisov ret = run_delayed_extent_op(trans, head, extent_op);
1827b00e6250SJosef Bacik btrfs_free_delayed_extent_op(extent_op);
1828b00e6250SJosef Bacik return ret ? ret : 1;
1829b00e6250SJosef Bacik }
1830b00e6250SJosef Bacik
btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info * fs_info,struct btrfs_delayed_ref_root * delayed_refs,struct btrfs_delayed_ref_head * head)183131890da0SJosef Bacik void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
183231890da0SJosef Bacik struct btrfs_delayed_ref_root *delayed_refs,
183307c47775SJosef Bacik struct btrfs_delayed_ref_head *head)
183407c47775SJosef Bacik {
1835ba2c4d4eSJosef Bacik int nr_items = 1; /* Dropping this ref head update. */
183607c47775SJosef Bacik
1837ba2c4d4eSJosef Bacik /*
183881e75ac7SJosef Bacik * We had csum deletions accounted for in our delayed refs rsv, we need
183981e75ac7SJosef Bacik * to drop the csum leaves for this update from our delayed_refs_rsv.
1840ba2c4d4eSJosef Bacik */
184181e75ac7SJosef Bacik if (head->total_ref_mod < 0 && head->is_data) {
184207c47775SJosef Bacik spin_lock(&delayed_refs->lock);
184307c47775SJosef Bacik delayed_refs->pending_csums -= head->num_bytes;
184407c47775SJosef Bacik spin_unlock(&delayed_refs->lock);
184581e75ac7SJosef Bacik nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
184607c47775SJosef Bacik }
184781e75ac7SJosef Bacik
1848ba2c4d4eSJosef Bacik btrfs_delayed_refs_rsv_release(fs_info, nr_items);
184907c47775SJosef Bacik }
185007c47775SJosef Bacik
cleanup_ref_head(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_head * head)1851194ab0bcSJosef Bacik static int cleanup_ref_head(struct btrfs_trans_handle *trans,
1852194ab0bcSJosef Bacik struct btrfs_delayed_ref_head *head)
1853194ab0bcSJosef Bacik {
1854f9871eddSNikolay Borisov
1855f9871eddSNikolay Borisov struct btrfs_fs_info *fs_info = trans->fs_info;
1856194ab0bcSJosef Bacik struct btrfs_delayed_ref_root *delayed_refs;
1857194ab0bcSJosef Bacik int ret;
1858194ab0bcSJosef Bacik
1859194ab0bcSJosef Bacik delayed_refs = &trans->transaction->delayed_refs;
1860194ab0bcSJosef Bacik
1861bedc6617SJosef Bacik ret = run_and_cleanup_extent_op(trans, head);
1862194ab0bcSJosef Bacik if (ret < 0) {
1863194ab0bcSJosef Bacik unselect_delayed_ref_head(delayed_refs, head);
1864194ab0bcSJosef Bacik btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
1865194ab0bcSJosef Bacik return ret;
1866194ab0bcSJosef Bacik } else if (ret) {
1867194ab0bcSJosef Bacik return ret;
1868194ab0bcSJosef Bacik }
1869194ab0bcSJosef Bacik
1870194ab0bcSJosef Bacik /*
1871194ab0bcSJosef Bacik * Need to drop our head ref lock and re-acquire the delayed ref lock
1872194ab0bcSJosef Bacik * and then re-check to make sure nobody got added.
1873194ab0bcSJosef Bacik */
1874194ab0bcSJosef Bacik spin_unlock(&head->lock);
1875194ab0bcSJosef Bacik spin_lock(&delayed_refs->lock);
1876194ab0bcSJosef Bacik spin_lock(&head->lock);
1877e3d03965SLiu Bo if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
1878194ab0bcSJosef Bacik spin_unlock(&head->lock);
1879194ab0bcSJosef Bacik spin_unlock(&delayed_refs->lock);
1880194ab0bcSJosef Bacik return 1;
1881194ab0bcSJosef Bacik }
1882d7baffdaSJosef Bacik btrfs_delete_ref_head(delayed_refs, head);
1883c1103f7aSJosef Bacik spin_unlock(&head->lock);
18841e7a1421SNikolay Borisov spin_unlock(&delayed_refs->lock);
1885c1103f7aSJosef Bacik
1886c1103f7aSJosef Bacik if (head->must_insert_reserved) {
1887b25c36f8SNikolay Borisov btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
1888c1103f7aSJosef Bacik if (head->is_data) {
1889fc28b25eSJosef Bacik struct btrfs_root *csum_root;
1890fc28b25eSJosef Bacik
1891fc28b25eSJosef Bacik csum_root = btrfs_csum_root(fs_info, head->bytenr);
1892fc28b25eSJosef Bacik ret = btrfs_del_csums(trans, csum_root, head->bytenr,
1893fc28b25eSJosef Bacik head->num_bytes);
1894c1103f7aSJosef Bacik }
1895c1103f7aSJosef Bacik }
1896c1103f7aSJosef Bacik
189731890da0SJosef Bacik btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
189807c47775SJosef Bacik
189907c47775SJosef Bacik trace_run_delayed_ref_head(fs_info, head, 0);
1900c1103f7aSJosef Bacik btrfs_delayed_ref_unlock(head);
1901d278850eSJosef Bacik btrfs_put_delayed_ref_head(head);
1902856bd270SJosef Bacik return ret;
1903194ab0bcSJosef Bacik }
1904194ab0bcSJosef Bacik
btrfs_obtain_ref_head(struct btrfs_trans_handle * trans)1905b1cdbcb5SNikolay Borisov static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
1906b1cdbcb5SNikolay Borisov struct btrfs_trans_handle *trans)
1907b1cdbcb5SNikolay Borisov {
1908b1cdbcb5SNikolay Borisov struct btrfs_delayed_ref_root *delayed_refs =
1909b1cdbcb5SNikolay Borisov &trans->transaction->delayed_refs;
1910b1cdbcb5SNikolay Borisov struct btrfs_delayed_ref_head *head = NULL;
1911b1cdbcb5SNikolay Borisov int ret;
1912b1cdbcb5SNikolay Borisov
1913b1cdbcb5SNikolay Borisov spin_lock(&delayed_refs->lock);
19145637c74bSLu Fengqi head = btrfs_select_ref_head(delayed_refs);
1915b1cdbcb5SNikolay Borisov if (!head) {
1916b1cdbcb5SNikolay Borisov spin_unlock(&delayed_refs->lock);
1917b1cdbcb5SNikolay Borisov return head;
1918b1cdbcb5SNikolay Borisov }
1919b1cdbcb5SNikolay Borisov
1920b1cdbcb5SNikolay Borisov /*
1921b1cdbcb5SNikolay Borisov * Grab the lock that says we are going to process all the refs for
1922b1cdbcb5SNikolay Borisov * this head
1923b1cdbcb5SNikolay Borisov */
19249e920a6fSLu Fengqi ret = btrfs_delayed_ref_lock(delayed_refs, head);
1925b1cdbcb5SNikolay Borisov spin_unlock(&delayed_refs->lock);
1926b1cdbcb5SNikolay Borisov
1927b1cdbcb5SNikolay Borisov /*
1928b1cdbcb5SNikolay Borisov * We may have dropped the spin lock to get the head mutex lock, and
1929b1cdbcb5SNikolay Borisov * that might have given someone else time to free the head. If that's
1930b1cdbcb5SNikolay Borisov * true, it has been removed from our list and we can move on.
1931b1cdbcb5SNikolay Borisov */
1932b1cdbcb5SNikolay Borisov if (ret == -EAGAIN)
1933b1cdbcb5SNikolay Borisov head = ERR_PTR(-EAGAIN);
1934b1cdbcb5SNikolay Borisov
1935b1cdbcb5SNikolay Borisov return head;
1936b1cdbcb5SNikolay Borisov }
1937b1cdbcb5SNikolay Borisov
btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_head * locked_ref)1938e7261386SNikolay Borisov static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
1939a8fdc051SFilipe Manana struct btrfs_delayed_ref_head *locked_ref)
1940e7261386SNikolay Borisov {
1941e7261386SNikolay Borisov struct btrfs_fs_info *fs_info = trans->fs_info;
1942e7261386SNikolay Borisov struct btrfs_delayed_ref_root *delayed_refs;
1943e7261386SNikolay Borisov struct btrfs_delayed_extent_op *extent_op;
1944e7261386SNikolay Borisov struct btrfs_delayed_ref_node *ref;
194561c681feSFilipe Manana bool must_insert_reserved;
1946e7261386SNikolay Borisov int ret;
1947e7261386SNikolay Borisov
1948e7261386SNikolay Borisov delayed_refs = &trans->transaction->delayed_refs;
1949e7261386SNikolay Borisov
19500110a4c4SNikolay Borisov lockdep_assert_held(&locked_ref->mutex);
19510110a4c4SNikolay Borisov lockdep_assert_held(&locked_ref->lock);
19520110a4c4SNikolay Borisov
1953e7261386SNikolay Borisov while ((ref = select_delayed_ref(locked_ref))) {
1954e7261386SNikolay Borisov if (ref->seq &&
1955e7261386SNikolay Borisov btrfs_check_delayed_seq(fs_info, ref->seq)) {
1956e7261386SNikolay Borisov spin_unlock(&locked_ref->lock);
1957e7261386SNikolay Borisov unselect_delayed_ref_head(delayed_refs, locked_ref);
1958e7261386SNikolay Borisov return -EAGAIN;
1959e7261386SNikolay Borisov }
1960e7261386SNikolay Borisov
1961e7261386SNikolay Borisov rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
1962e7261386SNikolay Borisov RB_CLEAR_NODE(&ref->ref_node);
1963e7261386SNikolay Borisov if (!list_empty(&ref->add_list))
1964e7261386SNikolay Borisov list_del(&ref->add_list);
1965e7261386SNikolay Borisov /*
1966e7261386SNikolay Borisov * When we play the delayed ref, also correct the ref_mod on
1967e7261386SNikolay Borisov * head
1968e7261386SNikolay Borisov */
1969e7261386SNikolay Borisov switch (ref->action) {
1970e7261386SNikolay Borisov case BTRFS_ADD_DELAYED_REF:
1971e7261386SNikolay Borisov case BTRFS_ADD_DELAYED_EXTENT:
1972e7261386SNikolay Borisov locked_ref->ref_mod -= ref->ref_mod;
1973e7261386SNikolay Borisov break;
1974e7261386SNikolay Borisov case BTRFS_DROP_DELAYED_REF:
1975e7261386SNikolay Borisov locked_ref->ref_mod += ref->ref_mod;
1976e7261386SNikolay Borisov break;
1977e7261386SNikolay Borisov default:
1978e7261386SNikolay Borisov WARN_ON(1);
1979e7261386SNikolay Borisov }
1980e7261386SNikolay Borisov atomic_dec(&delayed_refs->num_entries);
1981e7261386SNikolay Borisov
1982e7261386SNikolay Borisov /*
1983e7261386SNikolay Borisov * Record the must_insert_reserved flag before we drop the
1984e7261386SNikolay Borisov * spin lock.
1985e7261386SNikolay Borisov */
1986e7261386SNikolay Borisov must_insert_reserved = locked_ref->must_insert_reserved;
198761c681feSFilipe Manana locked_ref->must_insert_reserved = false;
1988e7261386SNikolay Borisov
1989e7261386SNikolay Borisov extent_op = locked_ref->extent_op;
1990e7261386SNikolay Borisov locked_ref->extent_op = NULL;
1991e7261386SNikolay Borisov spin_unlock(&locked_ref->lock);
1992e7261386SNikolay Borisov
1993e7261386SNikolay Borisov ret = run_one_delayed_ref(trans, ref, extent_op,
1994e7261386SNikolay Borisov must_insert_reserved);
1995e7261386SNikolay Borisov
1996e7261386SNikolay Borisov btrfs_free_delayed_extent_op(extent_op);
1997e7261386SNikolay Borisov if (ret) {
1998e7261386SNikolay Borisov unselect_delayed_ref_head(delayed_refs, locked_ref);
1999e7261386SNikolay Borisov btrfs_put_delayed_ref(ref);
2000e7261386SNikolay Borisov return ret;
2001e7261386SNikolay Borisov }
2002e7261386SNikolay Borisov
2003e7261386SNikolay Borisov btrfs_put_delayed_ref(ref);
2004e7261386SNikolay Borisov cond_resched();
2005e7261386SNikolay Borisov
2006e7261386SNikolay Borisov spin_lock(&locked_ref->lock);
20070c555c97SJohannes Thumshirn btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
2008e7261386SNikolay Borisov }
2009e7261386SNikolay Borisov
2010e7261386SNikolay Borisov return 0;
2011e7261386SNikolay Borisov }
2012e7261386SNikolay Borisov
201379787eaaSJeff Mahoney /*
201479787eaaSJeff Mahoney * Returns 0 on success or if called with an already aborted transaction.
201579787eaaSJeff Mahoney * Returns -ENOMEM or -EIO on failure and will abort the transaction.
201679787eaaSJeff Mahoney */
__btrfs_run_delayed_refs(struct btrfs_trans_handle * trans,unsigned long nr)2017d7df2c79SJosef Bacik static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2018d7df2c79SJosef Bacik unsigned long nr)
201956bec294SChris Mason {
20200a1e458aSNikolay Borisov struct btrfs_fs_info *fs_info = trans->fs_info;
202156bec294SChris Mason struct btrfs_delayed_ref_root *delayed_refs;
202256bec294SChris Mason struct btrfs_delayed_ref_head *locked_ref = NULL;
202356bec294SChris Mason int ret;
2024d7df2c79SJosef Bacik unsigned long count = 0;
202556bec294SChris Mason
202656bec294SChris Mason delayed_refs = &trans->transaction->delayed_refs;
20270110a4c4SNikolay Borisov do {
202856bec294SChris Mason if (!locked_ref) {
2029b1cdbcb5SNikolay Borisov locked_ref = btrfs_obtain_ref_head(trans);
20300110a4c4SNikolay Borisov if (IS_ERR_OR_NULL(locked_ref)) {
20310110a4c4SNikolay Borisov if (PTR_ERR(locked_ref) == -EAGAIN) {
2032c3e69d58SChris Mason continue;
20330110a4c4SNikolay Borisov } else {
20340110a4c4SNikolay Borisov break;
203556bec294SChris Mason }
203656bec294SChris Mason }
20370110a4c4SNikolay Borisov count++;
20380110a4c4SNikolay Borisov }
20392c3cf7d5SFilipe Manana /*
20402c3cf7d5SFilipe Manana * We need to try and merge add/drops of the same ref since we
20412c3cf7d5SFilipe Manana * can run into issues with relocate dropping the implicit ref
20422c3cf7d5SFilipe Manana * and then it being added back again before the drop can
20432c3cf7d5SFilipe Manana * finish. If we merged anything we need to re-loop so we can
20442c3cf7d5SFilipe Manana * get a good ref.
20452c3cf7d5SFilipe Manana * Or we can get node references of the same type that weren't
20462c3cf7d5SFilipe Manana * merged when created due to bumps in the tree mod seq, and
20472c3cf7d5SFilipe Manana * we need to merge them to prevent adding an inline extent
20482c3cf7d5SFilipe Manana * backref before dropping it (triggering a BUG_ON at
20492c3cf7d5SFilipe Manana * insert_inline_extent_backref()).
20502c3cf7d5SFilipe Manana */
2051d7df2c79SJosef Bacik spin_lock(&locked_ref->lock);
20520c555c97SJohannes Thumshirn btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
2053ae1e206bSJosef Bacik
2054a8fdc051SFilipe Manana ret = btrfs_run_delayed_refs_for_head(trans, locked_ref);
20550110a4c4SNikolay Borisov if (ret < 0 && ret != -EAGAIN) {
2056c1103f7aSJosef Bacik /*
20570110a4c4SNikolay Borisov * Error, btrfs_run_delayed_refs_for_head already
20580110a4c4SNikolay Borisov * unlocked everything so just bail out
2059c1103f7aSJosef Bacik */
20600110a4c4SNikolay Borisov return ret;
20610110a4c4SNikolay Borisov } else if (!ret) {
20620110a4c4SNikolay Borisov /*
20630110a4c4SNikolay Borisov * Success, perform the usual cleanup of a processed
20640110a4c4SNikolay Borisov * head
20650110a4c4SNikolay Borisov */
2066f9871eddSNikolay Borisov ret = cleanup_ref_head(trans, locked_ref);
2067194ab0bcSJosef Bacik if (ret > 0 ) {
2068b00e6250SJosef Bacik /* We dropped our lock, we need to loop. */
2069b00e6250SJosef Bacik ret = 0;
2070d7df2c79SJosef Bacik continue;
2071194ab0bcSJosef Bacik } else if (ret) {
2072194ab0bcSJosef Bacik return ret;
207356bec294SChris Mason }
20740110a4c4SNikolay Borisov }
20750110a4c4SNikolay Borisov
20760110a4c4SNikolay Borisov /*
20770110a4c4SNikolay Borisov * Either success case or btrfs_run_delayed_refs_for_head
20780110a4c4SNikolay Borisov * returned -EAGAIN, meaning we need to select another head
20790110a4c4SNikolay Borisov */
20800110a4c4SNikolay Borisov
2081c1103f7aSJosef Bacik locked_ref = NULL;
20821887be66SChris Mason cond_resched();
20830110a4c4SNikolay Borisov } while ((nr != -1 && count < nr) || locked_ref);
20840a2b2a84SJosef Bacik
2085d7df2c79SJosef Bacik return 0;
208656bec294SChris Mason }
208756bec294SChris Mason
#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order.  This
 * correlates in most cases to the order added.  To expose dependencies on this
 * order, we start to process the tree in the middle instead of the beginning.
 *
 * Walks down from the root of @root, alternating between the left and the
 * right child (left first), and returns the bytenr of the last node visited.
 * Returns 0 for an empty tree.
 */
static u64 find_middle(struct rb_root *root)
{
	struct rb_node *n = root->rb_node;
	struct btrfs_delayed_ref_node *entry;
	bool go_left = true;
	/* Defined result for an empty tree instead of an uninitialized read. */
	u64 middle = 0;

	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		WARN_ON(!entry->in_tree);

		middle = entry->bytenr;

		n = go_left ? n->rb_left : n->rb_right;
		go_left = !go_left;
	}
	return middle;
}
#endif
2130709c0486SArne Jansen
21311262133bSJosef Bacik /*
2132c3e69d58SChris Mason * this starts processing the delayed reference count updates and
2133c3e69d58SChris Mason * extent insertions we have queued up so far. count can be
2134c3e69d58SChris Mason * 0, which means to process everything in the tree at the start
2135c3e69d58SChris Mason * of the run (but not newly added entries), or it can be some target
2136c3e69d58SChris Mason * number you'd like to process.
213779787eaaSJeff Mahoney *
213879787eaaSJeff Mahoney * Returns 0 on success or if called with an aborted transaction
213979787eaaSJeff Mahoney * Returns <0 on error and aborts the transaction
2140c3e69d58SChris Mason */
btrfs_run_delayed_refs(struct btrfs_trans_handle * trans,unsigned long count)2141c3e69d58SChris Mason int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2142c79a70b1SNikolay Borisov unsigned long count)
2143c3e69d58SChris Mason {
2144c79a70b1SNikolay Borisov struct btrfs_fs_info *fs_info = trans->fs_info;
2145c3e69d58SChris Mason struct rb_node *node;
2146c3e69d58SChris Mason struct btrfs_delayed_ref_root *delayed_refs;
2147c46effa6SLiu Bo struct btrfs_delayed_ref_head *head;
2148c3e69d58SChris Mason int ret;
2149c3e69d58SChris Mason int run_all = count == (unsigned long)-1;
2150c3e69d58SChris Mason
215179787eaaSJeff Mahoney /* We'll clean this up in btrfs_cleanup_transaction */
2152bf31f87fSDavid Sterba if (TRANS_ABORTED(trans))
215379787eaaSJeff Mahoney return 0;
215479787eaaSJeff Mahoney
21550b246afaSJeff Mahoney if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
2156511711afSChris Mason return 0;
2157511711afSChris Mason
2158c3e69d58SChris Mason delayed_refs = &trans->transaction->delayed_refs;
215926455d33SLiu Bo if (count == 0)
216061a56a99SJosef Bacik count = delayed_refs->num_heads_ready;
2161bb721703SChris Mason
2162c3e69d58SChris Mason again:
2163709c0486SArne Jansen #ifdef SCRAMBLE_DELAYED_REFS
2164709c0486SArne Jansen delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
2165709c0486SArne Jansen #endif
21660a1e458aSNikolay Borisov ret = __btrfs_run_delayed_refs(trans, count);
216779787eaaSJeff Mahoney if (ret < 0) {
216866642832SJeff Mahoney btrfs_abort_transaction(trans, ret);
216979787eaaSJeff Mahoney return ret;
217079787eaaSJeff Mahoney }
2171c3e69d58SChris Mason
2172c3e69d58SChris Mason if (run_all) {
21736c686b35SNikolay Borisov btrfs_create_pending_block_groups(trans);
2174ea658badSJosef Bacik
2175d7df2c79SJosef Bacik spin_lock(&delayed_refs->lock);
21765c9d028bSLiu Bo node = rb_first_cached(&delayed_refs->href_root);
2177d7df2c79SJosef Bacik if (!node) {
2178d7df2c79SJosef Bacik spin_unlock(&delayed_refs->lock);
2179c3e69d58SChris Mason goto out;
2180d7df2c79SJosef Bacik }
2181c46effa6SLiu Bo head = rb_entry(node, struct btrfs_delayed_ref_head,
2182c46effa6SLiu Bo href_node);
2183d278850eSJosef Bacik refcount_inc(&head->refs);
218456bec294SChris Mason spin_unlock(&delayed_refs->lock);
2185d278850eSJosef Bacik
2186d278850eSJosef Bacik /* Mutex was contended, block until it's released and retry. */
218756bec294SChris Mason mutex_lock(&head->mutex);
218856bec294SChris Mason mutex_unlock(&head->mutex);
218956bec294SChris Mason
2190d278850eSJosef Bacik btrfs_put_delayed_ref_head(head);
2191d7df2c79SJosef Bacik cond_resched();
219256bec294SChris Mason goto again;
219356bec294SChris Mason }
219454aa1f4dSChris Mason out:
2195a28ec197SChris Mason return 0;
2196a28ec197SChris Mason }
2197a28ec197SChris Mason
/*
 * Queue a delayed extent op that sets @flags on the extent backing @eb.
 *
 * The op only updates flags (not the key) and records the tree level read
 * from @eb's header.
 *
 * Returns -ENOMEM if the op cannot be allocated, otherwise the result of
 * btrfs_add_delayed_extent_op(); the op is freed here when queueing fails.
 */
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
				struct extent_buffer *eb, u64 flags)
{
	const int eb_level = btrfs_header_level(eb);
	struct btrfs_delayed_extent_op *op;
	int err;

	op = btrfs_alloc_delayed_extent_op();
	if (op == NULL)
		return -ENOMEM;

	op->level = eb_level;
	op->update_key = false;
	op->update_flags = true;
	op->flags_to_set = flags;

	err = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, op);
	if (err != 0)
		btrfs_free_delayed_extent_op(op);

	return err;
}
22195d4f98a2SYan Zheng
check_delayed_ref(struct btrfs_root * root,struct btrfs_path * path,u64 objectid,u64 offset,u64 bytenr)2220e4c3b2dcSLiu Bo static noinline int check_delayed_ref(struct btrfs_root *root,
22215d4f98a2SYan Zheng struct btrfs_path *path,
22225d4f98a2SYan Zheng u64 objectid, u64 offset, u64 bytenr)
22235d4f98a2SYan Zheng {
22245d4f98a2SYan Zheng struct btrfs_delayed_ref_head *head;
22255d4f98a2SYan Zheng struct btrfs_delayed_ref_node *ref;
22265d4f98a2SYan Zheng struct btrfs_delayed_data_ref *data_ref;
22275d4f98a2SYan Zheng struct btrfs_delayed_ref_root *delayed_refs;
2228e4c3b2dcSLiu Bo struct btrfs_transaction *cur_trans;
22290e0adbcfSJosef Bacik struct rb_node *node;
22305d4f98a2SYan Zheng int ret = 0;
22315d4f98a2SYan Zheng
2232998ac6d2Sethanwu spin_lock(&root->fs_info->trans_lock);
2233e4c3b2dcSLiu Bo cur_trans = root->fs_info->running_transaction;
2234998ac6d2Sethanwu if (cur_trans)
2235998ac6d2Sethanwu refcount_inc(&cur_trans->use_count);
2236998ac6d2Sethanwu spin_unlock(&root->fs_info->trans_lock);
2237e4c3b2dcSLiu Bo if (!cur_trans)
2238e4c3b2dcSLiu Bo return 0;
2239e4c3b2dcSLiu Bo
2240e4c3b2dcSLiu Bo delayed_refs = &cur_trans->delayed_refs;
22415d4f98a2SYan Zheng spin_lock(&delayed_refs->lock);
2242f72ad18eSLiu Bo head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
2243d7df2c79SJosef Bacik if (!head) {
2244d7df2c79SJosef Bacik spin_unlock(&delayed_refs->lock);
2245998ac6d2Sethanwu btrfs_put_transaction(cur_trans);
2246d7df2c79SJosef Bacik return 0;
2247d7df2c79SJosef Bacik }
22485d4f98a2SYan Zheng
22495d4f98a2SYan Zheng if (!mutex_trylock(&head->mutex)) {
225026ce9114SJosef Bacik if (path->nowait) {
225126ce9114SJosef Bacik spin_unlock(&delayed_refs->lock);
225226ce9114SJosef Bacik btrfs_put_transaction(cur_trans);
225326ce9114SJosef Bacik return -EAGAIN;
225426ce9114SJosef Bacik }
225526ce9114SJosef Bacik
2256d278850eSJosef Bacik refcount_inc(&head->refs);
22575d4f98a2SYan Zheng spin_unlock(&delayed_refs->lock);
22585d4f98a2SYan Zheng
2259b3b4aa74SDavid Sterba btrfs_release_path(path);
22605d4f98a2SYan Zheng
22618cc33e5cSDavid Sterba /*
22628cc33e5cSDavid Sterba * Mutex was contended, block until it's released and let
22638cc33e5cSDavid Sterba * caller try again
22648cc33e5cSDavid Sterba */
22655d4f98a2SYan Zheng mutex_lock(&head->mutex);
22665d4f98a2SYan Zheng mutex_unlock(&head->mutex);
2267d278850eSJosef Bacik btrfs_put_delayed_ref_head(head);
2268998ac6d2Sethanwu btrfs_put_transaction(cur_trans);
22695d4f98a2SYan Zheng return -EAGAIN;
22705d4f98a2SYan Zheng }
2271d7df2c79SJosef Bacik spin_unlock(&delayed_refs->lock);
22725d4f98a2SYan Zheng
2273d7df2c79SJosef Bacik spin_lock(&head->lock);
22740e0adbcfSJosef Bacik /*
22750e0adbcfSJosef Bacik * XXX: We should replace this with a proper search function in the
22760e0adbcfSJosef Bacik * future.
22770e0adbcfSJosef Bacik */
2278e3d03965SLiu Bo for (node = rb_first_cached(&head->ref_tree); node;
2279e3d03965SLiu Bo node = rb_next(node)) {
22800e0adbcfSJosef Bacik ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
2281d7df2c79SJosef Bacik /* If it's a shared ref we know a cross reference exists */
2282d7df2c79SJosef Bacik if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
22835d4f98a2SYan Zheng ret = 1;
2284d7df2c79SJosef Bacik break;
2285d7df2c79SJosef Bacik }
22865d4f98a2SYan Zheng
22875d4f98a2SYan Zheng data_ref = btrfs_delayed_node_to_data_ref(ref);
22885d4f98a2SYan Zheng
2289d7df2c79SJosef Bacik /*
2290d7df2c79SJosef Bacik * If our ref doesn't match the one we're currently looking at
2291d7df2c79SJosef Bacik * then we have a cross reference.
2292d7df2c79SJosef Bacik */
22935d4f98a2SYan Zheng if (data_ref->root != root->root_key.objectid ||
2294d7df2c79SJosef Bacik data_ref->objectid != objectid ||
2295d7df2c79SJosef Bacik data_ref->offset != offset) {
2296d7df2c79SJosef Bacik ret = 1;
2297d7df2c79SJosef Bacik break;
2298d7df2c79SJosef Bacik }
2299d7df2c79SJosef Bacik }
2300d7df2c79SJosef Bacik spin_unlock(&head->lock);
23015d4f98a2SYan Zheng mutex_unlock(&head->mutex);
2302998ac6d2Sethanwu btrfs_put_transaction(cur_trans);
23035d4f98a2SYan Zheng return ret;
23045d4f98a2SYan Zheng }
23055d4f98a2SYan Zheng
check_committed_ref(struct btrfs_root * root,struct btrfs_path * path,u64 objectid,u64 offset,u64 bytenr,bool strict)2306e4c3b2dcSLiu Bo static noinline int check_committed_ref(struct btrfs_root *root,
23075d4f98a2SYan Zheng struct btrfs_path *path,
2308a84d5d42SBoris Burkov u64 objectid, u64 offset, u64 bytenr,
2309a84d5d42SBoris Burkov bool strict)
2310be20aa9dSChris Mason {
23110b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
231229cbcf40SJosef Bacik struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
2313f321e491SYan Zheng struct extent_buffer *leaf;
23145d4f98a2SYan Zheng struct btrfs_extent_data_ref *ref;
23155d4f98a2SYan Zheng struct btrfs_extent_inline_ref *iref;
23165d4f98a2SYan Zheng struct btrfs_extent_item *ei;
2317be20aa9dSChris Mason struct btrfs_key key;
23185d4f98a2SYan Zheng u32 item_size;
23193de28d57SLiu Bo int type;
2320f321e491SYan Zheng int ret;
2321be20aa9dSChris Mason
2322be20aa9dSChris Mason key.objectid = bytenr;
232331840ae1SZheng Yan key.offset = (u64)-1;
2324f321e491SYan Zheng key.type = BTRFS_EXTENT_ITEM_KEY;
2325be20aa9dSChris Mason
2326be20aa9dSChris Mason ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2327be20aa9dSChris Mason if (ret < 0)
2328be20aa9dSChris Mason goto out;
232979787eaaSJeff Mahoney BUG_ON(ret == 0); /* Corruption */
233080ff3856SYan Zheng
233180ff3856SYan Zheng ret = -ENOENT;
233280ff3856SYan Zheng if (path->slots[0] == 0)
233331840ae1SZheng Yan goto out;
2334be20aa9dSChris Mason
233531840ae1SZheng Yan path->slots[0]--;
2336f321e491SYan Zheng leaf = path->nodes[0];
23375d4f98a2SYan Zheng btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2338be20aa9dSChris Mason
23395d4f98a2SYan Zheng if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2340be20aa9dSChris Mason goto out;
2341be20aa9dSChris Mason
234280ff3856SYan Zheng ret = 1;
23433212fa14SJosef Bacik item_size = btrfs_item_size(leaf, path->slots[0]);
23445d4f98a2SYan Zheng ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
23455d4f98a2SYan Zheng
2346a6bd9cd1SNikolay Borisov /* If extent item has more than 1 inline ref then it's shared */
23475d4f98a2SYan Zheng if (item_size != sizeof(*ei) +
23485d4f98a2SYan Zheng btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
23495d4f98a2SYan Zheng goto out;
23505d4f98a2SYan Zheng
2351a84d5d42SBoris Burkov /*
2352a84d5d42SBoris Burkov * If extent created before last snapshot => it's shared unless the
2353a84d5d42SBoris Burkov * snapshot has been deleted. Use the heuristic if strict is false.
2354a84d5d42SBoris Burkov */
2355a84d5d42SBoris Burkov if (!strict &&
2356a84d5d42SBoris Burkov (btrfs_extent_generation(leaf, ei) <=
2357a84d5d42SBoris Burkov btrfs_root_last_snapshot(&root->root_item)))
23585d4f98a2SYan Zheng goto out;
23595d4f98a2SYan Zheng
23605d4f98a2SYan Zheng iref = (struct btrfs_extent_inline_ref *)(ei + 1);
23613de28d57SLiu Bo
2362a6bd9cd1SNikolay Borisov /* If this extent has SHARED_DATA_REF then it's shared */
23633de28d57SLiu Bo type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
23643de28d57SLiu Bo if (type != BTRFS_EXTENT_DATA_REF_KEY)
23655d4f98a2SYan Zheng goto out;
23665d4f98a2SYan Zheng
23675d4f98a2SYan Zheng ref = (struct btrfs_extent_data_ref *)(&iref->offset);
23685d4f98a2SYan Zheng if (btrfs_extent_refs(leaf, ei) !=
23695d4f98a2SYan Zheng btrfs_extent_data_ref_count(leaf, ref) ||
23705d4f98a2SYan Zheng btrfs_extent_data_ref_root(leaf, ref) !=
23715d4f98a2SYan Zheng root->root_key.objectid ||
23725d4f98a2SYan Zheng btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
23735d4f98a2SYan Zheng btrfs_extent_data_ref_offset(leaf, ref) != offset)
23745d4f98a2SYan Zheng goto out;
23755d4f98a2SYan Zheng
23765d4f98a2SYan Zheng ret = 0;
23775d4f98a2SYan Zheng out:
23785d4f98a2SYan Zheng return ret;
23795d4f98a2SYan Zheng }
23805d4f98a2SYan Zheng
/*
 * Check whether the data extent at @bytenr is referenced by anything other
 * than (@root, @objectid, @offset), looking at the committed extent tree
 * first and at the delayed refs second.
 *
 * The delayed refs are only consulted when the committed check returns 0 or
 * -ENOENT, and both checks are repeated for as long as the delayed ref check
 * returns -EAGAIN.  @path is released before returning.
 *
 * Returns the result of whichever check ended the loop.
 */
int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
			  u64 bytenr, bool strict, struct btrfs_path *path)
{
	int ret;

	for (;;) {
		ret = check_committed_ref(root, path, objectid, offset, bytenr,
					  strict);
		/* Anything but "not shared" or "no item" is final. */
		if (ret != 0 && ret != -ENOENT)
			break;

		ret = check_delayed_ref(root, path, objectid, offset, bytenr);
		if (ret != -EAGAIN)
			break;
	}

	btrfs_release_path(path);
	/* A positive result is not expected for the data reloc root. */
	if (btrfs_is_data_reloc_root(root))
		WARN_ON(ret > 0);
	return ret;
}
2401f321e491SYan Zheng
__btrfs_mod_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * buf,int full_backref,int inc)24025d4f98a2SYan Zheng static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2403b7a9f29fSChris Mason struct btrfs_root *root,
24045d4f98a2SYan Zheng struct extent_buffer *buf,
2405e339a6b0SJosef Bacik int full_backref, int inc)
240631840ae1SZheng Yan {
24070b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
240831840ae1SZheng Yan u64 bytenr;
24095d4f98a2SYan Zheng u64 num_bytes;
24105d4f98a2SYan Zheng u64 parent;
241131840ae1SZheng Yan u64 ref_root;
241231840ae1SZheng Yan u32 nritems;
241331840ae1SZheng Yan struct btrfs_key key;
241431840ae1SZheng Yan struct btrfs_file_extent_item *fi;
241582fa113fSQu Wenruo struct btrfs_ref generic_ref = { 0 };
241682fa113fSQu Wenruo bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
241731840ae1SZheng Yan int i;
241882fa113fSQu Wenruo int action;
241931840ae1SZheng Yan int level;
242031840ae1SZheng Yan int ret = 0;
2421fccb84c9SDavid Sterba
24220b246afaSJeff Mahoney if (btrfs_is_testing(fs_info))
2423faa2dbf0SJosef Bacik return 0;
2424fccb84c9SDavid Sterba
242531840ae1SZheng Yan ref_root = btrfs_header_owner(buf);
242631840ae1SZheng Yan nritems = btrfs_header_nritems(buf);
242731840ae1SZheng Yan level = btrfs_header_level(buf);
242831840ae1SZheng Yan
242992a7cc42SQu Wenruo if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
24305d4f98a2SYan Zheng return 0;
2431b7a9f29fSChris Mason
24325d4f98a2SYan Zheng if (full_backref)
24335d4f98a2SYan Zheng parent = buf->start;
24345d4f98a2SYan Zheng else
24355d4f98a2SYan Zheng parent = 0;
243682fa113fSQu Wenruo if (inc)
243782fa113fSQu Wenruo action = BTRFS_ADD_DELAYED_REF;
243882fa113fSQu Wenruo else
243982fa113fSQu Wenruo action = BTRFS_DROP_DELAYED_REF;
24405d4f98a2SYan Zheng
244131840ae1SZheng Yan for (i = 0; i < nritems; i++) {
2442db94535dSChris Mason if (level == 0) {
24435f39d397SChris Mason btrfs_item_key_to_cpu(buf, &key, i);
2444962a298fSDavid Sterba if (key.type != BTRFS_EXTENT_DATA_KEY)
244554aa1f4dSChris Mason continue;
24465f39d397SChris Mason fi = btrfs_item_ptr(buf, i,
244754aa1f4dSChris Mason struct btrfs_file_extent_item);
24485f39d397SChris Mason if (btrfs_file_extent_type(buf, fi) ==
244954aa1f4dSChris Mason BTRFS_FILE_EXTENT_INLINE)
245054aa1f4dSChris Mason continue;
245131840ae1SZheng Yan bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
245231840ae1SZheng Yan if (bytenr == 0)
245354aa1f4dSChris Mason continue;
245431840ae1SZheng Yan
24555d4f98a2SYan Zheng num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
24565d4f98a2SYan Zheng key.offset -= btrfs_file_extent_offset(buf, fi);
245782fa113fSQu Wenruo btrfs_init_generic_ref(&generic_ref, action, bytenr,
245882fa113fSQu Wenruo num_bytes, parent);
245982fa113fSQu Wenruo btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
2460f42c5da6SNikolay Borisov key.offset, root->root_key.objectid,
2461f42c5da6SNikolay Borisov for_reloc);
2462dd28b6a5SQu Wenruo if (inc)
246382fa113fSQu Wenruo ret = btrfs_inc_extent_ref(trans, &generic_ref);
2464dd28b6a5SQu Wenruo else
2465ffd4bb2aSQu Wenruo ret = btrfs_free_extent(trans, &generic_ref);
24665d4f98a2SYan Zheng if (ret)
24675d4f98a2SYan Zheng goto fail;
2468b7a9f29fSChris Mason } else {
2469b7a9f29fSChris Mason bytenr = btrfs_node_blockptr(buf, i);
24700b246afaSJeff Mahoney num_bytes = fs_info->nodesize;
247182fa113fSQu Wenruo btrfs_init_generic_ref(&generic_ref, action, bytenr,
247282fa113fSQu Wenruo num_bytes, parent);
2473f42c5da6SNikolay Borisov btrfs_init_tree_ref(&generic_ref, level - 1, ref_root,
2474f42c5da6SNikolay Borisov root->root_key.objectid, for_reloc);
2475dd28b6a5SQu Wenruo if (inc)
247682fa113fSQu Wenruo ret = btrfs_inc_extent_ref(trans, &generic_ref);
2477dd28b6a5SQu Wenruo else
2478ffd4bb2aSQu Wenruo ret = btrfs_free_extent(trans, &generic_ref);
24795d4f98a2SYan Zheng if (ret)
248031840ae1SZheng Yan goto fail;
248131840ae1SZheng Yan }
248231840ae1SZheng Yan }
248331840ae1SZheng Yan return 0;
248431840ae1SZheng Yan fail:
248554aa1f4dSChris Mason return ret;
248602217ed2SChris Mason }
248702217ed2SChris Mason
/*
 * Thin wrapper around __btrfs_mod_ref() that asks for references to be
 * added (last argument set to 1) for the items found in @buf.
 *
 * Returns the value produced by __btrfs_mod_ref().
 */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	const int add_refs = 1;

	return __btrfs_mod_ref(trans, root, buf, full_backref, add_refs);
}
249331840ae1SZheng Yan
/*
 * Thin wrapper around __btrfs_mod_ref() that asks for references to be
 * dropped (last argument set to 0) for the items found in @buf.
 *
 * Returns the value produced by __btrfs_mod_ref().
 */
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	const int add_refs = 0;

	return __btrfs_mod_ref(trans, root, buf, full_backref, add_refs);
}
249931840ae1SZheng Yan
get_alloc_profile_by_root(struct btrfs_root * root,int data)25001b86826dSJeff Mahoney static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
2501b742bb82SYan, Zheng {
25020b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
2503b742bb82SYan, Zheng u64 flags;
250453b381b3SDavid Woodhouse u64 ret;
2505b742bb82SYan, Zheng
2506b742bb82SYan, Zheng if (data)
2507b742bb82SYan, Zheng flags = BTRFS_BLOCK_GROUP_DATA;
25080b246afaSJeff Mahoney else if (root == fs_info->chunk_root)
2509b742bb82SYan, Zheng flags = BTRFS_BLOCK_GROUP_SYSTEM;
2510b742bb82SYan, Zheng else
2511b742bb82SYan, Zheng flags = BTRFS_BLOCK_GROUP_METADATA;
2512b742bb82SYan, Zheng
2513878d7b67SJosef Bacik ret = btrfs_get_alloc_profile(fs_info, flags);
251453b381b3SDavid Woodhouse return ret;
25156a63209fSJosef Bacik }
25166a63209fSJosef Bacik
first_logical_byte(struct btrfs_fs_info * fs_info)25170eb997bfSFilipe Manana static u64 first_logical_byte(struct btrfs_fs_info *fs_info)
2518a061fc8dSChris Mason {
251908dddb29SFilipe Manana struct rb_node *leftmost;
252008dddb29SFilipe Manana u64 bytenr = 0;
25210f9dd46cSJosef Bacik
252216b0c258SFilipe Manana read_lock(&fs_info->block_group_cache_lock);
25230eb997bfSFilipe Manana /* Get the block group with the lowest logical start address. */
252408dddb29SFilipe Manana leftmost = rb_first_cached(&fs_info->block_group_cache_tree);
252508dddb29SFilipe Manana if (leftmost) {
252608dddb29SFilipe Manana struct btrfs_block_group *bg;
2527a1897fddSLiu Bo
252808dddb29SFilipe Manana bg = rb_entry(leftmost, struct btrfs_block_group, cache_node);
252908dddb29SFilipe Manana bytenr = bg->start;
253008dddb29SFilipe Manana }
253116b0c258SFilipe Manana read_unlock(&fs_info->block_group_cache_lock);
2532d2fb3437SYan Zheng
2533d2fb3437SYan Zheng return bytenr;
2534a061fc8dSChris Mason }
2535a061fc8dSChris Mason
/*
 * Account [@bytenr, @bytenr + @num_bytes) as pinned.
 *
 * The pinned counters of @cache and of its space_info grow by @num_bytes.
 * When @reserved is non-zero the same amount is taken off the reserved
 * counters of both. Finally the range is flagged EXTENT_DIRTY in the
 * pinned_extents tree of the running transaction.
 *
 * Always returns 0.
 */
static int pin_down_extent(struct btrfs_trans_handle *trans,
			   struct btrfs_block_group *cache,
			   u64 bytenr, u64 num_bytes, int reserved)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	const u64 last_byte = bytenr + num_bytes - 1;

	/* space_info lock is taken first, block group lock nests inside. */
	spin_lock(&cache->space_info->lock);
	spin_lock(&cache->lock);

	cache->pinned += num_bytes;
	btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
					     num_bytes);
	if (reserved) {
		cache->reserved -= num_bytes;
		cache->space_info->bytes_reserved -= num_bytes;
	}

	spin_unlock(&cache->lock);
	spin_unlock(&cache->space_info->lock);

	set_extent_bit(&trans->transaction->pinned_extents, bytenr, last_byte,
		       EXTENT_DIRTY, NULL);

	return 0;
}
25589078a3e1SChris Mason
/*
 * Look up the block group containing @bytenr and pin the given range in it
 * via pin_down_extent(). A missing block group triggers BUG_ON().
 *
 * Always returns 0.
 */
int btrfs_pin_extent(struct btrfs_trans_handle *trans,
		     u64 bytenr, u64 num_bytes, int reserved)
{
	struct btrfs_block_group *bg;

	bg = btrfs_lookup_block_group(trans->fs_info, bytenr);
	BUG_ON(!bg); /* Logic error */

	pin_down_extent(trans, bg, bytenr, num_bytes, reserved);
	/* Drop the reference obtained by the lookup. */
	btrfs_put_block_group(bg);

	return 0;
}
2572f0486c68SYan, Zheng
2573f0486c68SYan, Zheng /*
2574e688b725SChris Mason * this function must be called within transaction
2575f0486c68SYan, Zheng */
btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle * trans,u64 bytenr,u64 num_bytes)25769fce5704SNikolay Borisov int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
2577e688b725SChris Mason u64 bytenr, u64 num_bytes)
2578f0486c68SYan, Zheng {
257932da5386SDavid Sterba struct btrfs_block_group *cache;
2580b50c6e25SJosef Bacik int ret;
2581e688b725SChris Mason
25829fce5704SNikolay Borisov cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
2583b50c6e25SJosef Bacik if (!cache)
2584b50c6e25SJosef Bacik return -EINVAL;
2585e688b725SChris Mason
2586e688b725SChris Mason /*
2587ced8ecf0SOmar Sandoval * Fully cache the free space first so that our pin removes the free space
2588ced8ecf0SOmar Sandoval * from the cache.
2589e688b725SChris Mason */
2590ced8ecf0SOmar Sandoval ret = btrfs_cache_block_group(cache, true);
25919ad6d91fSFilipe Manana if (ret)
25929ad6d91fSFilipe Manana goto out;
2593e688b725SChris Mason
25946690d071SNikolay Borisov pin_down_extent(trans, cache, bytenr, num_bytes, 0);
2595e688b725SChris Mason
2596e688b725SChris Mason /* remove us from the free space cache (if we're there at all) */
2597b50c6e25SJosef Bacik ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
25989ad6d91fSFilipe Manana out:
2599e688b725SChris Mason btrfs_put_block_group(cache);
2600b50c6e25SJosef Bacik return ret;
2601e688b725SChris Mason }
2602e688b725SChris Mason
/*
 * Remove [@start, @start + @num_bytes) from the free space of the block
 * group that contains @start, after making sure that block group is cached.
 *
 * Returns -EINVAL when no block group contains @start, otherwise the result
 * of btrfs_cache_block_group() on failure or of btrfs_remove_free_space().
 */
static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
				   u64 start, u64 num_bytes)
{
	struct btrfs_block_group *bg;
	int err;

	bg = btrfs_lookup_block_group(fs_info, start);
	if (!bg)
		return -EINVAL;

	err = btrfs_cache_block_group(bg, true);
	if (!err)
		err = btrfs_remove_free_space(bg, start, num_bytes);

	/* Drop the reference obtained by the lookup. */
	btrfs_put_block_group(bg);
	return err;
}
26228c2a1a30SJosef Bacik
btrfs_exclude_logged_extents(struct extent_buffer * eb)2623bcdc428cSDavid Sterba int btrfs_exclude_logged_extents(struct extent_buffer *eb)
26248c2a1a30SJosef Bacik {
2625bcdc428cSDavid Sterba struct btrfs_fs_info *fs_info = eb->fs_info;
26268c2a1a30SJosef Bacik struct btrfs_file_extent_item *item;
26278c2a1a30SJosef Bacik struct btrfs_key key;
26288c2a1a30SJosef Bacik int found_type;
26298c2a1a30SJosef Bacik int i;
2630b89311efSGu Jinxiang int ret = 0;
26318c2a1a30SJosef Bacik
26322ff7e61eSJeff Mahoney if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
26338c2a1a30SJosef Bacik return 0;
26348c2a1a30SJosef Bacik
26358c2a1a30SJosef Bacik for (i = 0; i < btrfs_header_nritems(eb); i++) {
26368c2a1a30SJosef Bacik btrfs_item_key_to_cpu(eb, &key, i);
26378c2a1a30SJosef Bacik if (key.type != BTRFS_EXTENT_DATA_KEY)
26388c2a1a30SJosef Bacik continue;
26398c2a1a30SJosef Bacik item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
26408c2a1a30SJosef Bacik found_type = btrfs_file_extent_type(eb, item);
26418c2a1a30SJosef Bacik if (found_type == BTRFS_FILE_EXTENT_INLINE)
26428c2a1a30SJosef Bacik continue;
26438c2a1a30SJosef Bacik if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
26448c2a1a30SJosef Bacik continue;
26458c2a1a30SJosef Bacik key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
26468c2a1a30SJosef Bacik key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
2647b89311efSGu Jinxiang ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
2648b89311efSGu Jinxiang if (ret)
2649b89311efSGu Jinxiang break;
26508c2a1a30SJosef Bacik }
26518c2a1a30SJosef Bacik
2652b89311efSGu Jinxiang return ret;
26538c2a1a30SJosef Bacik }
26548c2a1a30SJosef Bacik
26559cfa3e34SFilipe Manana static void
btrfs_inc_block_group_reservations(struct btrfs_block_group * bg)265632da5386SDavid Sterba btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
26579cfa3e34SFilipe Manana {
26589cfa3e34SFilipe Manana atomic_inc(&bg->reservations);
26599cfa3e34SFilipe Manana }
26609cfa3e34SFilipe Manana
2661c759c4e1SJosef Bacik /*
2662c759c4e1SJosef Bacik * Returns the free cluster for the given space info and sets empty_cluster to
2663c759c4e1SJosef Bacik * what it should be based on the mount options.
2664c759c4e1SJosef Bacik */
2665c759c4e1SJosef Bacik static struct btrfs_free_cluster *
fetch_cluster_info(struct btrfs_fs_info * fs_info,struct btrfs_space_info * space_info,u64 * empty_cluster)26662ff7e61eSJeff Mahoney fetch_cluster_info(struct btrfs_fs_info *fs_info,
26672ff7e61eSJeff Mahoney struct btrfs_space_info *space_info, u64 *empty_cluster)
2668c759c4e1SJosef Bacik {
2669c759c4e1SJosef Bacik struct btrfs_free_cluster *ret = NULL;
2670c759c4e1SJosef Bacik
2671c759c4e1SJosef Bacik *empty_cluster = 0;
2672c759c4e1SJosef Bacik if (btrfs_mixed_space_info(space_info))
2673c759c4e1SJosef Bacik return ret;
2674c759c4e1SJosef Bacik
2675c759c4e1SJosef Bacik if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
26760b246afaSJeff Mahoney ret = &fs_info->meta_alloc_cluster;
2677583b7231SHans van Kranenburg if (btrfs_test_opt(fs_info, SSD))
2678583b7231SHans van Kranenburg *empty_cluster = SZ_2M;
2679583b7231SHans van Kranenburg else
2680ee22184bSByongho Lee *empty_cluster = SZ_64K;
2681583b7231SHans van Kranenburg } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
2682583b7231SHans van Kranenburg btrfs_test_opt(fs_info, SSD_SPREAD)) {
2683583b7231SHans van Kranenburg *empty_cluster = SZ_2M;
26840b246afaSJeff Mahoney ret = &fs_info->data_alloc_cluster;
2685c759c4e1SJosef Bacik }
2686c759c4e1SJosef Bacik
2687c759c4e1SJosef Bacik return ret;
2688c759c4e1SJosef Bacik }
2689c759c4e1SJosef Bacik
/*
 * Undo the pinned accounting for the byte range [@start, @end].
 *
 * The range is processed one block group at a time. For each piece the
 * pinned counters of the block group and its space_info are reduced, and
 * when @return_free_space is true the piece is handed back to the block
 * group's free space. Space from read-only block groups is counted as
 * bytes_readonly and, on zoned filesystems, as bytes_zone_unusable; in both
 * cases it is not offered to the global reserve or to waiting tickets.
 *
 * Always returns 0. A range not covered by a block group hits BUG_ON().
 */
static int unpin_extent_range(struct btrfs_fs_info *fs_info,
			      u64 start, u64 end,
			      const bool return_free_space)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	struct btrfs_block_group *bg = NULL;
	struct btrfs_free_cluster *cluster = NULL;
	u64 empty_cluster = 0;
	u64 total_unpinned = 0;

	while (start <= end) {
		struct btrfs_space_info *space_info;
		bool readonly = false;
		bool refill;
		u64 len;

		/* Moved past the current block group: switch to the next. */
		if (!bg || start >= bg->start + bg->length) {
			if (bg)
				btrfs_put_block_group(bg);
			total_unpinned = 0;
			bg = btrfs_lookup_block_group(fs_info, start);
			BUG_ON(!bg); /* Logic error */

			cluster = fetch_cluster_info(fs_info, bg->space_info,
						     &empty_cluster);
			empty_cluster <<= 1;
		}

		/* Clamp the piece to both the block group and the range. */
		len = bg->start + bg->length - start;
		len = min(len, end + 1 - start);

		if (return_free_space)
			btrfs_add_free_space(bg, start, len);

		start += len;
		total_unpinned += len;
		space_info = bg->space_info;

		/*
		 * Enough has been unpinned in this block group that a cluster
		 * might fit again, so drop the fragmented mark if it is set.
		 */
		if (cluster && cluster->fragmented &&
		    total_unpinned > empty_cluster) {
			spin_lock(&cluster->lock);
			cluster->fragmented = 0;
			spin_unlock(&cluster->lock);
		}

		spin_lock(&space_info->lock);
		spin_lock(&bg->lock);
		bg->pinned -= len;
		btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
		space_info->max_extent_size = 0;
		if (bg->ro) {
			space_info->bytes_readonly += len;
			readonly = true;
		} else if (btrfs_is_zoned(fs_info)) {
			/* Need reset before reusing in a zoned block group */
			space_info->bytes_zone_unusable += len;
			readonly = true;
		}
		spin_unlock(&bg->lock);

		refill = return_free_space && !readonly;

		/* Top up the global reserve first if it is not full. */
		if (refill && global_rsv->space_info == space_info) {
			spin_lock(&global_rsv->lock);
			if (!global_rsv->full) {
				u64 room = global_rsv->size -
					   global_rsv->reserved;
				u64 to_add = min(len, room);

				global_rsv->reserved += to_add;
				btrfs_space_info_update_bytes_may_use(fs_info,
						space_info, to_add);
				if (global_rsv->reserved >= global_rsv->size)
					global_rsv->full = 1;
				len -= to_add;
			}
			spin_unlock(&global_rsv->lock);
		}

		/* Whatever is left may satisfy pending tickets. */
		if (refill && len)
			btrfs_try_granting_tickets(fs_info, space_info);
		spin_unlock(&space_info->lock);
	}

	if (bg)
		btrfs_put_block_group(bg);
	return 0;
}
2782ccd467d6SChris Mason
btrfs_finish_extent_commit(struct btrfs_trans_handle * trans)27835ead2dd0SNikolay Borisov int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
2784a28ec197SChris Mason {
27855ead2dd0SNikolay Borisov struct btrfs_fs_info *fs_info = trans->fs_info;
278632da5386SDavid Sterba struct btrfs_block_group *block_group, *tmp;
2787e33e17eeSJeff Mahoney struct list_head *deleted_bgs;
278811833d66SYan Zheng struct extent_io_tree *unpin;
27891a5bc167SChris Mason u64 start;
27901a5bc167SChris Mason u64 end;
2791a28ec197SChris Mason int ret;
2792a28ec197SChris Mason
2793fe119a6eSNikolay Borisov unpin = &trans->transaction->pinned_extents;
279411833d66SYan Zheng
2795bf31f87fSDavid Sterba while (!TRANS_ABORTED(trans)) {
27960e6ec385SFilipe Manana struct extent_state *cached_state = NULL;
27970e6ec385SFilipe Manana
2798d4b450cdSFilipe Manana mutex_lock(&fs_info->unused_bg_unpin_mutex);
2799e5860f82SFilipe Manana if (!find_first_extent_bit(unpin, 0, &start, &end,
2800e5860f82SFilipe Manana EXTENT_DIRTY, &cached_state)) {
2801d4b450cdSFilipe Manana mutex_unlock(&fs_info->unused_bg_unpin_mutex);
2802a28ec197SChris Mason break;
2803d4b450cdSFilipe Manana }
28041f3c79a2SLiu Hui
280546b27f50SDennis Zhou if (btrfs_test_opt(fs_info, DISCARD_SYNC))
28062ff7e61eSJeff Mahoney ret = btrfs_discard_extent(fs_info, start,
28075378e607SLi Dongyang end + 1 - start, NULL);
28081f3c79a2SLiu Hui
28090e6ec385SFilipe Manana clear_extent_dirty(unpin, start, end, &cached_state);
28102ff7e61eSJeff Mahoney unpin_extent_range(fs_info, start, end, true);
2811d4b450cdSFilipe Manana mutex_unlock(&fs_info->unused_bg_unpin_mutex);
28120e6ec385SFilipe Manana free_extent_state(cached_state);
2813c286ac48SChris Mason cond_resched();
28140579da42SChris Mason }
2815817d52f8SJosef Bacik
2816a2309300SDennis Zhou if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
2817a2309300SDennis Zhou btrfs_discard_calc_delay(&fs_info->discard_ctl);
2818b0643e59SDennis Zhou btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
2819a2309300SDennis Zhou }
2820b0643e59SDennis Zhou
2821e33e17eeSJeff Mahoney /*
2822e33e17eeSJeff Mahoney * Transaction is finished. We don't need the lock anymore. We
2823e33e17eeSJeff Mahoney * do need to clean up the block groups in case of a transaction
2824e33e17eeSJeff Mahoney * abort.
2825e33e17eeSJeff Mahoney */
2826e33e17eeSJeff Mahoney deleted_bgs = &trans->transaction->deleted_bgs;
2827e33e17eeSJeff Mahoney list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
2828e33e17eeSJeff Mahoney u64 trimmed = 0;
2829e33e17eeSJeff Mahoney
2830e33e17eeSJeff Mahoney ret = -EROFS;
2831bf31f87fSDavid Sterba if (!TRANS_ABORTED(trans))
28322ff7e61eSJeff Mahoney ret = btrfs_discard_extent(fs_info,
2833b3470b5dSDavid Sterba block_group->start,
2834b3470b5dSDavid Sterba block_group->length,
2835e33e17eeSJeff Mahoney &trimmed);
2836e33e17eeSJeff Mahoney
2837e33e17eeSJeff Mahoney list_del_init(&block_group->bg_list);
28386b7304afSFilipe Manana btrfs_unfreeze_block_group(block_group);
2839e33e17eeSJeff Mahoney btrfs_put_block_group(block_group);
2840e33e17eeSJeff Mahoney
2841e33e17eeSJeff Mahoney if (ret) {
2842e33e17eeSJeff Mahoney const char *errstr = btrfs_decode_error(ret);
2843e33e17eeSJeff Mahoney btrfs_warn(fs_info,
2844913e1535SDavid Sterba "discard failed while removing blockgroup: errno=%d %s",
2845e33e17eeSJeff Mahoney ret, errstr);
2846e33e17eeSJeff Mahoney }
2847e33e17eeSJeff Mahoney }
2848e33e17eeSJeff Mahoney
2849e20d96d6SChris Mason return 0;
2850e20d96d6SChris Mason }
2851e20d96d6SChris Mason
/*
 * Bookkeeping for freeing the extent [@bytenr, @bytenr + @num_bytes):
 * delete its checksums when @is_data is true, add the range to the free
 * space tree, then update the block group via btrfs_update_block_group().
 *
 * Any failing step aborts the transaction and its error is returned;
 * otherwise 0 is returned.
 */
static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
				     u64 bytenr, u64 num_bytes, bool is_data)
{
	int err;

	if (is_data) {
		struct btrfs_root *csum_root = btrfs_csum_root(trans->fs_info,
							       bytenr);

		err = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
		if (err) {
			btrfs_abort_transaction(trans, err);
			return err;
		}
	}

	err = add_to_free_space_tree(trans, bytenr, num_bytes);
	if (err) {
		btrfs_abort_transaction(trans, err);
		return err;
	}

	err = btrfs_update_block_group(trans, bytenr, num_bytes, false);
	if (err)
		btrfs_abort_transaction(trans, err);

	return err;
}
28808f8aa4c7SJosef Bacik
/*
 * Abort the transaction with -EUCLEAN, print the leaf at path->nodes[0] and
 * then emit a critical message built from @fmt and the trailing arguments.
 *
 * The macro arguments are parenthesized so that non-trivial expressions
 * expand with the intended precedence. @trans is expanded more than once,
 * so it must not have side effects.
 */
#define abort_and_dump(trans, path, fmt, args...)	\
({							\
	btrfs_abort_transaction((trans), -EUCLEAN);	\
	btrfs_print_leaf((path)->nodes[0]);		\
	btrfs_crit((trans)->fs_info, fmt, ##args);	\
})
2887eee3b811SQu Wenruo
28881c2a07f5SQu Wenruo /*
28891c2a07f5SQu Wenruo * Drop one or more refs of @node.
28901c2a07f5SQu Wenruo *
28911c2a07f5SQu Wenruo * 1. Locate the extent refs.
28921c2a07f5SQu Wenruo * It's either inline in EXTENT/METADATA_ITEM or in keyed SHARED_* item.
28931c2a07f5SQu Wenruo * Locate it, then reduce the refs number or remove the ref line completely.
28941c2a07f5SQu Wenruo *
28951c2a07f5SQu Wenruo * 2. Update the refs count in EXTENT/METADATA_ITEM
28961c2a07f5SQu Wenruo *
28971c2a07f5SQu Wenruo * Inline backref case:
28981c2a07f5SQu Wenruo *
28991c2a07f5SQu Wenruo * in extent tree we have:
29001c2a07f5SQu Wenruo *
29011c2a07f5SQu Wenruo * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
29021c2a07f5SQu Wenruo * refs 2 gen 6 flags DATA
29031c2a07f5SQu Wenruo * extent data backref root FS_TREE objectid 258 offset 0 count 1
29041c2a07f5SQu Wenruo * extent data backref root FS_TREE objectid 257 offset 0 count 1
29051c2a07f5SQu Wenruo *
29061c2a07f5SQu Wenruo * This function gets called with:
29071c2a07f5SQu Wenruo *
29081c2a07f5SQu Wenruo * node->bytenr = 13631488
29091c2a07f5SQu Wenruo * node->num_bytes = 1048576
29101c2a07f5SQu Wenruo * root_objectid = FS_TREE
29111c2a07f5SQu Wenruo * owner_objectid = 257
29121c2a07f5SQu Wenruo * owner_offset = 0
29131c2a07f5SQu Wenruo * refs_to_drop = 1
29141c2a07f5SQu Wenruo *
 * Then we should get something like:
29161c2a07f5SQu Wenruo *
29171c2a07f5SQu Wenruo * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
29181c2a07f5SQu Wenruo * refs 1 gen 6 flags DATA
29191c2a07f5SQu Wenruo * extent data backref root FS_TREE objectid 258 offset 0 count 1
29201c2a07f5SQu Wenruo *
29211c2a07f5SQu Wenruo * Keyed backref case:
29221c2a07f5SQu Wenruo *
29231c2a07f5SQu Wenruo * in extent tree we have:
29241c2a07f5SQu Wenruo *
29251c2a07f5SQu Wenruo * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
29261c2a07f5SQu Wenruo * refs 754 gen 6 flags DATA
29271c2a07f5SQu Wenruo * [...]
29281c2a07f5SQu Wenruo * item 2 key (13631488 EXTENT_DATA_REF <HASH>) itemoff 3915 itemsize 28
29291c2a07f5SQu Wenruo * extent data backref root FS_TREE objectid 866 offset 0 count 1
29301c2a07f5SQu Wenruo *
 * This function gets called with:
29321c2a07f5SQu Wenruo *
29331c2a07f5SQu Wenruo * node->bytenr = 13631488
29341c2a07f5SQu Wenruo * node->num_bytes = 1048576
29351c2a07f5SQu Wenruo * root_objectid = FS_TREE
29361c2a07f5SQu Wenruo * owner_objectid = 866
29371c2a07f5SQu Wenruo * owner_offset = 0
29381c2a07f5SQu Wenruo * refs_to_drop = 1
29391c2a07f5SQu Wenruo *
 * Then we should get something like:
29411c2a07f5SQu Wenruo *
29421c2a07f5SQu Wenruo * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
29431c2a07f5SQu Wenruo * refs 753 gen 6 flags DATA
29441c2a07f5SQu Wenruo *
29451c2a07f5SQu Wenruo * And that (13631488 EXTENT_DATA_REF <HASH>) gets removed.
29461c2a07f5SQu Wenruo */
__btrfs_free_extent(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_node * node,u64 parent,u64 root_objectid,u64 owner_objectid,u64 owner_offset,int refs_to_drop,struct btrfs_delayed_extent_op * extent_op)29475d4f98a2SYan Zheng static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2948c682f9b3SQu Wenruo struct btrfs_delayed_ref_node *node, u64 parent,
29495d4f98a2SYan Zheng u64 root_objectid, u64 owner_objectid,
29505d4f98a2SYan Zheng u64 owner_offset, int refs_to_drop,
2951c682f9b3SQu Wenruo struct btrfs_delayed_extent_op *extent_op)
2952a28ec197SChris Mason {
2953e72cb923SNikolay Borisov struct btrfs_fs_info *info = trans->fs_info;
2954e2fa7227SChris Mason struct btrfs_key key;
29555d4f98a2SYan Zheng struct btrfs_path *path;
295629cbcf40SJosef Bacik struct btrfs_root *extent_root;
29575f39d397SChris Mason struct extent_buffer *leaf;
29585d4f98a2SYan Zheng struct btrfs_extent_item *ei;
29595d4f98a2SYan Zheng struct btrfs_extent_inline_ref *iref;
2960a28ec197SChris Mason int ret;
29615d4f98a2SYan Zheng int is_data;
2962952fccacSChris Mason int extent_slot = 0;
2963952fccacSChris Mason int found_extent = 0;
2964952fccacSChris Mason int num_to_del = 1;
29655d4f98a2SYan Zheng u32 item_size;
29665d4f98a2SYan Zheng u64 refs;
2967c682f9b3SQu Wenruo u64 bytenr = node->bytenr;
2968c682f9b3SQu Wenruo u64 num_bytes = node->num_bytes;
29690b246afaSJeff Mahoney bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
2970037e6390SChris Mason
297129cbcf40SJosef Bacik extent_root = btrfs_extent_root(info, bytenr);
2972abed4aaaSJosef Bacik ASSERT(extent_root);
297329cbcf40SJosef Bacik
29745caf2a00SChris Mason path = btrfs_alloc_path();
297554aa1f4dSChris Mason if (!path)
297654aa1f4dSChris Mason return -ENOMEM;
297754aa1f4dSChris Mason
29785d4f98a2SYan Zheng is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
29791c2a07f5SQu Wenruo
29801c2a07f5SQu Wenruo if (!is_data && refs_to_drop != 1) {
29811c2a07f5SQu Wenruo btrfs_crit(info,
29821c2a07f5SQu Wenruo "invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
29831c2a07f5SQu Wenruo node->bytenr, refs_to_drop);
29841c2a07f5SQu Wenruo ret = -EINVAL;
29851c2a07f5SQu Wenruo btrfs_abort_transaction(trans, ret);
29861c2a07f5SQu Wenruo goto out;
29871c2a07f5SQu Wenruo }
29885d4f98a2SYan Zheng
29893173a18fSJosef Bacik if (is_data)
2990897ca819SThomas Meyer skinny_metadata = false;
29913173a18fSJosef Bacik
2992fbe4801bSNikolay Borisov ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
2993fbe4801bSNikolay Borisov parent, root_objectid, owner_objectid,
29945d4f98a2SYan Zheng owner_offset);
29957bb86316SChris Mason if (ret == 0) {
29961c2a07f5SQu Wenruo /*
29971c2a07f5SQu Wenruo * Either the inline backref or the SHARED_DATA_REF/
29981c2a07f5SQu Wenruo * SHARED_BLOCK_REF is found
29991c2a07f5SQu Wenruo *
30001c2a07f5SQu Wenruo * Here is a quick path to locate EXTENT/METADATA_ITEM.
30011c2a07f5SQu Wenruo * It's possible the EXTENT/METADATA_ITEM is near current slot.
30021c2a07f5SQu Wenruo */
3003952fccacSChris Mason extent_slot = path->slots[0];
30045d4f98a2SYan Zheng while (extent_slot >= 0) {
30055d4f98a2SYan Zheng btrfs_item_key_to_cpu(path->nodes[0], &key,
3006952fccacSChris Mason extent_slot);
30075d4f98a2SYan Zheng if (key.objectid != bytenr)
3008952fccacSChris Mason break;
30095d4f98a2SYan Zheng if (key.type == BTRFS_EXTENT_ITEM_KEY &&
30105d4f98a2SYan Zheng key.offset == num_bytes) {
3011952fccacSChris Mason found_extent = 1;
3012952fccacSChris Mason break;
3013952fccacSChris Mason }
30143173a18fSJosef Bacik if (key.type == BTRFS_METADATA_ITEM_KEY &&
30153173a18fSJosef Bacik key.offset == owner_objectid) {
30163173a18fSJosef Bacik found_extent = 1;
30173173a18fSJosef Bacik break;
30183173a18fSJosef Bacik }
30191c2a07f5SQu Wenruo
30201c2a07f5SQu Wenruo /* Quick path didn't find the EXTEMT/METADATA_ITEM */
3021952fccacSChris Mason if (path->slots[0] - extent_slot > 5)
3022952fccacSChris Mason break;
30235d4f98a2SYan Zheng extent_slot--;
3024952fccacSChris Mason }
3025a79865c6SNikolay Borisov
302631840ae1SZheng Yan if (!found_extent) {
30271c2a07f5SQu Wenruo if (iref) {
3028eee3b811SQu Wenruo abort_and_dump(trans, path,
3029eee3b811SQu Wenruo "invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref",
3030eee3b811SQu Wenruo path->slots[0]);
3031eee3b811SQu Wenruo ret = -EUCLEAN;
3032eee3b811SQu Wenruo goto out;
30331c2a07f5SQu Wenruo }
30341c2a07f5SQu Wenruo /* Must be SHARED_* item, remove the backref first */
303576d76e78SJosef Bacik ret = remove_extent_backref(trans, extent_root, path,
30365b2a54bbSJosef Bacik NULL, refs_to_drop, is_data);
3037005d6427SDavid Sterba if (ret) {
303866642832SJeff Mahoney btrfs_abort_transaction(trans, ret);
3039005d6427SDavid Sterba goto out;
3040005d6427SDavid Sterba }
3041b3b4aa74SDavid Sterba btrfs_release_path(path);
30425d4f98a2SYan Zheng
30431c2a07f5SQu Wenruo /* Slow path to locate EXTENT/METADATA_ITEM */
30445d4f98a2SYan Zheng key.objectid = bytenr;
30455d4f98a2SYan Zheng key.type = BTRFS_EXTENT_ITEM_KEY;
30465d4f98a2SYan Zheng key.offset = num_bytes;
30475d4f98a2SYan Zheng
30483173a18fSJosef Bacik if (!is_data && skinny_metadata) {
30493173a18fSJosef Bacik key.type = BTRFS_METADATA_ITEM_KEY;
30503173a18fSJosef Bacik key.offset = owner_objectid;
30513173a18fSJosef Bacik }
30523173a18fSJosef Bacik
305331840ae1SZheng Yan ret = btrfs_search_slot(trans, extent_root,
305431840ae1SZheng Yan &key, path, -1, 1);
30553173a18fSJosef Bacik if (ret > 0 && skinny_metadata && path->slots[0]) {
30563173a18fSJosef Bacik /*
30573173a18fSJosef Bacik * Couldn't find our skinny metadata item,
30583173a18fSJosef Bacik * see if we have ye olde extent item.
30593173a18fSJosef Bacik */
30603173a18fSJosef Bacik path->slots[0]--;
30613173a18fSJosef Bacik btrfs_item_key_to_cpu(path->nodes[0], &key,
30623173a18fSJosef Bacik path->slots[0]);
30633173a18fSJosef Bacik if (key.objectid == bytenr &&
30643173a18fSJosef Bacik key.type == BTRFS_EXTENT_ITEM_KEY &&
30653173a18fSJosef Bacik key.offset == num_bytes)
30663173a18fSJosef Bacik ret = 0;
30673173a18fSJosef Bacik }
30683173a18fSJosef Bacik
30693173a18fSJosef Bacik if (ret > 0 && skinny_metadata) {
30703173a18fSJosef Bacik skinny_metadata = false;
30719ce49a0bSFilipe Manana key.objectid = bytenr;
30723173a18fSJosef Bacik key.type = BTRFS_EXTENT_ITEM_KEY;
30733173a18fSJosef Bacik key.offset = num_bytes;
30743173a18fSJosef Bacik btrfs_release_path(path);
30753173a18fSJosef Bacik ret = btrfs_search_slot(trans, extent_root,
30763173a18fSJosef Bacik &key, path, -1, 1);
30773173a18fSJosef Bacik }
30783173a18fSJosef Bacik
3079f3465ca4SJosef Bacik if (ret) {
3080b783e62dSJosef Bacik if (ret > 0)
3081a4f78750SDavid Sterba btrfs_print_leaf(path->nodes[0]);
3082eee3b811SQu Wenruo btrfs_err(info,
3083eee3b811SQu Wenruo "umm, got %d back from search, was looking for %llu, slot %d",
3084eee3b811SQu Wenruo ret, bytenr, path->slots[0]);
3085f3465ca4SJosef Bacik }
3086005d6427SDavid Sterba if (ret < 0) {
308766642832SJeff Mahoney btrfs_abort_transaction(trans, ret);
3088005d6427SDavid Sterba goto out;
3089005d6427SDavid Sterba }
309031840ae1SZheng Yan extent_slot = path->slots[0];
309131840ae1SZheng Yan }
3092fae7f21cSDulshani Gunawardhana } else if (WARN_ON(ret == -ENOENT)) {
3093eee3b811SQu Wenruo abort_and_dump(trans, path,
3094eee3b811SQu Wenruo "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu slot %d",
3095c1c9ff7cSGeert Uytterhoeven bytenr, parent, root_objectid, owner_objectid,
3096eee3b811SQu Wenruo owner_offset, path->slots[0]);
3097c4a050bbSJosef Bacik goto out;
309879787eaaSJeff Mahoney } else {
309966642832SJeff Mahoney btrfs_abort_transaction(trans, ret);
3100005d6427SDavid Sterba goto out;
31017bb86316SChris Mason }
31025f39d397SChris Mason
31035f39d397SChris Mason leaf = path->nodes[0];
31043212fa14SJosef Bacik item_size = btrfs_item_size(leaf, extent_slot);
31056d8ff4e4SDavid Sterba if (unlikely(item_size < sizeof(*ei))) {
3106182741d2SQu Wenruo ret = -EUCLEAN;
3107182741d2SQu Wenruo btrfs_err(trans->fs_info,
3108182741d2SQu Wenruo "unexpected extent item size, has %u expect >= %zu",
3109182741d2SQu Wenruo item_size, sizeof(*ei));
3110ba3c2b19SNikolay Borisov btrfs_abort_transaction(trans, ret);
3111ba3c2b19SNikolay Borisov goto out;
3112ba3c2b19SNikolay Borisov }
31135d4f98a2SYan Zheng ei = btrfs_item_ptr(leaf, extent_slot,
31145d4f98a2SYan Zheng struct btrfs_extent_item);
31153173a18fSJosef Bacik if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
31163173a18fSJosef Bacik key.type == BTRFS_EXTENT_ITEM_KEY) {
31175d4f98a2SYan Zheng struct btrfs_tree_block_info *bi;
3118eee3b811SQu Wenruo
31191c2a07f5SQu Wenruo if (item_size < sizeof(*ei) + sizeof(*bi)) {
3120eee3b811SQu Wenruo abort_and_dump(trans, path,
3121eee3b811SQu Wenruo "invalid extent item size for key (%llu, %u, %llu) slot %u owner %llu, has %u expect >= %zu",
31221c2a07f5SQu Wenruo key.objectid, key.type, key.offset,
3123eee3b811SQu Wenruo path->slots[0], owner_objectid, item_size,
31241c2a07f5SQu Wenruo sizeof(*ei) + sizeof(*bi));
3125eee3b811SQu Wenruo ret = -EUCLEAN;
3126eee3b811SQu Wenruo goto out;
31271c2a07f5SQu Wenruo }
31285d4f98a2SYan Zheng bi = (struct btrfs_tree_block_info *)(ei + 1);
31295d4f98a2SYan Zheng WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
31305d4f98a2SYan Zheng }
31315d4f98a2SYan Zheng
31325d4f98a2SYan Zheng refs = btrfs_extent_refs(leaf, ei);
313332b02538SJosef Bacik if (refs < refs_to_drop) {
3134eee3b811SQu Wenruo abort_and_dump(trans, path,
3135eee3b811SQu Wenruo "trying to drop %d refs but we only have %llu for bytenr %llu slot %u",
3136eee3b811SQu Wenruo refs_to_drop, refs, bytenr, path->slots[0]);
3137eee3b811SQu Wenruo ret = -EUCLEAN;
3138eee3b811SQu Wenruo goto out;
313932b02538SJosef Bacik }
31405d4f98a2SYan Zheng refs -= refs_to_drop;
31415d4f98a2SYan Zheng
31425d4f98a2SYan Zheng if (refs > 0) {
31435d4f98a2SYan Zheng if (extent_op)
31445d4f98a2SYan Zheng __run_delayed_extent_op(extent_op, leaf, ei);
31455d4f98a2SYan Zheng /*
31465d4f98a2SYan Zheng * In the case of inline back ref, reference count will
31475d4f98a2SYan Zheng * be updated by remove_extent_backref
31485d4f98a2SYan Zheng */
31495d4f98a2SYan Zheng if (iref) {
31501c2a07f5SQu Wenruo if (!found_extent) {
3151eee3b811SQu Wenruo abort_and_dump(trans, path,
3152eee3b811SQu Wenruo "invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u",
3153eee3b811SQu Wenruo path->slots[0]);
3154eee3b811SQu Wenruo ret = -EUCLEAN;
3155eee3b811SQu Wenruo goto out;
31561c2a07f5SQu Wenruo }
31575d4f98a2SYan Zheng } else {
31585d4f98a2SYan Zheng btrfs_set_extent_refs(leaf, ei, refs);
3159d5e09e38SFilipe Manana btrfs_mark_buffer_dirty(trans, leaf);
31605d4f98a2SYan Zheng }
31615d4f98a2SYan Zheng if (found_extent) {
316276d76e78SJosef Bacik ret = remove_extent_backref(trans, extent_root, path,
31635b2a54bbSJosef Bacik iref, refs_to_drop, is_data);
3164005d6427SDavid Sterba if (ret) {
316566642832SJeff Mahoney btrfs_abort_transaction(trans, ret);
3166005d6427SDavid Sterba goto out;
3167005d6427SDavid Sterba }
3168952fccacSChris Mason }
31695d4f98a2SYan Zheng } else {
31701c2a07f5SQu Wenruo /* In this branch refs == 1 */
31715d4f98a2SYan Zheng if (found_extent) {
31721c2a07f5SQu Wenruo if (is_data && refs_to_drop !=
31731c2a07f5SQu Wenruo extent_data_ref_count(path, iref)) {
3174eee3b811SQu Wenruo abort_and_dump(trans, path,
3175eee3b811SQu Wenruo "invalid refs_to_drop, current refs %u refs_to_drop %u slot %u",
31761c2a07f5SQu Wenruo extent_data_ref_count(path, iref),
3177eee3b811SQu Wenruo refs_to_drop, path->slots[0]);
3178eee3b811SQu Wenruo ret = -EUCLEAN;
3179eee3b811SQu Wenruo goto out;
31801c2a07f5SQu Wenruo }
31815d4f98a2SYan Zheng if (iref) {
31821c2a07f5SQu Wenruo if (path->slots[0] != extent_slot) {
3183eee3b811SQu Wenruo abort_and_dump(trans, path,
3184eee3b811SQu Wenruo "invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref",
31851c2a07f5SQu Wenruo key.objectid, key.type,
3186eee3b811SQu Wenruo key.offset, path->slots[0]);
3187eee3b811SQu Wenruo ret = -EUCLEAN;
3188eee3b811SQu Wenruo goto out;
31891c2a07f5SQu Wenruo }
31905d4f98a2SYan Zheng } else {
31911c2a07f5SQu Wenruo /*
31921c2a07f5SQu Wenruo * No inline ref, we must be at SHARED_* item,
31931c2a07f5SQu Wenruo * And it's single ref, it must be:
31941c2a07f5SQu Wenruo * | extent_slot ||extent_slot + 1|
31951c2a07f5SQu Wenruo * [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ]
31961c2a07f5SQu Wenruo */
31971c2a07f5SQu Wenruo if (path->slots[0] != extent_slot + 1) {
3198eee3b811SQu Wenruo abort_and_dump(trans, path,
3199eee3b811SQu Wenruo "invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM",
3200eee3b811SQu Wenruo path->slots[0]);
3201eee3b811SQu Wenruo ret = -EUCLEAN;
3202eee3b811SQu Wenruo goto out;
32031c2a07f5SQu Wenruo }
32045d4f98a2SYan Zheng path->slots[0] = extent_slot;
32055d4f98a2SYan Zheng num_to_del = 2;
32065d4f98a2SYan Zheng }
32075d4f98a2SYan Zheng }
32085d4f98a2SYan Zheng
3209952fccacSChris Mason ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
3210952fccacSChris Mason num_to_del);
3211005d6427SDavid Sterba if (ret) {
321266642832SJeff Mahoney btrfs_abort_transaction(trans, ret);
3213005d6427SDavid Sterba goto out;
3214005d6427SDavid Sterba }
3215b3b4aa74SDavid Sterba btrfs_release_path(path);
321621af804cSDavid Woodhouse
32178f8aa4c7SJosef Bacik ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
3218a28ec197SChris Mason }
3219fcebe456SJosef Bacik btrfs_release_path(path);
3220fcebe456SJosef Bacik
322179787eaaSJeff Mahoney out:
32225caf2a00SChris Mason btrfs_free_path(path);
3223a28ec197SChris Mason return ret;
3224a28ec197SChris Mason }
3225a28ec197SChris Mason
3226a28ec197SChris Mason /*
3227f0486c68SYan, Zheng * when we free an block, it is possible (and likely) that we free the last
32281887be66SChris Mason * delayed ref for that extent as well. This searches the delayed ref tree for
32291887be66SChris Mason * a given extent, and if there are no other delayed refs to be processed, it
32301887be66SChris Mason * removes it from the tree.
32311887be66SChris Mason */
check_ref_cleanup(struct btrfs_trans_handle * trans,u64 bytenr)32321887be66SChris Mason static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
32332ff7e61eSJeff Mahoney u64 bytenr)
32341887be66SChris Mason {
32351887be66SChris Mason struct btrfs_delayed_ref_head *head;
32361887be66SChris Mason struct btrfs_delayed_ref_root *delayed_refs;
3237f0486c68SYan, Zheng int ret = 0;
32381887be66SChris Mason
32391887be66SChris Mason delayed_refs = &trans->transaction->delayed_refs;
32401887be66SChris Mason spin_lock(&delayed_refs->lock);
3241f72ad18eSLiu Bo head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
32421887be66SChris Mason if (!head)
3243cf93da7bSChris Mason goto out_delayed_unlock;
32441887be66SChris Mason
3245d7df2c79SJosef Bacik spin_lock(&head->lock);
3246e3d03965SLiu Bo if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
32471887be66SChris Mason goto out;
32481887be66SChris Mason
3249bedc6617SJosef Bacik if (cleanup_extent_op(head) != NULL)
32505d4f98a2SYan Zheng goto out;
32515d4f98a2SYan Zheng
32521887be66SChris Mason /*
32531887be66SChris Mason * waiting for the lock here would deadlock. If someone else has it
32541887be66SChris Mason * locked they are already in the process of dropping it anyway
32551887be66SChris Mason */
32561887be66SChris Mason if (!mutex_trylock(&head->mutex))
32571887be66SChris Mason goto out;
32581887be66SChris Mason
3259d7baffdaSJosef Bacik btrfs_delete_ref_head(delayed_refs, head);
326061c681feSFilipe Manana head->processing = false;
3261d7baffdaSJosef Bacik
3262d7df2c79SJosef Bacik spin_unlock(&head->lock);
32631887be66SChris Mason spin_unlock(&delayed_refs->lock);
32641887be66SChris Mason
3265f0486c68SYan, Zheng BUG_ON(head->extent_op);
3266f0486c68SYan, Zheng if (head->must_insert_reserved)
3267f0486c68SYan, Zheng ret = 1;
3268f0486c68SYan, Zheng
326931890da0SJosef Bacik btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
3270f0486c68SYan, Zheng mutex_unlock(&head->mutex);
3271d278850eSJosef Bacik btrfs_put_delayed_ref_head(head);
3272f0486c68SYan, Zheng return ret;
32731887be66SChris Mason out:
3274d7df2c79SJosef Bacik spin_unlock(&head->lock);
3275cf93da7bSChris Mason
3276cf93da7bSChris Mason out_delayed_unlock:
32771887be66SChris Mason spin_unlock(&delayed_refs->lock);
32781887be66SChris Mason return 0;
32791887be66SChris Mason }
32801887be66SChris Mason
/*
 * Drop a reference on the tree block @buf and, when possible, give its space
 * back right away.
 *
 * @trans:    transaction handle
 * @root_id:  objectid of the root the block is being freed from
 * @buf:      the extent buffer being freed
 * @parent:   passed through to btrfs_init_generic_ref()
 * @last_ref: non-zero when the caller is dropping the last reference
 *
 * Unless @root_id is BTRFS_TREE_LOG_OBJECTID, a BTRFS_DROP_DELAYED_REF
 * delayed tree ref is queued; failure to queue it hits BUG_ON().
 *
 * When @last_ref is set the EXTENT_BUFFER_CORRUPT bit is always cleared on
 * the way out.  If additionally the buffer's header generation equals the
 * transaction id, the block is either redirtied, pinned, or returned to the
 * free space of its block group, as detailed in the body.
 */
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
			   u64 root_id,
			   struct extent_buffer *buf,
			   u64 parent, int last_ref)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	const bool is_log_tree = (root_id == BTRFS_TREE_LOG_OBJECTID);
	struct btrfs_ref drop_ref = { 0 };
	struct btrfs_block_group *bg;
	int ret;

	btrfs_init_generic_ref(&drop_ref, BTRFS_DROP_DELAYED_REF,
			       buf->start, buf->len, parent);
	btrfs_init_tree_ref(&drop_ref, btrfs_header_level(buf),
			    root_id, 0, false);

	if (!is_log_tree) {
		btrfs_ref_tree_mod(fs_info, &drop_ref);
		ret = btrfs_add_delayed_tree_ref(trans, &drop_ref, NULL);
		BUG_ON(ret); /* -ENOMEM */
	}

	/* Nothing more to do unless this was the last reference. */
	if (!last_ref)
		return;

	/* Only blocks whose generation matches this transaction go further. */
	if (btrfs_header_generation(buf) != trans->transid)
		goto clear_corrupt;

	if (!is_log_tree) {
		ret = check_ref_cleanup(trans, buf->start);
		if (!ret) {
			/* check_ref_cleanup() returned 0: redirty and stop. */
			btrfs_redirty_list_add(trans->transaction, buf);
			goto clear_corrupt;
		}
	}

	bg = btrfs_lookup_block_group(fs_info, buf->start);

	if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
		pin_down_extent(trans, bg, buf->start, buf->len, 1);
		btrfs_put_block_group(bg);
		goto clear_corrupt;
	}

	/*
	 * If there are tree mod log users we may have recorded mod log
	 * operations for this node.  Were the node re-allocated, those
	 * operations could be replayed although they happened while the node
	 * lived in a completely different root.  For example, if it was part
	 * of root A, then got reallocated to root B, a
	 * btrfs_old_search_slot(root B) could replay operations from the time
	 * the block belonged to root A, giving an inconsistent view of the
	 * btree.
	 *
	 * We are safe from races here because at this point no other node or
	 * root points to this extent buffer, so a tree mod log user joining
	 * after this check has no existing log of operations on this node to
	 * contend with.
	 *
	 * The same pin-instead-of-free path is taken on zoned filesystems.
	 */
	if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags) ||
	    btrfs_is_zoned(fs_info)) {
		btrfs_redirty_list_add(trans->transaction, buf);
		pin_down_extent(trans, bg, buf->start, buf->len, 1);
		btrfs_put_block_group(bg);
		goto clear_corrupt;
	}

	WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));

	btrfs_add_free_space(bg, buf->start, buf->len);
	btrfs_free_reserved_bytes(bg, buf->len, 0);
	btrfs_put_block_group(bg);
	trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);

clear_corrupt:
	/*
	 * Deleting the buffer, clear the corrupt flag since it doesn't matter
	 * anymore.  Only reached with last_ref set.
	 */
	clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
}
3363f0486c68SYan, Zheng
336479787eaaSJeff Mahoney /* Can return -ENOMEM */
btrfs_free_extent(struct btrfs_trans_handle * trans,struct btrfs_ref * ref)3365ffd4bb2aSQu Wenruo int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
3366925baeddSChris Mason {
3367ffd4bb2aSQu Wenruo struct btrfs_fs_info *fs_info = trans->fs_info;
3368925baeddSChris Mason int ret;
3369925baeddSChris Mason
3370f5ee5c9aSJeff Mahoney if (btrfs_is_testing(fs_info))
3371faa2dbf0SJosef Bacik return 0;
3372fccb84c9SDavid Sterba
337356bec294SChris Mason /*
337456bec294SChris Mason * tree log blocks never actually go into the extent allocation
337556bec294SChris Mason * tree, just update pinning info and exit early.
337656bec294SChris Mason */
3377ffd4bb2aSQu Wenruo if ((ref->type == BTRFS_REF_METADATA &&
3378113479d5SNikolay Borisov ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
3379ffd4bb2aSQu Wenruo (ref->type == BTRFS_REF_DATA &&
3380113479d5SNikolay Borisov ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)) {
3381b9473439SChris Mason /* unlocks the pinned mutex */
3382b25c36f8SNikolay Borisov btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
338356bec294SChris Mason ret = 0;
3384ffd4bb2aSQu Wenruo } else if (ref->type == BTRFS_REF_METADATA) {
33852187374fSJosef Bacik ret = btrfs_add_delayed_tree_ref(trans, ref, NULL);
33865d4f98a2SYan Zheng } else {
33872187374fSJosef Bacik ret = btrfs_add_delayed_data_ref(trans, ref, 0);
338856bec294SChris Mason }
3389d7eae340SOmar Sandoval
3390ffd4bb2aSQu Wenruo if (!((ref->type == BTRFS_REF_METADATA &&
3391113479d5SNikolay Borisov ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
3392ffd4bb2aSQu Wenruo (ref->type == BTRFS_REF_DATA &&
3393113479d5SNikolay Borisov ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)))
3394ffd4bb2aSQu Wenruo btrfs_ref_tree_mod(fs_info, ref);
33958a5040f7SQu Wenruo
3396925baeddSChris Mason return ret;
3397925baeddSChris Mason }
3398925baeddSChris Mason
/*
 * Stages of the free-extent search loop, in increasing order.  Code compares
 * stages with relational operators, so the order of the enumerators matters.
 */
enum btrfs_loop_type {
	/*
	 * Kick off caching of block groups, but neither wait for progress
	 * nor for caching to finish.
	 */
	LOOP_CACHING_NOWAIT,

	/*
	 * For a block group that isn't cached, wait until its free_space is
	 * at least the amount of space being waited for.
	 */
	LOOP_CACHING_WAIT,

	/*
	 * Permit allocating from block groups that have no size
	 * classification yet.
	 */
	LOOP_UNSET_SIZE_CLASS,

	/* Allocate a chunk, then retry the allocation. */
	LOOP_ALLOC_CHUNK,

	/* Disregard the size class restrictions for this allocation. */
	LOOP_WRONG_SIZE_CLASS,

	/*
	 * Disregard the empty size and only try to allocate the number of
	 * bytes this allocation actually needs.
	 */
	LOOP_NO_EMPTY_SIZE,
};
3434817d52f8SJosef Bacik
3435e570fd27SMiao Xie static inline void
btrfs_lock_block_group(struct btrfs_block_group * cache,int delalloc)343632da5386SDavid Sterba btrfs_lock_block_group(struct btrfs_block_group *cache,
3437e570fd27SMiao Xie int delalloc)
3438e570fd27SMiao Xie {
3439e570fd27SMiao Xie if (delalloc)
3440e570fd27SMiao Xie down_read(&cache->data_rwsem);
3441e570fd27SMiao Xie }
3442e570fd27SMiao Xie
btrfs_grab_block_group(struct btrfs_block_group * cache,int delalloc)344332da5386SDavid Sterba static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
3444e570fd27SMiao Xie int delalloc)
3445e570fd27SMiao Xie {
3446e570fd27SMiao Xie btrfs_get_block_group(cache);
3447e570fd27SMiao Xie if (delalloc)
3448e570fd27SMiao Xie down_read(&cache->data_rwsem);
3449e570fd27SMiao Xie }
3450e570fd27SMiao Xie
btrfs_lock_cluster(struct btrfs_block_group * block_group,struct btrfs_free_cluster * cluster,int delalloc)345132da5386SDavid Sterba static struct btrfs_block_group *btrfs_lock_cluster(
345232da5386SDavid Sterba struct btrfs_block_group *block_group,
3453e570fd27SMiao Xie struct btrfs_free_cluster *cluster,
3454e570fd27SMiao Xie int delalloc)
3455c142c6a4SJules Irenge __acquires(&cluster->refill_lock)
3456e570fd27SMiao Xie {
345732da5386SDavid Sterba struct btrfs_block_group *used_bg = NULL;
34586719afdcSGeert Uytterhoeven
3459e570fd27SMiao Xie spin_lock(&cluster->refill_lock);
34606719afdcSGeert Uytterhoeven while (1) {
3461e570fd27SMiao Xie used_bg = cluster->block_group;
3462e570fd27SMiao Xie if (!used_bg)
3463e570fd27SMiao Xie return NULL;
3464e570fd27SMiao Xie
3465e570fd27SMiao Xie if (used_bg == block_group)
3466e570fd27SMiao Xie return used_bg;
3467e570fd27SMiao Xie
3468e570fd27SMiao Xie btrfs_get_block_group(used_bg);
3469e570fd27SMiao Xie
3470e570fd27SMiao Xie if (!delalloc)
3471e570fd27SMiao Xie return used_bg;
3472e570fd27SMiao Xie
3473e570fd27SMiao Xie if (down_read_trylock(&used_bg->data_rwsem))
3474e570fd27SMiao Xie return used_bg;
3475e570fd27SMiao Xie
3476e570fd27SMiao Xie spin_unlock(&cluster->refill_lock);
34776719afdcSGeert Uytterhoeven
3478e321f8a8SLiu Bo /* We should only have one-level nested. */
3479e321f8a8SLiu Bo down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
34806719afdcSGeert Uytterhoeven
34816719afdcSGeert Uytterhoeven spin_lock(&cluster->refill_lock);
34826719afdcSGeert Uytterhoeven if (used_bg == cluster->block_group)
34836719afdcSGeert Uytterhoeven return used_bg;
34846719afdcSGeert Uytterhoeven
34856719afdcSGeert Uytterhoeven up_read(&used_bg->data_rwsem);
34866719afdcSGeert Uytterhoeven btrfs_put_block_group(used_bg);
34876719afdcSGeert Uytterhoeven }
3488e570fd27SMiao Xie }
3489e570fd27SMiao Xie
3490e570fd27SMiao Xie static inline void
btrfs_release_block_group(struct btrfs_block_group * cache,int delalloc)349132da5386SDavid Sterba btrfs_release_block_group(struct btrfs_block_group *cache,
3492e570fd27SMiao Xie int delalloc)
3493e570fd27SMiao Xie {
3494e570fd27SMiao Xie if (delalloc)
3495e570fd27SMiao Xie up_read(&cache->data_rwsem);
3496e570fd27SMiao Xie btrfs_put_block_group(cache);
3497e570fd27SMiao Xie }
3498e570fd27SMiao Xie
3499d06e3bb6SQu Wenruo /*
3500d06e3bb6SQu Wenruo * Helper function for find_free_extent().
3501d06e3bb6SQu Wenruo *
3502d06e3bb6SQu Wenruo * Return -ENOENT to inform caller that we need fallback to unclustered mode.
3503d06e3bb6SQu Wenruo * Return >0 to inform caller that we find nothing
3504d06e3bb6SQu Wenruo * Return 0 means we have found a location and set ffe_ctl->found_offset.
3505d06e3bb6SQu Wenruo */
find_free_extent_clustered(struct btrfs_block_group * bg,struct find_free_extent_ctl * ffe_ctl,struct btrfs_block_group ** cluster_bg_ret)350632da5386SDavid Sterba static int find_free_extent_clustered(struct btrfs_block_group *bg,
3507d06e3bb6SQu Wenruo struct find_free_extent_ctl *ffe_ctl,
350832da5386SDavid Sterba struct btrfs_block_group **cluster_bg_ret)
3509d06e3bb6SQu Wenruo {
351032da5386SDavid Sterba struct btrfs_block_group *cluster_bg;
3511897cae79SNaohiro Aota struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3512d06e3bb6SQu Wenruo u64 aligned_cluster;
3513d06e3bb6SQu Wenruo u64 offset;
3514d06e3bb6SQu Wenruo int ret;
3515d06e3bb6SQu Wenruo
3516d06e3bb6SQu Wenruo cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
3517d06e3bb6SQu Wenruo if (!cluster_bg)
3518d06e3bb6SQu Wenruo goto refill_cluster;
3519d06e3bb6SQu Wenruo if (cluster_bg != bg && (cluster_bg->ro ||
3520d06e3bb6SQu Wenruo !block_group_bits(cluster_bg, ffe_ctl->flags)))
3521d06e3bb6SQu Wenruo goto release_cluster;
3522d06e3bb6SQu Wenruo
3523d06e3bb6SQu Wenruo offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
3524b3470b5dSDavid Sterba ffe_ctl->num_bytes, cluster_bg->start,
3525d06e3bb6SQu Wenruo &ffe_ctl->max_extent_size);
3526d06e3bb6SQu Wenruo if (offset) {
3527d06e3bb6SQu Wenruo /* We have a block, we're done */
3528d06e3bb6SQu Wenruo spin_unlock(&last_ptr->refill_lock);
3529cfc2de0fSBoris Burkov trace_btrfs_reserve_extent_cluster(cluster_bg, ffe_ctl);
3530d06e3bb6SQu Wenruo *cluster_bg_ret = cluster_bg;
3531d06e3bb6SQu Wenruo ffe_ctl->found_offset = offset;
3532d06e3bb6SQu Wenruo return 0;
3533d06e3bb6SQu Wenruo }
3534d06e3bb6SQu Wenruo WARN_ON(last_ptr->block_group != cluster_bg);
3535d06e3bb6SQu Wenruo
3536d06e3bb6SQu Wenruo release_cluster:
3537d06e3bb6SQu Wenruo /*
3538d06e3bb6SQu Wenruo * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a new clusters, so
3539d06e3bb6SQu Wenruo * lets just skip it and let the allocator find whatever block it can
3540d06e3bb6SQu Wenruo * find. If we reach this point, we will have tried the cluster
3541d06e3bb6SQu Wenruo * allocator plenty of times and not have found anything, so we are
3542d06e3bb6SQu Wenruo * likely way too fragmented for the clustering stuff to find anything.
3543d06e3bb6SQu Wenruo *
3544d06e3bb6SQu Wenruo * However, if the cluster is taken from the current block group,
3545d06e3bb6SQu Wenruo * release the cluster first, so that we stand a better chance of
3546d06e3bb6SQu Wenruo * succeeding in the unclustered allocation.
3547d06e3bb6SQu Wenruo */
3548d06e3bb6SQu Wenruo if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
3549d06e3bb6SQu Wenruo spin_unlock(&last_ptr->refill_lock);
3550d06e3bb6SQu Wenruo btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3551d06e3bb6SQu Wenruo return -ENOENT;
3552d06e3bb6SQu Wenruo }
3553d06e3bb6SQu Wenruo
3554d06e3bb6SQu Wenruo /* This cluster didn't work out, free it and start over */
3555d06e3bb6SQu Wenruo btrfs_return_cluster_to_free_space(NULL, last_ptr);
3556d06e3bb6SQu Wenruo
3557d06e3bb6SQu Wenruo if (cluster_bg != bg)
3558d06e3bb6SQu Wenruo btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
3559d06e3bb6SQu Wenruo
3560d06e3bb6SQu Wenruo refill_cluster:
3561d06e3bb6SQu Wenruo if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
3562d06e3bb6SQu Wenruo spin_unlock(&last_ptr->refill_lock);
3563d06e3bb6SQu Wenruo return -ENOENT;
3564d06e3bb6SQu Wenruo }
3565d06e3bb6SQu Wenruo
3566d06e3bb6SQu Wenruo aligned_cluster = max_t(u64,
3567d06e3bb6SQu Wenruo ffe_ctl->empty_cluster + ffe_ctl->empty_size,
3568d06e3bb6SQu Wenruo bg->full_stripe_len);
35692ceeae2eSDavid Sterba ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
35702ceeae2eSDavid Sterba ffe_ctl->num_bytes, aligned_cluster);
3571d06e3bb6SQu Wenruo if (ret == 0) {
3572d06e3bb6SQu Wenruo /* Now pull our allocation out of this cluster */
3573d06e3bb6SQu Wenruo offset = btrfs_alloc_from_cluster(bg, last_ptr,
3574d06e3bb6SQu Wenruo ffe_ctl->num_bytes, ffe_ctl->search_start,
3575d06e3bb6SQu Wenruo &ffe_ctl->max_extent_size);
3576d06e3bb6SQu Wenruo if (offset) {
3577d06e3bb6SQu Wenruo /* We found one, proceed */
3578d06e3bb6SQu Wenruo spin_unlock(&last_ptr->refill_lock);
3579d06e3bb6SQu Wenruo ffe_ctl->found_offset = offset;
3580cfc2de0fSBoris Burkov trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
3581d06e3bb6SQu Wenruo return 0;
3582d06e3bb6SQu Wenruo }
3583d06e3bb6SQu Wenruo }
3584d06e3bb6SQu Wenruo /*
3585d06e3bb6SQu Wenruo * At this point we either didn't find a cluster or we weren't able to
3586d06e3bb6SQu Wenruo * allocate a block from our cluster. Free the cluster we've been
3587d06e3bb6SQu Wenruo * trying to use, and go to the next block group.
3588d06e3bb6SQu Wenruo */
3589d06e3bb6SQu Wenruo btrfs_return_cluster_to_free_space(NULL, last_ptr);
3590d06e3bb6SQu Wenruo spin_unlock(&last_ptr->refill_lock);
3591d06e3bb6SQu Wenruo return 1;
3592d06e3bb6SQu Wenruo }
3593d06e3bb6SQu Wenruo
3594b4bd745dSQu Wenruo /*
3595e1a41848SQu Wenruo * Return >0 to inform caller that we find nothing
3596e1a41848SQu Wenruo * Return 0 when we found an free extent and set ffe_ctrl->found_offset
3597e1a41848SQu Wenruo */
find_free_extent_unclustered(struct btrfs_block_group * bg,struct find_free_extent_ctl * ffe_ctl)359832da5386SDavid Sterba static int find_free_extent_unclustered(struct btrfs_block_group *bg,
3599e1a41848SQu Wenruo struct find_free_extent_ctl *ffe_ctl)
3600e1a41848SQu Wenruo {
3601897cae79SNaohiro Aota struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
3602e1a41848SQu Wenruo u64 offset;
3603e1a41848SQu Wenruo
3604e1a41848SQu Wenruo /*
3605e1a41848SQu Wenruo * We are doing an unclustered allocation, set the fragmented flag so
3606e1a41848SQu Wenruo * we don't bother trying to setup a cluster again until we get more
3607e1a41848SQu Wenruo * space.
3608e1a41848SQu Wenruo */
3609e1a41848SQu Wenruo if (unlikely(last_ptr)) {
3610e1a41848SQu Wenruo spin_lock(&last_ptr->lock);
3611e1a41848SQu Wenruo last_ptr->fragmented = 1;
3612e1a41848SQu Wenruo spin_unlock(&last_ptr->lock);
3613e1a41848SQu Wenruo }
3614e1a41848SQu Wenruo if (ffe_ctl->cached) {
3615e1a41848SQu Wenruo struct btrfs_free_space_ctl *free_space_ctl;
3616e1a41848SQu Wenruo
3617e1a41848SQu Wenruo free_space_ctl = bg->free_space_ctl;
3618e1a41848SQu Wenruo spin_lock(&free_space_ctl->tree_lock);
3619e1a41848SQu Wenruo if (free_space_ctl->free_space <
3620e1a41848SQu Wenruo ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
3621e1a41848SQu Wenruo ffe_ctl->empty_size) {
3622e1a41848SQu Wenruo ffe_ctl->total_free_space = max_t(u64,
3623e1a41848SQu Wenruo ffe_ctl->total_free_space,
3624e1a41848SQu Wenruo free_space_ctl->free_space);
3625e1a41848SQu Wenruo spin_unlock(&free_space_ctl->tree_lock);
3626e1a41848SQu Wenruo return 1;
3627e1a41848SQu Wenruo }
3628e1a41848SQu Wenruo spin_unlock(&free_space_ctl->tree_lock);
3629e1a41848SQu Wenruo }
3630e1a41848SQu Wenruo
3631e1a41848SQu Wenruo offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
3632e1a41848SQu Wenruo ffe_ctl->num_bytes, ffe_ctl->empty_size,
3633e1a41848SQu Wenruo &ffe_ctl->max_extent_size);
3634cd361199SJosef Bacik if (!offset)
3635e1a41848SQu Wenruo return 1;
3636e1a41848SQu Wenruo ffe_ctl->found_offset = offset;
3637e1a41848SQu Wenruo return 0;
3638e1a41848SQu Wenruo }
3639e1a41848SQu Wenruo
do_allocation_clustered(struct btrfs_block_group * block_group,struct find_free_extent_ctl * ffe_ctl,struct btrfs_block_group ** bg_ret)3640c668690dSNaohiro Aota static int do_allocation_clustered(struct btrfs_block_group *block_group,
3641c668690dSNaohiro Aota struct find_free_extent_ctl *ffe_ctl,
3642c668690dSNaohiro Aota struct btrfs_block_group **bg_ret)
3643c668690dSNaohiro Aota {
3644c668690dSNaohiro Aota int ret;
3645c668690dSNaohiro Aota
3646c668690dSNaohiro Aota /* We want to try and use the cluster allocator, so lets look there */
3647c668690dSNaohiro Aota if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
3648897cae79SNaohiro Aota ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
3649cd361199SJosef Bacik if (ret >= 0)
3650c668690dSNaohiro Aota return ret;
3651c668690dSNaohiro Aota /* ret == -ENOENT case falls through */
3652c668690dSNaohiro Aota }
3653c668690dSNaohiro Aota
3654897cae79SNaohiro Aota return find_free_extent_unclustered(block_group, ffe_ctl);
3655c668690dSNaohiro Aota }
3656c668690dSNaohiro Aota
36572eda5708SNaohiro Aota /*
365840ab3be1SNaohiro Aota * Tree-log block group locking
365940ab3be1SNaohiro Aota * ============================
366040ab3be1SNaohiro Aota *
366140ab3be1SNaohiro Aota * fs_info::treelog_bg_lock protects the fs_info::treelog_bg which
366240ab3be1SNaohiro Aota * indicates the starting address of a block group, which is reserved only
366340ab3be1SNaohiro Aota * for tree-log metadata.
366440ab3be1SNaohiro Aota *
366540ab3be1SNaohiro Aota * Lock nesting
366640ab3be1SNaohiro Aota * ============
366740ab3be1SNaohiro Aota *
366840ab3be1SNaohiro Aota * space_info::lock
366940ab3be1SNaohiro Aota * block_group::lock
367040ab3be1SNaohiro Aota * fs_info::treelog_bg_lock
367140ab3be1SNaohiro Aota */
367240ab3be1SNaohiro Aota
367340ab3be1SNaohiro Aota /*
36742eda5708SNaohiro Aota * Simple allocator for sequential-only block group. It only allows sequential
36752eda5708SNaohiro Aota * allocation. No need to play with trees. This function also reserves the
36762eda5708SNaohiro Aota * bytes as in btrfs_add_reserved_bytes.
36772eda5708SNaohiro Aota */
do_allocation_zoned(struct btrfs_block_group * block_group,struct find_free_extent_ctl * ffe_ctl,struct btrfs_block_group ** bg_ret)36782eda5708SNaohiro Aota static int do_allocation_zoned(struct btrfs_block_group *block_group,
36792eda5708SNaohiro Aota struct find_free_extent_ctl *ffe_ctl,
36802eda5708SNaohiro Aota struct btrfs_block_group **bg_ret)
36812eda5708SNaohiro Aota {
368240ab3be1SNaohiro Aota struct btrfs_fs_info *fs_info = block_group->fs_info;
36832eda5708SNaohiro Aota struct btrfs_space_info *space_info = block_group->space_info;
36842eda5708SNaohiro Aota struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
36852eda5708SNaohiro Aota u64 start = block_group->start;
36862eda5708SNaohiro Aota u64 num_bytes = ffe_ctl->num_bytes;
36872eda5708SNaohiro Aota u64 avail;
368840ab3be1SNaohiro Aota u64 bytenr = block_group->start;
368940ab3be1SNaohiro Aota u64 log_bytenr;
3690c2707a25SJohannes Thumshirn u64 data_reloc_bytenr;
36912eda5708SNaohiro Aota int ret = 0;
36922d81eb1cSJohannes Thumshirn bool skip = false;
36932eda5708SNaohiro Aota
36942eda5708SNaohiro Aota ASSERT(btrfs_is_zoned(block_group->fs_info));
36952eda5708SNaohiro Aota
369640ab3be1SNaohiro Aota /*
369740ab3be1SNaohiro Aota * Do not allow non-tree-log blocks in the dedicated tree-log block
369840ab3be1SNaohiro Aota * group, and vice versa.
369940ab3be1SNaohiro Aota */
370040ab3be1SNaohiro Aota spin_lock(&fs_info->treelog_bg_lock);
370140ab3be1SNaohiro Aota log_bytenr = fs_info->treelog_bg;
37022d81eb1cSJohannes Thumshirn if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
37032d81eb1cSJohannes Thumshirn (!ffe_ctl->for_treelog && bytenr == log_bytenr)))
37042d81eb1cSJohannes Thumshirn skip = true;
370540ab3be1SNaohiro Aota spin_unlock(&fs_info->treelog_bg_lock);
370640ab3be1SNaohiro Aota if (skip)
370740ab3be1SNaohiro Aota return 1;
370840ab3be1SNaohiro Aota
3709c2707a25SJohannes Thumshirn /*
3710c2707a25SJohannes Thumshirn * Do not allow non-relocation blocks in the dedicated relocation block
3711c2707a25SJohannes Thumshirn * group, and vice versa.
3712c2707a25SJohannes Thumshirn */
3713c2707a25SJohannes Thumshirn spin_lock(&fs_info->relocation_bg_lock);
3714c2707a25SJohannes Thumshirn data_reloc_bytenr = fs_info->data_reloc_bg;
3715c2707a25SJohannes Thumshirn if (data_reloc_bytenr &&
3716c2707a25SJohannes Thumshirn ((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
3717c2707a25SJohannes Thumshirn (!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
3718c2707a25SJohannes Thumshirn skip = true;
3719c2707a25SJohannes Thumshirn spin_unlock(&fs_info->relocation_bg_lock);
3720c2707a25SJohannes Thumshirn if (skip)
3721c2707a25SJohannes Thumshirn return 1;
37221ada69f6SNaohiro Aota
37232e654e4bSNaohiro Aota /* Check RO and no space case before trying to activate it */
37242e654e4bSNaohiro Aota spin_lock(&block_group->lock);
37251bfd4767SNaohiro Aota if (block_group->ro || btrfs_zoned_bg_is_full(block_group)) {
37261ada69f6SNaohiro Aota ret = 1;
37271ada69f6SNaohiro Aota /*
37281ada69f6SNaohiro Aota * May need to clear fs_info->{treelog,data_reloc}_bg.
37291ada69f6SNaohiro Aota * Return the error after taking the locks.
37301ada69f6SNaohiro Aota */
37312e654e4bSNaohiro Aota }
37322e654e4bSNaohiro Aota spin_unlock(&block_group->lock);
37332e654e4bSNaohiro Aota
37345a7d107eSNaohiro Aota /* Metadata block group is activated at write time. */
37355a7d107eSNaohiro Aota if (!ret && (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
37365a7d107eSNaohiro Aota !btrfs_zone_activate(block_group)) {
37371ada69f6SNaohiro Aota ret = 1;
37381ada69f6SNaohiro Aota /*
37391ada69f6SNaohiro Aota * May need to clear fs_info->{treelog,data_reloc}_bg.
37401ada69f6SNaohiro Aota * Return the error after taking the locks.
37411ada69f6SNaohiro Aota */
37421ada69f6SNaohiro Aota }
37432e654e4bSNaohiro Aota
37442eda5708SNaohiro Aota spin_lock(&space_info->lock);
37452eda5708SNaohiro Aota spin_lock(&block_group->lock);
374640ab3be1SNaohiro Aota spin_lock(&fs_info->treelog_bg_lock);
3747c2707a25SJohannes Thumshirn spin_lock(&fs_info->relocation_bg_lock);
374840ab3be1SNaohiro Aota
37491ada69f6SNaohiro Aota if (ret)
37501ada69f6SNaohiro Aota goto out;
37511ada69f6SNaohiro Aota
375240ab3be1SNaohiro Aota ASSERT(!ffe_ctl->for_treelog ||
375340ab3be1SNaohiro Aota block_group->start == fs_info->treelog_bg ||
375440ab3be1SNaohiro Aota fs_info->treelog_bg == 0);
3755c2707a25SJohannes Thumshirn ASSERT(!ffe_ctl->for_data_reloc ||
3756c2707a25SJohannes Thumshirn block_group->start == fs_info->data_reloc_bg ||
3757c2707a25SJohannes Thumshirn fs_info->data_reloc_bg == 0);
37582eda5708SNaohiro Aota
37593349b57fSJosef Bacik if (block_group->ro ||
3760332581bdSNaohiro Aota (!ffe_ctl->for_data_reloc &&
3761332581bdSNaohiro Aota test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))) {
37622eda5708SNaohiro Aota ret = 1;
37632eda5708SNaohiro Aota goto out;
37642eda5708SNaohiro Aota }
37652eda5708SNaohiro Aota
376640ab3be1SNaohiro Aota /*
376740ab3be1SNaohiro Aota * Do not allow currently using block group to be tree-log dedicated
376840ab3be1SNaohiro Aota * block group.
376940ab3be1SNaohiro Aota */
377040ab3be1SNaohiro Aota if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
377140ab3be1SNaohiro Aota (block_group->used || block_group->reserved)) {
377240ab3be1SNaohiro Aota ret = 1;
377340ab3be1SNaohiro Aota goto out;
377440ab3be1SNaohiro Aota }
377540ab3be1SNaohiro Aota
3776c2707a25SJohannes Thumshirn /*
3777c2707a25SJohannes Thumshirn * Do not allow currently used block group to be the data relocation
3778c2707a25SJohannes Thumshirn * dedicated block group.
3779c2707a25SJohannes Thumshirn */
3780c2707a25SJohannes Thumshirn if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
3781c2707a25SJohannes Thumshirn (block_group->used || block_group->reserved)) {
3782c2707a25SJohannes Thumshirn ret = 1;
3783c2707a25SJohannes Thumshirn goto out;
3784c2707a25SJohannes Thumshirn }
3785c2707a25SJohannes Thumshirn
378698173255SNaohiro Aota WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity);
378798173255SNaohiro Aota avail = block_group->zone_capacity - block_group->alloc_offset;
37882eda5708SNaohiro Aota if (avail < num_bytes) {
37892eda5708SNaohiro Aota if (ffe_ctl->max_extent_size < avail) {
37902eda5708SNaohiro Aota /*
37912eda5708SNaohiro Aota * With sequential allocator, free space is always
37922eda5708SNaohiro Aota * contiguous
37932eda5708SNaohiro Aota */
37942eda5708SNaohiro Aota ffe_ctl->max_extent_size = avail;
37952eda5708SNaohiro Aota ffe_ctl->total_free_space = avail;
37962eda5708SNaohiro Aota }
37972eda5708SNaohiro Aota ret = 1;
37982eda5708SNaohiro Aota goto out;
37992eda5708SNaohiro Aota }
38002eda5708SNaohiro Aota
380140ab3be1SNaohiro Aota if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
380240ab3be1SNaohiro Aota fs_info->treelog_bg = block_group->start;
380340ab3be1SNaohiro Aota
3804332581bdSNaohiro Aota if (ffe_ctl->for_data_reloc) {
3805332581bdSNaohiro Aota if (!fs_info->data_reloc_bg)
3806c2707a25SJohannes Thumshirn fs_info->data_reloc_bg = block_group->start;
3807332581bdSNaohiro Aota /*
3808332581bdSNaohiro Aota * Do not allow allocations from this block group, unless it is
3809332581bdSNaohiro Aota * for data relocation. Compared to increasing the ->ro, setting
3810332581bdSNaohiro Aota * the ->zoned_data_reloc_ongoing flag still allows nocow
3811332581bdSNaohiro Aota * writers to come in. See btrfs_inc_nocow_writers().
3812332581bdSNaohiro Aota *
3813332581bdSNaohiro Aota * We need to disable an allocation to avoid an allocation of
3814332581bdSNaohiro Aota * regular (non-relocation data) extent. With mix of relocation
3815332581bdSNaohiro Aota * extents and regular extents, we can dispatch WRITE commands
3816332581bdSNaohiro Aota * (for relocation extents) and ZONE APPEND commands (for
3817332581bdSNaohiro Aota * regular extents) at the same time to the same zone, which
3818332581bdSNaohiro Aota * easily break the write pointer.
3819332581bdSNaohiro Aota *
3820332581bdSNaohiro Aota * Also, this flag avoids this block group to be zone finished.
3821332581bdSNaohiro Aota */
3822332581bdSNaohiro Aota set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
3823332581bdSNaohiro Aota }
3824c2707a25SJohannes Thumshirn
38252eda5708SNaohiro Aota ffe_ctl->found_offset = start + block_group->alloc_offset;
38262eda5708SNaohiro Aota block_group->alloc_offset += num_bytes;
38272eda5708SNaohiro Aota spin_lock(&ctl->tree_lock);
38282eda5708SNaohiro Aota ctl->free_space -= num_bytes;
38292eda5708SNaohiro Aota spin_unlock(&ctl->tree_lock);
38302eda5708SNaohiro Aota
38312eda5708SNaohiro Aota /*
38322eda5708SNaohiro Aota * We do not check if found_offset is aligned to stripesize. The
38332eda5708SNaohiro Aota * address is anyway rewritten when using zone append writing.
38342eda5708SNaohiro Aota */
38352eda5708SNaohiro Aota
38362eda5708SNaohiro Aota ffe_ctl->search_start = ffe_ctl->found_offset;
38372eda5708SNaohiro Aota
38382eda5708SNaohiro Aota out:
383940ab3be1SNaohiro Aota if (ret && ffe_ctl->for_treelog)
384040ab3be1SNaohiro Aota fs_info->treelog_bg = 0;
3841332581bdSNaohiro Aota if (ret && ffe_ctl->for_data_reloc)
3842c2707a25SJohannes Thumshirn fs_info->data_reloc_bg = 0;
3843c2707a25SJohannes Thumshirn spin_unlock(&fs_info->relocation_bg_lock);
384440ab3be1SNaohiro Aota spin_unlock(&fs_info->treelog_bg_lock);
38452eda5708SNaohiro Aota spin_unlock(&block_group->lock);
38462eda5708SNaohiro Aota spin_unlock(&space_info->lock);
38472eda5708SNaohiro Aota return ret;
38482eda5708SNaohiro Aota }
38492eda5708SNaohiro Aota
do_allocation(struct btrfs_block_group * block_group,struct find_free_extent_ctl * ffe_ctl,struct btrfs_block_group ** bg_ret)3850c668690dSNaohiro Aota static int do_allocation(struct btrfs_block_group *block_group,
3851c668690dSNaohiro Aota struct find_free_extent_ctl *ffe_ctl,
3852c668690dSNaohiro Aota struct btrfs_block_group **bg_ret)
3853c668690dSNaohiro Aota {
3854c668690dSNaohiro Aota switch (ffe_ctl->policy) {
3855c668690dSNaohiro Aota case BTRFS_EXTENT_ALLOC_CLUSTERED:
3856c668690dSNaohiro Aota return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
38572eda5708SNaohiro Aota case BTRFS_EXTENT_ALLOC_ZONED:
38582eda5708SNaohiro Aota return do_allocation_zoned(block_group, ffe_ctl, bg_ret);
3859c668690dSNaohiro Aota default:
3860c668690dSNaohiro Aota BUG();
3861c668690dSNaohiro Aota }
3862c668690dSNaohiro Aota }
3863c668690dSNaohiro Aota
release_block_group(struct btrfs_block_group * block_group,struct find_free_extent_ctl * ffe_ctl,int delalloc)3864baba5062SNaohiro Aota static void release_block_group(struct btrfs_block_group *block_group,
3865baba5062SNaohiro Aota struct find_free_extent_ctl *ffe_ctl,
3866baba5062SNaohiro Aota int delalloc)
3867baba5062SNaohiro Aota {
3868baba5062SNaohiro Aota switch (ffe_ctl->policy) {
3869baba5062SNaohiro Aota case BTRFS_EXTENT_ALLOC_CLUSTERED:
3870cd361199SJosef Bacik ffe_ctl->retry_uncached = false;
3871baba5062SNaohiro Aota break;
38722eda5708SNaohiro Aota case BTRFS_EXTENT_ALLOC_ZONED:
38732eda5708SNaohiro Aota /* Nothing to do */
38742eda5708SNaohiro Aota break;
3875baba5062SNaohiro Aota default:
3876baba5062SNaohiro Aota BUG();
3877baba5062SNaohiro Aota }
3878baba5062SNaohiro Aota
3879baba5062SNaohiro Aota BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
3880baba5062SNaohiro Aota ffe_ctl->index);
3881baba5062SNaohiro Aota btrfs_release_block_group(block_group, delalloc);
3882baba5062SNaohiro Aota }
3883baba5062SNaohiro Aota
found_extent_clustered(struct find_free_extent_ctl * ffe_ctl,struct btrfs_key * ins)38840ab9724bSNaohiro Aota static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl,
38850ab9724bSNaohiro Aota struct btrfs_key *ins)
38860ab9724bSNaohiro Aota {
38870ab9724bSNaohiro Aota struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
38880ab9724bSNaohiro Aota
38890ab9724bSNaohiro Aota if (!ffe_ctl->use_cluster && last_ptr) {
38900ab9724bSNaohiro Aota spin_lock(&last_ptr->lock);
38910ab9724bSNaohiro Aota last_ptr->window_start = ins->objectid;
38920ab9724bSNaohiro Aota spin_unlock(&last_ptr->lock);
38930ab9724bSNaohiro Aota }
38940ab9724bSNaohiro Aota }
38950ab9724bSNaohiro Aota
found_extent(struct find_free_extent_ctl * ffe_ctl,struct btrfs_key * ins)38960ab9724bSNaohiro Aota static void found_extent(struct find_free_extent_ctl *ffe_ctl,
38970ab9724bSNaohiro Aota struct btrfs_key *ins)
38980ab9724bSNaohiro Aota {
38990ab9724bSNaohiro Aota switch (ffe_ctl->policy) {
39000ab9724bSNaohiro Aota case BTRFS_EXTENT_ALLOC_CLUSTERED:
39010ab9724bSNaohiro Aota found_extent_clustered(ffe_ctl, ins);
39020ab9724bSNaohiro Aota break;
39032eda5708SNaohiro Aota case BTRFS_EXTENT_ALLOC_ZONED:
39042eda5708SNaohiro Aota /* Nothing to do */
39052eda5708SNaohiro Aota break;
39060ab9724bSNaohiro Aota default:
39070ab9724bSNaohiro Aota BUG();
39080ab9724bSNaohiro Aota }
39090ab9724bSNaohiro Aota }
39100ab9724bSNaohiro Aota
can_allocate_chunk_zoned(struct btrfs_fs_info * fs_info,struct find_free_extent_ctl * ffe_ctl)3911393f646eSNaohiro Aota static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
3912393f646eSNaohiro Aota struct find_free_extent_ctl *ffe_ctl)
3913393f646eSNaohiro Aota {
39145a7d107eSNaohiro Aota /* Block group's activeness is not a requirement for METADATA block groups. */
39155a7d107eSNaohiro Aota if (!(ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA))
39165a7d107eSNaohiro Aota return 0;
39175a7d107eSNaohiro Aota
3918393f646eSNaohiro Aota /* If we can activate new zone, just allocate a chunk and use it */
3919393f646eSNaohiro Aota if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
3920393f646eSNaohiro Aota return 0;
3921393f646eSNaohiro Aota
3922393f646eSNaohiro Aota /*
3923393f646eSNaohiro Aota * We already reached the max active zones. Try to finish one block
3924393f646eSNaohiro Aota * group to make a room for a new block group. This is only possible
3925393f646eSNaohiro Aota * for a data block group because btrfs_zone_finish() may need to wait
3926393f646eSNaohiro Aota * for a running transaction which can cause a deadlock for metadata
3927393f646eSNaohiro Aota * allocation.
3928393f646eSNaohiro Aota */
3929393f646eSNaohiro Aota if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
3930393f646eSNaohiro Aota int ret = btrfs_zone_finish_one_bg(fs_info);
3931393f646eSNaohiro Aota
3932393f646eSNaohiro Aota if (ret == 1)
3933393f646eSNaohiro Aota return 0;
3934393f646eSNaohiro Aota else if (ret < 0)
3935393f646eSNaohiro Aota return ret;
3936393f646eSNaohiro Aota }
3937393f646eSNaohiro Aota
3938393f646eSNaohiro Aota /*
3939393f646eSNaohiro Aota * If we have enough free space left in an already active block group
3940393f646eSNaohiro Aota * and we can't activate any other zone now, do not allow allocating a
3941393f646eSNaohiro Aota * new chunk and let find_free_extent() retry with a smaller size.
3942393f646eSNaohiro Aota */
3943393f646eSNaohiro Aota if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
3944393f646eSNaohiro Aota return -ENOSPC;
3945393f646eSNaohiro Aota
3946393f646eSNaohiro Aota /*
3947898793d9SNaohiro Aota * Even min_alloc_size is not left in any block groups. Since we cannot
3948898793d9SNaohiro Aota * activate a new block group, allocating it may not help. Let's tell a
3949898793d9SNaohiro Aota * caller to try again and hope it progress something by writing some
3950898793d9SNaohiro Aota * parts of the region. That is only possible for data block groups,
3951898793d9SNaohiro Aota * where a part of the region can be written.
3952898793d9SNaohiro Aota */
3953898793d9SNaohiro Aota if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA)
3954898793d9SNaohiro Aota return -EAGAIN;
3955898793d9SNaohiro Aota
3956898793d9SNaohiro Aota /*
3957393f646eSNaohiro Aota * We cannot activate a new block group and no enough space left in any
3958393f646eSNaohiro Aota * block groups. So, allocating a new block group may not help. But,
3959393f646eSNaohiro Aota * there is nothing to do anyway, so let's go with it.
3960393f646eSNaohiro Aota */
3961393f646eSNaohiro Aota return 0;
3962393f646eSNaohiro Aota }
3963393f646eSNaohiro Aota
can_allocate_chunk(struct btrfs_fs_info * fs_info,struct find_free_extent_ctl * ffe_ctl)3964bb9950d3SNaohiro Aota static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
396550475cd5SNaohiro Aota struct find_free_extent_ctl *ffe_ctl)
396650475cd5SNaohiro Aota {
396750475cd5SNaohiro Aota switch (ffe_ctl->policy) {
396850475cd5SNaohiro Aota case BTRFS_EXTENT_ALLOC_CLUSTERED:
3969bb9950d3SNaohiro Aota return 0;
397050475cd5SNaohiro Aota case BTRFS_EXTENT_ALLOC_ZONED:
3971393f646eSNaohiro Aota return can_allocate_chunk_zoned(fs_info, ffe_ctl);
397250475cd5SNaohiro Aota default:
397350475cd5SNaohiro Aota BUG();
397450475cd5SNaohiro Aota }
397550475cd5SNaohiro Aota }
397650475cd5SNaohiro Aota
3977e1a41848SQu Wenruo /*
3978e72d79d6SQu Wenruo * Return >0 means caller needs to re-search for free extent
3979e72d79d6SQu Wenruo * Return 0 means we have the needed free extent.
3980e72d79d6SQu Wenruo * Return <0 means we failed to locate any free extent.
3981e72d79d6SQu Wenruo */
find_free_extent_update_loop(struct btrfs_fs_info * fs_info,struct btrfs_key * ins,struct find_free_extent_ctl * ffe_ctl,bool full_search)3982e72d79d6SQu Wenruo static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
3983e72d79d6SQu Wenruo struct btrfs_key *ins,
3984e72d79d6SQu Wenruo struct find_free_extent_ctl *ffe_ctl,
398515b7ee65SNaohiro Aota bool full_search)
3986e72d79d6SQu Wenruo {
39878e1d0290SJosef Bacik struct btrfs_root *root = fs_info->chunk_root;
3988e72d79d6SQu Wenruo int ret;
3989e72d79d6SQu Wenruo
3990e72d79d6SQu Wenruo if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
3991e72d79d6SQu Wenruo ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
3992e72d79d6SQu Wenruo ffe_ctl->orig_have_caching_bg = true;
3993e72d79d6SQu Wenruo
3994e72d79d6SQu Wenruo if (ins->objectid) {
39950ab9724bSNaohiro Aota found_extent(ffe_ctl, ins);
3996e72d79d6SQu Wenruo return 0;
3997e72d79d6SQu Wenruo }
3998e72d79d6SQu Wenruo
3999a85f05e5SNaohiro Aota if (ffe_ctl->loop >= LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
4000a85f05e5SNaohiro Aota return 1;
4001a85f05e5SNaohiro Aota
4002a85f05e5SNaohiro Aota ffe_ctl->index++;
4003a85f05e5SNaohiro Aota if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
4004a85f05e5SNaohiro Aota return 1;
4005a85f05e5SNaohiro Aota
4006b9d97cffSJosef Bacik /* See the comments for btrfs_loop_type for an explanation of the phases. */
4007e72d79d6SQu Wenruo if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
4008e72d79d6SQu Wenruo ffe_ctl->index = 0;
4009e72d79d6SQu Wenruo /*
401052bb7a21SBoris Burkov * We want to skip the LOOP_CACHING_WAIT step if we don't have
401152bb7a21SBoris Burkov * any uncached bgs and we've already done a full search
401252bb7a21SBoris Burkov * through.
4013e72d79d6SQu Wenruo */
401452bb7a21SBoris Burkov if (ffe_ctl->loop == LOOP_CACHING_NOWAIT &&
401552bb7a21SBoris Burkov (!ffe_ctl->orig_have_caching_bg && full_search))
4016e72d79d6SQu Wenruo ffe_ctl->loop++;
401752bb7a21SBoris Burkov ffe_ctl->loop++;
4018e72d79d6SQu Wenruo
4019e72d79d6SQu Wenruo if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
4020e72d79d6SQu Wenruo struct btrfs_trans_handle *trans;
4021e72d79d6SQu Wenruo int exist = 0;
4022e72d79d6SQu Wenruo
402350475cd5SNaohiro Aota /* Check if allocation policy allows to create a new chunk */
4024bb9950d3SNaohiro Aota ret = can_allocate_chunk(fs_info, ffe_ctl);
4025bb9950d3SNaohiro Aota if (ret)
4026bb9950d3SNaohiro Aota return ret;
402750475cd5SNaohiro Aota
4028e72d79d6SQu Wenruo trans = current->journal_info;
4029e72d79d6SQu Wenruo if (trans)
4030e72d79d6SQu Wenruo exist = 1;
4031e72d79d6SQu Wenruo else
4032e72d79d6SQu Wenruo trans = btrfs_join_transaction(root);
4033e72d79d6SQu Wenruo
4034e72d79d6SQu Wenruo if (IS_ERR(trans)) {
4035e72d79d6SQu Wenruo ret = PTR_ERR(trans);
4036e72d79d6SQu Wenruo return ret;
4037e72d79d6SQu Wenruo }
4038e72d79d6SQu Wenruo
4039fc471cb0SJosef Bacik ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
4040760e69c4SNaohiro Aota CHUNK_ALLOC_FORCE_FOR_EXTENT);
4041e72d79d6SQu Wenruo
4042e72d79d6SQu Wenruo /* Do not bail out on ENOSPC since we can do more. */
404352bb7a21SBoris Burkov if (ret == -ENOSPC) {
404452bb7a21SBoris Burkov ret = 0;
404552bb7a21SBoris Burkov ffe_ctl->loop++;
404652bb7a21SBoris Burkov }
4047c70e2139SNaohiro Aota else if (ret < 0)
4048e72d79d6SQu Wenruo btrfs_abort_transaction(trans, ret);
4049e72d79d6SQu Wenruo else
4050e72d79d6SQu Wenruo ret = 0;
4051e72d79d6SQu Wenruo if (!exist)
4052e72d79d6SQu Wenruo btrfs_end_transaction(trans);
4053e72d79d6SQu Wenruo if (ret)
4054e72d79d6SQu Wenruo return ret;
4055e72d79d6SQu Wenruo }
4056e72d79d6SQu Wenruo
4057e72d79d6SQu Wenruo if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
405845d8e033SNaohiro Aota if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED)
405945d8e033SNaohiro Aota return -ENOSPC;
406045d8e033SNaohiro Aota
4061e72d79d6SQu Wenruo /*
4062e72d79d6SQu Wenruo * Don't loop again if we already have no empty_size and
4063e72d79d6SQu Wenruo * no empty_cluster.
4064e72d79d6SQu Wenruo */
4065e72d79d6SQu Wenruo if (ffe_ctl->empty_size == 0 &&
4066e72d79d6SQu Wenruo ffe_ctl->empty_cluster == 0)
4067e72d79d6SQu Wenruo return -ENOSPC;
4068e72d79d6SQu Wenruo ffe_ctl->empty_size = 0;
4069e72d79d6SQu Wenruo ffe_ctl->empty_cluster = 0;
4070e72d79d6SQu Wenruo }
4071e72d79d6SQu Wenruo return 1;
4072e72d79d6SQu Wenruo }
4073e72d79d6SQu Wenruo return -ENOSPC;
4074e72d79d6SQu Wenruo }
4075e72d79d6SQu Wenruo
find_free_extent_check_size_class(struct find_free_extent_ctl * ffe_ctl,struct btrfs_block_group * bg)407652bb7a21SBoris Burkov static bool find_free_extent_check_size_class(struct find_free_extent_ctl *ffe_ctl,
407752bb7a21SBoris Burkov struct btrfs_block_group *bg)
407852bb7a21SBoris Burkov {
407952bb7a21SBoris Burkov if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
408052bb7a21SBoris Burkov return true;
4081cb0922f2SBoris Burkov if (!btrfs_block_group_should_use_size_class(bg))
408252bb7a21SBoris Burkov return true;
408352bb7a21SBoris Burkov if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
408452bb7a21SBoris Burkov return true;
408552bb7a21SBoris Burkov if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
408652bb7a21SBoris Burkov bg->size_class == BTRFS_BG_SZ_NONE)
408752bb7a21SBoris Burkov return true;
408852bb7a21SBoris Burkov return ffe_ctl->size_class == bg->size_class;
408952bb7a21SBoris Burkov }
409052bb7a21SBoris Burkov
prepare_allocation_clustered(struct btrfs_fs_info * fs_info,struct find_free_extent_ctl * ffe_ctl,struct btrfs_space_info * space_info,struct btrfs_key * ins)40917e895409SNaohiro Aota static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
40927e895409SNaohiro Aota struct find_free_extent_ctl *ffe_ctl,
40937e895409SNaohiro Aota struct btrfs_space_info *space_info,
40947e895409SNaohiro Aota struct btrfs_key *ins)
40957e895409SNaohiro Aota {
40967e895409SNaohiro Aota /*
40977e895409SNaohiro Aota * If our free space is heavily fragmented we may not be able to make
40987e895409SNaohiro Aota * big contiguous allocations, so instead of doing the expensive search
40997e895409SNaohiro Aota * for free space, simply return ENOSPC with our max_extent_size so we
41007e895409SNaohiro Aota * can go ahead and search for a more manageable chunk.
41017e895409SNaohiro Aota *
41027e895409SNaohiro Aota * If our max_extent_size is large enough for our allocation simply
41037e895409SNaohiro Aota * disable clustering since we will likely not be able to find enough
41047e895409SNaohiro Aota * space to create a cluster and induce latency trying.
41057e895409SNaohiro Aota */
41067e895409SNaohiro Aota if (space_info->max_extent_size) {
41077e895409SNaohiro Aota spin_lock(&space_info->lock);
41087e895409SNaohiro Aota if (space_info->max_extent_size &&
41097e895409SNaohiro Aota ffe_ctl->num_bytes > space_info->max_extent_size) {
41107e895409SNaohiro Aota ins->offset = space_info->max_extent_size;
41117e895409SNaohiro Aota spin_unlock(&space_info->lock);
41127e895409SNaohiro Aota return -ENOSPC;
41137e895409SNaohiro Aota } else if (space_info->max_extent_size) {
41147e895409SNaohiro Aota ffe_ctl->use_cluster = false;
41157e895409SNaohiro Aota }
41167e895409SNaohiro Aota spin_unlock(&space_info->lock);
41177e895409SNaohiro Aota }
41187e895409SNaohiro Aota
41197e895409SNaohiro Aota ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info,
41207e895409SNaohiro Aota &ffe_ctl->empty_cluster);
41217e895409SNaohiro Aota if (ffe_ctl->last_ptr) {
41227e895409SNaohiro Aota struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
41237e895409SNaohiro Aota
41247e895409SNaohiro Aota spin_lock(&last_ptr->lock);
41257e895409SNaohiro Aota if (last_ptr->block_group)
41267e895409SNaohiro Aota ffe_ctl->hint_byte = last_ptr->window_start;
41277e895409SNaohiro Aota if (last_ptr->fragmented) {
41287e895409SNaohiro Aota /*
41297e895409SNaohiro Aota * We still set window_start so we can keep track of the
41307e895409SNaohiro Aota * last place we found an allocation to try and save
41317e895409SNaohiro Aota * some time.
41327e895409SNaohiro Aota */
41337e895409SNaohiro Aota ffe_ctl->hint_byte = last_ptr->window_start;
41347e895409SNaohiro Aota ffe_ctl->use_cluster = false;
41357e895409SNaohiro Aota }
41367e895409SNaohiro Aota spin_unlock(&last_ptr->lock);
41377e895409SNaohiro Aota }
41387e895409SNaohiro Aota
41397e895409SNaohiro Aota return 0;
41407e895409SNaohiro Aota }
41417e895409SNaohiro Aota
prepare_allocation_zoned(struct btrfs_fs_info * fs_info,struct find_free_extent_ctl * ffe_ctl)4142b1e30e2fSNaohiro Aota static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
4143b1e30e2fSNaohiro Aota struct find_free_extent_ctl *ffe_ctl)
4144b1e30e2fSNaohiro Aota {
4145b1e30e2fSNaohiro Aota if (ffe_ctl->for_treelog) {
4146b1e30e2fSNaohiro Aota spin_lock(&fs_info->treelog_bg_lock);
4147b1e30e2fSNaohiro Aota if (fs_info->treelog_bg)
4148b1e30e2fSNaohiro Aota ffe_ctl->hint_byte = fs_info->treelog_bg;
4149b1e30e2fSNaohiro Aota spin_unlock(&fs_info->treelog_bg_lock);
4150b1e30e2fSNaohiro Aota } else if (ffe_ctl->for_data_reloc) {
4151b1e30e2fSNaohiro Aota spin_lock(&fs_info->relocation_bg_lock);
4152b1e30e2fSNaohiro Aota if (fs_info->data_reloc_bg)
4153b1e30e2fSNaohiro Aota ffe_ctl->hint_byte = fs_info->data_reloc_bg;
4154b1e30e2fSNaohiro Aota spin_unlock(&fs_info->relocation_bg_lock);
4155*46ac4e1fSNaohiro Aota } else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
4156*46ac4e1fSNaohiro Aota struct btrfs_block_group *block_group;
4157*46ac4e1fSNaohiro Aota
4158*46ac4e1fSNaohiro Aota spin_lock(&fs_info->zone_active_bgs_lock);
4159*46ac4e1fSNaohiro Aota list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
4160*46ac4e1fSNaohiro Aota /*
4161*46ac4e1fSNaohiro Aota * No lock is OK here because avail is monotinically
4162*46ac4e1fSNaohiro Aota * decreasing, and this is just a hint.
4163*46ac4e1fSNaohiro Aota */
4164*46ac4e1fSNaohiro Aota u64 avail = block_group->zone_capacity - block_group->alloc_offset;
4165*46ac4e1fSNaohiro Aota
4166*46ac4e1fSNaohiro Aota if (block_group_bits(block_group, ffe_ctl->flags) &&
4167*46ac4e1fSNaohiro Aota avail >= ffe_ctl->num_bytes) {
4168*46ac4e1fSNaohiro Aota ffe_ctl->hint_byte = block_group->start;
4169*46ac4e1fSNaohiro Aota break;
4170*46ac4e1fSNaohiro Aota }
4171*46ac4e1fSNaohiro Aota }
4172*46ac4e1fSNaohiro Aota spin_unlock(&fs_info->zone_active_bgs_lock);
4173b1e30e2fSNaohiro Aota }
4174b1e30e2fSNaohiro Aota
4175b1e30e2fSNaohiro Aota return 0;
4176b1e30e2fSNaohiro Aota }
4177b1e30e2fSNaohiro Aota
prepare_allocation(struct btrfs_fs_info * fs_info,struct find_free_extent_ctl * ffe_ctl,struct btrfs_space_info * space_info,struct btrfs_key * ins)41787e895409SNaohiro Aota static int prepare_allocation(struct btrfs_fs_info *fs_info,
41797e895409SNaohiro Aota struct find_free_extent_ctl *ffe_ctl,
41807e895409SNaohiro Aota struct btrfs_space_info *space_info,
41817e895409SNaohiro Aota struct btrfs_key *ins)
41827e895409SNaohiro Aota {
41837e895409SNaohiro Aota switch (ffe_ctl->policy) {
41847e895409SNaohiro Aota case BTRFS_EXTENT_ALLOC_CLUSTERED:
41857e895409SNaohiro Aota return prepare_allocation_clustered(fs_info, ffe_ctl,
41867e895409SNaohiro Aota space_info, ins);
41872eda5708SNaohiro Aota case BTRFS_EXTENT_ALLOC_ZONED:
4188b1e30e2fSNaohiro Aota return prepare_allocation_zoned(fs_info, ffe_ctl);
41897e895409SNaohiro Aota default:
41907e895409SNaohiro Aota BUG();
41917e895409SNaohiro Aota }
41927e895409SNaohiro Aota }
41937e895409SNaohiro Aota
4194e72d79d6SQu Wenruo /*
4195fec577fbSChris Mason * walks the btree of allocated extents and find a hole of a given size.
4196fec577fbSChris Mason * The key ins is changed to record the hole:
4197a4820398SMiao Xie * ins->objectid == start position
419862e2749eSChris Mason * ins->flags = BTRFS_EXTENT_ITEM_KEY
4199a4820398SMiao Xie * ins->offset == the size of the hole.
4200fec577fbSChris Mason * Any available blocks before search_start are skipped.
4201a4820398SMiao Xie *
4202a4820398SMiao Xie * If there is no suitable free space, we will record the max size of
4203a4820398SMiao Xie * the free space extent currently.
4204e72d79d6SQu Wenruo *
4205e72d79d6SQu Wenruo * The overall logic and call chain:
4206e72d79d6SQu Wenruo *
4207e72d79d6SQu Wenruo * find_free_extent()
4208e72d79d6SQu Wenruo * |- Iterate through all block groups
4209e72d79d6SQu Wenruo * | |- Get a valid block group
4210e72d79d6SQu Wenruo * | |- Try to do clustered allocation in that block group
4211e72d79d6SQu Wenruo * | |- Try to do unclustered allocation in that block group
4212e72d79d6SQu Wenruo * | |- Check if the result is valid
4213e72d79d6SQu Wenruo * | | |- If valid, then exit
4214e72d79d6SQu Wenruo * | |- Jump to next block group
4215e72d79d6SQu Wenruo * |
4216e72d79d6SQu Wenruo * |- Push harder to find free extents
4217e72d79d6SQu Wenruo * |- If not found, re-iterate all block groups
4218fec577fbSChris Mason */
find_free_extent(struct btrfs_root * root,struct btrfs_key * ins,struct find_free_extent_ctl * ffe_ctl)4219437490feSQu Wenruo static noinline int find_free_extent(struct btrfs_root *root,
4220a12b0dc0SNaohiro Aota struct btrfs_key *ins,
4221a12b0dc0SNaohiro Aota struct find_free_extent_ctl *ffe_ctl)
4222fec577fbSChris Mason {
4223437490feSQu Wenruo struct btrfs_fs_info *fs_info = root->fs_info;
422480eb234aSJosef Bacik int ret = 0;
4225db8fe64fSJosef Bacik int cache_block_group_error = 0;
422632da5386SDavid Sterba struct btrfs_block_group *block_group = NULL;
422780eb234aSJosef Bacik struct btrfs_space_info *space_info;
4228a5e681d9SJosef Bacik bool full_search = false;
4229fec577fbSChris Mason
4230a12b0dc0SNaohiro Aota WARN_ON(ffe_ctl->num_bytes < fs_info->sectorsize);
4231b4bd745dSQu Wenruo
4232a12b0dc0SNaohiro Aota ffe_ctl->search_start = 0;
4233c10859beSNaohiro Aota /* For clustered allocation */
4234a12b0dc0SNaohiro Aota ffe_ctl->empty_cluster = 0;
4235a12b0dc0SNaohiro Aota ffe_ctl->last_ptr = NULL;
4236a12b0dc0SNaohiro Aota ffe_ctl->use_cluster = true;
4237a12b0dc0SNaohiro Aota ffe_ctl->have_caching_bg = false;
4238a12b0dc0SNaohiro Aota ffe_ctl->orig_have_caching_bg = false;
4239a12b0dc0SNaohiro Aota ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
4240a12b0dc0SNaohiro Aota ffe_ctl->loop = 0;
4241cd361199SJosef Bacik ffe_ctl->retry_uncached = false;
4242a12b0dc0SNaohiro Aota ffe_ctl->cached = 0;
4243a12b0dc0SNaohiro Aota ffe_ctl->max_extent_size = 0;
4244a12b0dc0SNaohiro Aota ffe_ctl->total_free_space = 0;
4245a12b0dc0SNaohiro Aota ffe_ctl->found_offset = 0;
4246a12b0dc0SNaohiro Aota ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
424752bb7a21SBoris Burkov ffe_ctl->size_class = btrfs_calc_block_group_size_class(ffe_ctl->num_bytes);
4248c10859beSNaohiro Aota
42492eda5708SNaohiro Aota if (btrfs_is_zoned(fs_info))
4250a12b0dc0SNaohiro Aota ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED;
42512eda5708SNaohiro Aota
4252962a298fSDavid Sterba ins->type = BTRFS_EXTENT_ITEM_KEY;
425380eb234aSJosef Bacik ins->objectid = 0;
425480eb234aSJosef Bacik ins->offset = 0;
4255b1a4d965SChris Mason
4256cfc2de0fSBoris Burkov trace_find_free_extent(root, ffe_ctl);
42573f7de037SJosef Bacik
4258a12b0dc0SNaohiro Aota space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags);
42591b1d1f66SJosef Bacik if (!space_info) {
4260a12b0dc0SNaohiro Aota btrfs_err(fs_info, "No space info for %llu", ffe_ctl->flags);
42611b1d1f66SJosef Bacik return -ENOSPC;
42621b1d1f66SJosef Bacik }
42632552d17eSJosef Bacik
4264a12b0dc0SNaohiro Aota ret = prepare_allocation(fs_info, ffe_ctl, space_info, ins);
42657e895409SNaohiro Aota if (ret < 0)
42667e895409SNaohiro Aota return ret;
4267fa9c0d79SChris Mason
4268a12b0dc0SNaohiro Aota ffe_ctl->search_start = max(ffe_ctl->search_start,
42690eb997bfSFilipe Manana first_logical_byte(fs_info));
4270a12b0dc0SNaohiro Aota ffe_ctl->search_start = max(ffe_ctl->search_start, ffe_ctl->hint_byte);
4271a12b0dc0SNaohiro Aota if (ffe_ctl->search_start == ffe_ctl->hint_byte) {
4272b4bd745dSQu Wenruo block_group = btrfs_lookup_block_group(fs_info,
4273a12b0dc0SNaohiro Aota ffe_ctl->search_start);
4274817d52f8SJosef Bacik /*
4275817d52f8SJosef Bacik * we don't want to use the block group if it doesn't match our
4276817d52f8SJosef Bacik * allocation bits, or if its not cached.
4277ccf0e725SJosef Bacik *
4278ccf0e725SJosef Bacik * However if we are re-searching with an ideal block group
4279ccf0e725SJosef Bacik * picked out then we don't care that the block group is cached.
4280817d52f8SJosef Bacik */
4281a12b0dc0SNaohiro Aota if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
4282285ff5afSJosef Bacik block_group->cached != BTRFS_CACHE_NO) {
42832552d17eSJosef Bacik down_read(&space_info->groups_sem);
428444fb5511SChris Mason if (list_empty(&block_group->list) ||
428544fb5511SChris Mason block_group->ro) {
428644fb5511SChris Mason /*
428744fb5511SChris Mason * someone is removing this block group,
428844fb5511SChris Mason * we can't jump into the have_block_group
428944fb5511SChris Mason * target because our list pointers are not
429044fb5511SChris Mason * valid
429144fb5511SChris Mason */
429244fb5511SChris Mason btrfs_put_block_group(block_group);
429344fb5511SChris Mason up_read(&space_info->groups_sem);
4294ccf0e725SJosef Bacik } else {
4295a12b0dc0SNaohiro Aota ffe_ctl->index = btrfs_bg_flags_to_raid_index(
42963e72ee88SQu Wenruo block_group->flags);
4297a12b0dc0SNaohiro Aota btrfs_lock_block_group(block_group,
4298a12b0dc0SNaohiro Aota ffe_ctl->delalloc);
4299854c2f36SBoris Burkov ffe_ctl->hinted = true;
43002552d17eSJosef Bacik goto have_block_group;
4301ccf0e725SJosef Bacik }
43022552d17eSJosef Bacik } else if (block_group) {
4303fa9c0d79SChris Mason btrfs_put_block_group(block_group);
430442e70e7aSChris Mason }
43052552d17eSJosef Bacik }
43062552d17eSJosef Bacik search:
4307854c2f36SBoris Burkov trace_find_free_extent_search_loop(root, ffe_ctl);
4308a12b0dc0SNaohiro Aota ffe_ctl->have_caching_bg = false;
4309a12b0dc0SNaohiro Aota if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
4310a12b0dc0SNaohiro Aota ffe_ctl->index == 0)
4311a5e681d9SJosef Bacik full_search = true;
431280eb234aSJosef Bacik down_read(&space_info->groups_sem);
4313b4bd745dSQu Wenruo list_for_each_entry(block_group,
4314a12b0dc0SNaohiro Aota &space_info->block_groups[ffe_ctl->index], list) {
4315c668690dSNaohiro Aota struct btrfs_block_group *bg_ret;
4316c668690dSNaohiro Aota
4317854c2f36SBoris Burkov ffe_ctl->hinted = false;
431814443937SJeff Mahoney /* If the block group is read-only, we can skip it entirely. */
431940ab3be1SNaohiro Aota if (unlikely(block_group->ro)) {
4320a12b0dc0SNaohiro Aota if (ffe_ctl->for_treelog)
432140ab3be1SNaohiro Aota btrfs_clear_treelog_bg(block_group);
4322c2707a25SJohannes Thumshirn if (ffe_ctl->for_data_reloc)
4323c2707a25SJohannes Thumshirn btrfs_clear_data_reloc_bg(block_group);
432414443937SJeff Mahoney continue;
432540ab3be1SNaohiro Aota }
432614443937SJeff Mahoney
4327a12b0dc0SNaohiro Aota btrfs_grab_block_group(block_group, ffe_ctl->delalloc);
4328a12b0dc0SNaohiro Aota ffe_ctl->search_start = block_group->start;
432942e70e7aSChris Mason
433083a50de9SChris Mason /*
433183a50de9SChris Mason * this can happen if we end up cycling through all the
433283a50de9SChris Mason * raid types, but we want to make sure we only allocate
433383a50de9SChris Mason * for the proper type.
433483a50de9SChris Mason */
4335a12b0dc0SNaohiro Aota if (!block_group_bits(block_group, ffe_ctl->flags)) {
433683a50de9SChris Mason u64 extra = BTRFS_BLOCK_GROUP_DUP |
4337c7369b3fSDavid Sterba BTRFS_BLOCK_GROUP_RAID1_MASK |
4338a07e8a46SDavid Sterba BTRFS_BLOCK_GROUP_RAID56_MASK |
433983a50de9SChris Mason BTRFS_BLOCK_GROUP_RAID10;
434083a50de9SChris Mason
434183a50de9SChris Mason /*
434283a50de9SChris Mason * if they asked for extra copies and this block group
434383a50de9SChris Mason * doesn't provide them, bail. This does allow us to
434483a50de9SChris Mason * fill raid0 from raid1.
434583a50de9SChris Mason */
4346a12b0dc0SNaohiro Aota if ((ffe_ctl->flags & extra) && !(block_group->flags & extra))
434783a50de9SChris Mason goto loop;
43482a28468eSQu Wenruo
43492a28468eSQu Wenruo /*
43502a28468eSQu Wenruo * This block group has different flags than we want.
43512a28468eSQu Wenruo * It's possible that we have MIXED_GROUP flag but no
43522a28468eSQu Wenruo * block group is mixed. Just skip such block group.
43532a28468eSQu Wenruo */
4354a12b0dc0SNaohiro Aota btrfs_release_block_group(block_group, ffe_ctl->delalloc);
43552a28468eSQu Wenruo continue;
435683a50de9SChris Mason }
435783a50de9SChris Mason
43582552d17eSJosef Bacik have_block_group:
4359854c2f36SBoris Burkov trace_find_free_extent_have_block_group(root, ffe_ctl, block_group);
4360a12b0dc0SNaohiro Aota ffe_ctl->cached = btrfs_block_group_done(block_group);
4361a12b0dc0SNaohiro Aota if (unlikely(!ffe_ctl->cached)) {
4362a12b0dc0SNaohiro Aota ffe_ctl->have_caching_bg = true;
4363ced8ecf0SOmar Sandoval ret = btrfs_cache_block_group(block_group, false);
4364db8fe64fSJosef Bacik
4365db8fe64fSJosef Bacik /*
4366db8fe64fSJosef Bacik * If we get ENOMEM here or something else we want to
4367db8fe64fSJosef Bacik * try other block groups, because it may not be fatal.
4368db8fe64fSJosef Bacik * However if we can't find anything else we need to
4369db8fe64fSJosef Bacik * save our return here so that we return the actual
4370db8fe64fSJosef Bacik * error that caused problems, not ENOSPC.
4371db8fe64fSJosef Bacik */
4372db8fe64fSJosef Bacik if (ret < 0) {
4373db8fe64fSJosef Bacik if (!cache_block_group_error)
4374db8fe64fSJosef Bacik cache_block_group_error = ret;
4375db8fe64fSJosef Bacik ret = 0;
4376db8fe64fSJosef Bacik goto loop;
4377db8fe64fSJosef Bacik }
43781d4284bdSChris Mason ret = 0;
4379ea6a478eSJosef Bacik }
4380ccf0e725SJosef Bacik
438192fb94b6SJosef Bacik if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) {
438292fb94b6SJosef Bacik if (!cache_block_group_error)
438392fb94b6SJosef Bacik cache_block_group_error = -EIO;
438436cce922SJosef Bacik goto loop;
438592fb94b6SJosef Bacik }
438680eb234aSJosef Bacik
438752bb7a21SBoris Burkov if (!find_free_extent_check_size_class(ffe_ctl, block_group))
438852bb7a21SBoris Burkov goto loop;
438952bb7a21SBoris Burkov
4390c668690dSNaohiro Aota bg_ret = NULL;
4391a12b0dc0SNaohiro Aota ret = do_allocation(block_group, ffe_ctl, &bg_ret);
4392cd361199SJosef Bacik if (ret > 0)
4393fa9c0d79SChris Mason goto loop;
4394cd361199SJosef Bacik
4395cd361199SJosef Bacik if (bg_ret && bg_ret != block_group) {
4396cd361199SJosef Bacik btrfs_release_block_group(block_group, ffe_ctl->delalloc);
4397cd361199SJosef Bacik block_group = bg_ret;
4398fa9c0d79SChris Mason }
4399fa9c0d79SChris Mason
4400c668690dSNaohiro Aota /* Checks */
4401a12b0dc0SNaohiro Aota ffe_ctl->search_start = round_up(ffe_ctl->found_offset,
4402b4bd745dSQu Wenruo fs_info->stripesize);
440380eb234aSJosef Bacik
440480eb234aSJosef Bacik /* move on to the next group */
4405a12b0dc0SNaohiro Aota if (ffe_ctl->search_start + ffe_ctl->num_bytes >
4406b3470b5dSDavid Sterba block_group->start + block_group->length) {
44072eda5708SNaohiro Aota btrfs_add_free_space_unused(block_group,
4408a12b0dc0SNaohiro Aota ffe_ctl->found_offset,
4409a12b0dc0SNaohiro Aota ffe_ctl->num_bytes);
44102552d17eSJosef Bacik goto loop;
44116226cb0aSJosef Bacik }
441280eb234aSJosef Bacik
4413a12b0dc0SNaohiro Aota if (ffe_ctl->found_offset < ffe_ctl->search_start)
44142eda5708SNaohiro Aota btrfs_add_free_space_unused(block_group,
4415a12b0dc0SNaohiro Aota ffe_ctl->found_offset,
4416a12b0dc0SNaohiro Aota ffe_ctl->search_start - ffe_ctl->found_offset);
44176226cb0aSJosef Bacik
4418a12b0dc0SNaohiro Aota ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
4419a12b0dc0SNaohiro Aota ffe_ctl->num_bytes,
442052bb7a21SBoris Burkov ffe_ctl->delalloc,
442152bb7a21SBoris Burkov ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS);
4422f0486c68SYan, Zheng if (ret == -EAGAIN) {
44232eda5708SNaohiro Aota btrfs_add_free_space_unused(block_group,
4424a12b0dc0SNaohiro Aota ffe_ctl->found_offset,
4425a12b0dc0SNaohiro Aota ffe_ctl->num_bytes);
4426f0486c68SYan, Zheng goto loop;
4427f0486c68SYan, Zheng }
44289cfa3e34SFilipe Manana btrfs_inc_block_group_reservations(block_group);
442911833d66SYan Zheng
443080eb234aSJosef Bacik /* we are all good, lets return */
4431a12b0dc0SNaohiro Aota ins->objectid = ffe_ctl->search_start;
4432a12b0dc0SNaohiro Aota ins->offset = ffe_ctl->num_bytes;
4433f0486c68SYan, Zheng
4434cfc2de0fSBoris Burkov trace_btrfs_reserve_extent(block_group, ffe_ctl);
4435a12b0dc0SNaohiro Aota btrfs_release_block_group(block_group, ffe_ctl->delalloc);
443680eb234aSJosef Bacik break;
44372552d17eSJosef Bacik loop:
4438cd361199SJosef Bacik if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
4439cd361199SJosef Bacik !ffe_ctl->retry_uncached) {
4440cd361199SJosef Bacik ffe_ctl->retry_uncached = true;
4441cd361199SJosef Bacik btrfs_wait_block_group_cache_progress(block_group,
4442cd361199SJosef Bacik ffe_ctl->num_bytes +
4443cd361199SJosef Bacik ffe_ctl->empty_cluster +
4444cd361199SJosef Bacik ffe_ctl->empty_size);
4445cd361199SJosef Bacik goto have_block_group;
4446cd361199SJosef Bacik }
4447a12b0dc0SNaohiro Aota release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
444814443937SJeff Mahoney cond_resched();
44492552d17eSJosef Bacik }
44502552d17eSJosef Bacik up_read(&space_info->groups_sem);
4451f5a31e16SChris Mason
4452a12b0dc0SNaohiro Aota ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, full_search);
4453e72d79d6SQu Wenruo if (ret > 0)
445460d2adbbSMiao Xie goto search;
445560d2adbbSMiao Xie
4456db8fe64fSJosef Bacik if (ret == -ENOSPC && !cache_block_group_error) {
4457b4bd745dSQu Wenruo /*
4458b4bd745dSQu Wenruo * Use ffe_ctl->total_free_space as fallback if we can't find
4459b4bd745dSQu Wenruo * any contiguous hole.
4460b4bd745dSQu Wenruo */
4461a12b0dc0SNaohiro Aota if (!ffe_ctl->max_extent_size)
4462a12b0dc0SNaohiro Aota ffe_ctl->max_extent_size = ffe_ctl->total_free_space;
44634f4db217SJosef Bacik spin_lock(&space_info->lock);
4464a12b0dc0SNaohiro Aota space_info->max_extent_size = ffe_ctl->max_extent_size;
44654f4db217SJosef Bacik spin_unlock(&space_info->lock);
4466a12b0dc0SNaohiro Aota ins->offset = ffe_ctl->max_extent_size;
4467db8fe64fSJosef Bacik } else if (ret == -ENOSPC) {
4468db8fe64fSJosef Bacik ret = cache_block_group_error;
44694f4db217SJosef Bacik }
44700f70abe2SChris Mason return ret;
4471fec577fbSChris Mason }
4472ec44a35cSChris Mason
44736f47c706SNikolay Borisov /*
44746f47c706SNikolay Borisov * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
44756f47c706SNikolay Borisov * hole that is at least as big as @num_bytes.
44766f47c706SNikolay Borisov *
44776f47c706SNikolay Borisov * @root - The root that will contain this extent
44786f47c706SNikolay Borisov *
44796f47c706SNikolay Borisov * @ram_bytes - The amount of space in ram that @num_bytes take. This
44806f47c706SNikolay Borisov * is used for accounting purposes. This value differs
44816f47c706SNikolay Borisov * from @num_bytes only in the case of compressed extents.
44826f47c706SNikolay Borisov *
44836f47c706SNikolay Borisov * @num_bytes - Number of bytes to allocate on-disk.
44846f47c706SNikolay Borisov *
44856f47c706SNikolay Borisov * @min_alloc_size - Indicates the minimum amount of space that the
44866f47c706SNikolay Borisov * allocator should try to satisfy. In some cases
44876f47c706SNikolay Borisov * @num_bytes may be larger than what is required and if
44886f47c706SNikolay Borisov * the filesystem is fragmented then allocation fails.
44896f47c706SNikolay Borisov * However, the presence of @min_alloc_size gives a
44906f47c706SNikolay Borisov * chance to try and satisfy the smaller allocation.
44916f47c706SNikolay Borisov *
44926f47c706SNikolay Borisov * @empty_size - A hint that you plan on doing more COW. This is the
44936f47c706SNikolay Borisov * size in bytes the allocator should try to find free
44946f47c706SNikolay Borisov * next to the block it returns. This is just a hint and
44956f47c706SNikolay Borisov * may be ignored by the allocator.
44966f47c706SNikolay Borisov *
44976f47c706SNikolay Borisov * @hint_byte - Hint to the allocator to start searching above the byte
44986f47c706SNikolay Borisov * address passed. It might be ignored.
44996f47c706SNikolay Borisov *
45006f47c706SNikolay Borisov * @ins - This key is modified to record the found hole. It will
45016f47c706SNikolay Borisov * have the following values:
45026f47c706SNikolay Borisov * ins->objectid == start position
45036f47c706SNikolay Borisov * ins->flags = BTRFS_EXTENT_ITEM_KEY
45046f47c706SNikolay Borisov * ins->offset == the size of the hole.
45056f47c706SNikolay Borisov *
45066f47c706SNikolay Borisov * @is_data - Boolean flag indicating whether an extent is
45076f47c706SNikolay Borisov * allocated for data (true) or metadata (false)
45086f47c706SNikolay Borisov *
45096f47c706SNikolay Borisov * @delalloc - Boolean flag indicating whether this allocation is for
45106f47c706SNikolay Borisov * delalloc or not. If 'true' data_rwsem of block groups
45116f47c706SNikolay Borisov * is going to be acquired.
45126f47c706SNikolay Borisov *
45136f47c706SNikolay Borisov *
45146f47c706SNikolay Borisov * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
45156f47c706SNikolay Borisov * case -ENOSPC is returned then @ins->offset will contain the size of the
45166f47c706SNikolay Borisov * largest available hole the allocator managed to find.
45176f47c706SNikolay Borisov */
btrfs_reserve_extent(struct btrfs_root * root,u64 ram_bytes,u64 num_bytes,u64 min_alloc_size,u64 empty_size,u64 hint_byte,struct btrfs_key * ins,int is_data,int delalloc)451818513091SWang Xiaoguang int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
451998d20f67SChris Mason u64 num_bytes, u64 min_alloc_size,
45207bb86316SChris Mason u64 empty_size, u64 hint_byte,
4521e570fd27SMiao Xie struct btrfs_key *ins, int is_data, int delalloc)
4522fec577fbSChris Mason {
4523ab8d0fc4SJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
4524a12b0dc0SNaohiro Aota struct find_free_extent_ctl ffe_ctl = {};
452536af4e07SJosef Bacik bool final_tried = num_bytes == min_alloc_size;
4526b6919a58SDavid Sterba u64 flags;
4527fec577fbSChris Mason int ret;
452840ab3be1SNaohiro Aota bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
4529c2707a25SJohannes Thumshirn bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data);
4530925baeddSChris Mason
45311b86826dSJeff Mahoney flags = get_alloc_profile_by_root(root, is_data);
453298d20f67SChris Mason again:
45330b246afaSJeff Mahoney WARN_ON(num_bytes < fs_info->sectorsize);
4534a12b0dc0SNaohiro Aota
4535a12b0dc0SNaohiro Aota ffe_ctl.ram_bytes = ram_bytes;
4536a12b0dc0SNaohiro Aota ffe_ctl.num_bytes = num_bytes;
4537a85f05e5SNaohiro Aota ffe_ctl.min_alloc_size = min_alloc_size;
4538a12b0dc0SNaohiro Aota ffe_ctl.empty_size = empty_size;
4539a12b0dc0SNaohiro Aota ffe_ctl.flags = flags;
4540a12b0dc0SNaohiro Aota ffe_ctl.delalloc = delalloc;
4541a12b0dc0SNaohiro Aota ffe_ctl.hint_byte = hint_byte;
4542a12b0dc0SNaohiro Aota ffe_ctl.for_treelog = for_treelog;
4543c2707a25SJohannes Thumshirn ffe_ctl.for_data_reloc = for_data_reloc;
4544a12b0dc0SNaohiro Aota
4545a12b0dc0SNaohiro Aota ret = find_free_extent(root, ins, &ffe_ctl);
45469cfa3e34SFilipe Manana if (!ret && !is_data) {
4547ab8d0fc4SJeff Mahoney btrfs_dec_block_group_reservations(fs_info, ins->objectid);
45489cfa3e34SFilipe Manana } else if (ret == -ENOSPC) {
4549a4820398SMiao Xie if (!final_tried && ins->offset) {
4550a4820398SMiao Xie num_bytes = min(num_bytes >> 1, ins->offset);
4551da17066cSJeff Mahoney num_bytes = round_down(num_bytes,
45520b246afaSJeff Mahoney fs_info->sectorsize);
455398d20f67SChris Mason num_bytes = max(num_bytes, min_alloc_size);
455418513091SWang Xiaoguang ram_bytes = num_bytes;
45559e622d6bSMiao Xie if (num_bytes == min_alloc_size)
45569e622d6bSMiao Xie final_tried = true;
455798d20f67SChris Mason goto again;
4558ab8d0fc4SJeff Mahoney } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
45590f9dd46cSJosef Bacik struct btrfs_space_info *sinfo;
45600f9dd46cSJosef Bacik
4561280c2908SJosef Bacik sinfo = btrfs_find_space_info(fs_info, flags);
45620b246afaSJeff Mahoney btrfs_err(fs_info,
4563c2707a25SJohannes Thumshirn "allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
4564c2707a25SJohannes Thumshirn flags, num_bytes, for_treelog, for_data_reloc);
456553804280SJeff Mahoney if (sinfo)
45665da6afebSJosef Bacik btrfs_dump_space_info(fs_info, sinfo,
45675da6afebSJosef Bacik num_bytes, 1);
4568925baeddSChris Mason }
45699e622d6bSMiao Xie }
45700f9dd46cSJosef Bacik
45710f9dd46cSJosef Bacik return ret;
4572e6dcd2dcSChris Mason }
4573e6dcd2dcSChris Mason
/*
 * Give a reserved extent back: the range [@start, @start + @len) is returned
 * to the free space of the block group containing @start and the matching
 * reserved byte accounting is released.
 *
 * @delalloc is passed through unchanged to btrfs_free_reserved_bytes().
 *
 * Returns 0 on success, or -ENOSPC when no block group covers @start.
 */
int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
			       u64 start, u64 len, int delalloc)
{
	struct btrfs_block_group *bg = btrfs_lookup_block_group(fs_info, start);
	int ret;

	if (bg) {
		btrfs_add_free_space(bg, start, len);
		btrfs_free_reserved_bytes(bg, len, delalloc);
		trace_btrfs_reserved_extent_free(fs_info, start, len);

		/* Drop the reference taken by the lookup. */
		btrfs_put_block_group(bg);
		ret = 0;
	} else {
		btrfs_err(fs_info, "Unable to find block group for %llu",
			  start);
		ret = -ENOSPC;
	}

	return ret;
}
4593e688b725SChris Mason
/*
 * Pin the reserved extent [@start, @start + @len) in the block group that
 * contains @start, via pin_down_extent() with its last argument set to 1.
 *
 * Returns the result of pin_down_extent(), or -ENOSPC when no block group
 * covers @start.
 */
int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
			      u64 len)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_group *bg;
	int ret;

	bg = btrfs_lookup_block_group(fs_info, start);
	if (!bg) {
		btrfs_err(fs_info, "unable to find block group for %llu",
			  start);
		return -ENOSPC;
	}

	ret = pin_down_extent(trans, bg, start, len, 1);

	/* Drop the reference taken by the lookup. */
	btrfs_put_block_group(bg);

	return ret;
}
4611e688b725SChris Mason
/*
 * Common tail for turning a reserved extent into an allocated one: remove
 * [@bytenr, @bytenr + @num_bytes) from the free space tree, then update the
 * block group accounting for it.
 *
 * Returns 0 on success or the error of whichever step failed.
 */
static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
				 u64 num_bytes)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret;

	ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
	if (ret)
		return ret;

	ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
	if (!ret) {
		trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
		return 0;
	}

	/* Block group update failed: assert, then report and propagate. */
	ASSERT(!ret);
	btrfs_err(fs_info, "update block group failed for %llu %llu",
		  bytenr, num_bytes);
	return ret;
}
463334666705SJosef Bacik
/*
 * Insert the extent item for a reserved data extent into the extent tree.
 *
 * The item is keyed by @ins and consists of a btrfs_extent_item followed by
 * one inline reference:
 *   - a shared data ref pointing at @parent when @parent is non-zero;
 *   - otherwise an extent data ref built from @root_objectid, @owner and
 *     @offset.
 * Both the extent item's ref count and the inline ref's count are set to
 * @ref_mod.  @flags are OR'ed with BTRFS_EXTENT_FLAG_DATA.
 *
 * Finishes by calling alloc_reserved_extent() for the range described by
 * @ins.  Returns 0 on success or a negative errno.
 */
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	const bool shared = (parent > 0);
	const int type = shared ? BTRFS_SHARED_DATA_REF_KEY :
				  BTRFS_EXTENT_DATA_REF_KEY;
	struct btrfs_root *extent_root;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;

	item_size = sizeof(*ei) + btrfs_extent_inline_ref_size(type);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	extent_root = btrfs_extent_root(fs_info, ins->objectid);
	ret = btrfs_insert_empty_item(trans, extent_root, path, ins, item_size);
	if (ret) {
		btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, ei, ref_mod);
	btrfs_set_extent_generation(leaf, ei, trans->transid);
	btrfs_set_extent_flags(leaf, ei, flags | BTRFS_EXTENT_FLAG_DATA);

	/* The inline ref sits directly behind the extent item. */
	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (shared) {
		struct btrfs_shared_data_ref *sref;

		/* Shared ref payload follows the inline ref header. */
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
		btrfs_set_shared_data_ref_count(leaf, sref, ref_mod);
	} else {
		struct btrfs_extent_data_ref *dref;

		/* Extent data ref starts at the inline ref's offset field. */
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, ref_mod);
	}

	btrfs_mark_buffer_dirty(trans, leaf);
	btrfs_free_path(path);

	return alloc_reserved_extent(trans, ins->objectid, ins->offset);
}
4696e6dcd2dcSChris Mason
alloc_reserved_tree_block(struct btrfs_trans_handle * trans,struct btrfs_delayed_ref_node * node,struct btrfs_delayed_extent_op * extent_op)46975d4f98a2SYan Zheng static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
46984e6bd4e0SNikolay Borisov struct btrfs_delayed_ref_node *node,
469921ebfbe7SNikolay Borisov struct btrfs_delayed_extent_op *extent_op)
47005d4f98a2SYan Zheng {
47019dcdbe01SNikolay Borisov struct btrfs_fs_info *fs_info = trans->fs_info;
470229cbcf40SJosef Bacik struct btrfs_root *extent_root;
47035d4f98a2SYan Zheng int ret;
47045d4f98a2SYan Zheng struct btrfs_extent_item *extent_item;
47054e6bd4e0SNikolay Borisov struct btrfs_key extent_key;
47065d4f98a2SYan Zheng struct btrfs_tree_block_info *block_info;
47075d4f98a2SYan Zheng struct btrfs_extent_inline_ref *iref;
47085d4f98a2SYan Zheng struct btrfs_path *path;
47095d4f98a2SYan Zheng struct extent_buffer *leaf;
47104e6bd4e0SNikolay Borisov struct btrfs_delayed_tree_ref *ref;
47113173a18fSJosef Bacik u32 size = sizeof(*extent_item) + sizeof(*iref);
471221ebfbe7SNikolay Borisov u64 flags = extent_op->flags_to_set;
47130b246afaSJeff Mahoney bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
47143173a18fSJosef Bacik
47154e6bd4e0SNikolay Borisov ref = btrfs_delayed_node_to_tree_ref(node);
47164e6bd4e0SNikolay Borisov
47174e6bd4e0SNikolay Borisov extent_key.objectid = node->bytenr;
47184e6bd4e0SNikolay Borisov if (skinny_metadata) {
47194e6bd4e0SNikolay Borisov extent_key.offset = ref->level;
47204e6bd4e0SNikolay Borisov extent_key.type = BTRFS_METADATA_ITEM_KEY;
47214e6bd4e0SNikolay Borisov } else {
47224e6bd4e0SNikolay Borisov extent_key.offset = node->num_bytes;
47234e6bd4e0SNikolay Borisov extent_key.type = BTRFS_EXTENT_ITEM_KEY;
47243173a18fSJosef Bacik size += sizeof(*block_info);
47254e6bd4e0SNikolay Borisov }
47265d4f98a2SYan Zheng
47275d4f98a2SYan Zheng path = btrfs_alloc_path();
472880ee54bfSJosef Bacik if (!path)
4729d8926bb3SMark Fasheh return -ENOMEM;
47305d4f98a2SYan Zheng
473129cbcf40SJosef Bacik extent_root = btrfs_extent_root(fs_info, extent_key.objectid);
473229cbcf40SJosef Bacik ret = btrfs_insert_empty_item(trans, extent_root, path, &extent_key,
473329cbcf40SJosef Bacik size);
473479787eaaSJeff Mahoney if (ret) {
4735dd825259SChris Mason btrfs_free_path(path);
473679787eaaSJeff Mahoney return ret;
473779787eaaSJeff Mahoney }
47385d4f98a2SYan Zheng
47395d4f98a2SYan Zheng leaf = path->nodes[0];
47405d4f98a2SYan Zheng extent_item = btrfs_item_ptr(leaf, path->slots[0],
47415d4f98a2SYan Zheng struct btrfs_extent_item);
47425d4f98a2SYan Zheng btrfs_set_extent_refs(leaf, extent_item, 1);
47435d4f98a2SYan Zheng btrfs_set_extent_generation(leaf, extent_item, trans->transid);
47445d4f98a2SYan Zheng btrfs_set_extent_flags(leaf, extent_item,
47455d4f98a2SYan Zheng flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
47465d4f98a2SYan Zheng
47473173a18fSJosef Bacik if (skinny_metadata) {
47483173a18fSJosef Bacik iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
47493173a18fSJosef Bacik } else {
47503173a18fSJosef Bacik block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
475121ebfbe7SNikolay Borisov btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
47524e6bd4e0SNikolay Borisov btrfs_set_tree_block_level(leaf, block_info, ref->level);
47535d4f98a2SYan Zheng iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
47543173a18fSJosef Bacik }
47553173a18fSJosef Bacik
4756d4b20733SNikolay Borisov if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
47575d4f98a2SYan Zheng btrfs_set_extent_inline_ref_type(leaf, iref,
47585d4f98a2SYan Zheng BTRFS_SHARED_BLOCK_REF_KEY);
4759d4b20733SNikolay Borisov btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
47605d4f98a2SYan Zheng } else {
47615d4f98a2SYan Zheng btrfs_set_extent_inline_ref_type(leaf, iref,
47625d4f98a2SYan Zheng BTRFS_TREE_BLOCK_REF_KEY);
47634e6bd4e0SNikolay Borisov btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
47645d4f98a2SYan Zheng }
47655d4f98a2SYan Zheng
4766d5e09e38SFilipe Manana btrfs_mark_buffer_dirty(trans, leaf);
47675d4f98a2SYan Zheng btrfs_free_path(path);
47685d4f98a2SYan Zheng
476934666705SJosef Bacik return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
47705d4f98a2SYan Zheng }
47715d4f98a2SYan Zheng
/*
 * Queue a delayed data ref (action BTRFS_ADD_DELAYED_EXTENT) for the reserved
 * data extent described by @ins (objectid == start, offset == length), owned
 * by inode @owner at file offset @offset in @root.
 *
 * Must not be called for the tree-log root; that is a BUG.
 *
 * Returns the result of btrfs_add_delayed_data_ref().
 */
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root, u64 owner,
				     u64 offset, u64 ram_bytes,
				     struct btrfs_key *ins)
{
	struct btrfs_ref ref = { 0 };

	BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);

	btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_EXTENT, ins->objectid,
			       ins->offset, 0);
	btrfs_init_data_ref(&ref, root->root_key.objectid, owner, offset, 0,
			    false);
	btrfs_ref_tree_mod(root->fs_info, &ref);

	return btrfs_add_delayed_data_ref(trans, &ref, ram_bytes);
}
4789e02119d5SChris Mason
4790e02119d5SChris Mason /*
4791e02119d5SChris Mason * this is used by the tree logging recovery code. It records that
4792e02119d5SChris Mason * an extent has been allocated and makes sure to clear the free
4793e02119d5SChris Mason * space cache bits as well
4794e02119d5SChris Mason */
btrfs_alloc_logged_file_extent(struct btrfs_trans_handle * trans,u64 root_objectid,u64 owner,u64 offset,struct btrfs_key * ins)47955d4f98a2SYan Zheng int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
47965d4f98a2SYan Zheng u64 root_objectid, u64 owner, u64 offset,
47975d4f98a2SYan Zheng struct btrfs_key *ins)
4798e02119d5SChris Mason {
479961da2abfSNikolay Borisov struct btrfs_fs_info *fs_info = trans->fs_info;
4800e02119d5SChris Mason int ret;
480132da5386SDavid Sterba struct btrfs_block_group *block_group;
4802ed7a6948SWang Xiaoguang struct btrfs_space_info *space_info;
48038c2a1a30SJosef Bacik
48048c2a1a30SJosef Bacik /*
48058c2a1a30SJosef Bacik * Mixed block groups will exclude before processing the log so we only
480601327610SNicholas D Steeves * need to do the exclude dance if this fs isn't mixed.
48078c2a1a30SJosef Bacik */
48080b246afaSJeff Mahoney if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
48092ff7e61eSJeff Mahoney ret = __exclude_logged_extent(fs_info, ins->objectid,
48102ff7e61eSJeff Mahoney ins->offset);
48118c2a1a30SJosef Bacik if (ret)
48128c2a1a30SJosef Bacik return ret;
48138c2a1a30SJosef Bacik }
4814e02119d5SChris Mason
48150b246afaSJeff Mahoney block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
48168c2a1a30SJosef Bacik if (!block_group)
48178c2a1a30SJosef Bacik return -EINVAL;
481811833d66SYan Zheng
4819ed7a6948SWang Xiaoguang space_info = block_group->space_info;
4820ed7a6948SWang Xiaoguang spin_lock(&space_info->lock);
4821ed7a6948SWang Xiaoguang spin_lock(&block_group->lock);
4822ed7a6948SWang Xiaoguang space_info->bytes_reserved += ins->offset;
4823ed7a6948SWang Xiaoguang block_group->reserved += ins->offset;
4824ed7a6948SWang Xiaoguang spin_unlock(&block_group->lock);
4825ed7a6948SWang Xiaoguang spin_unlock(&space_info->lock);
4826ed7a6948SWang Xiaoguang
4827ef89b824SNikolay Borisov ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
4828ef89b824SNikolay Borisov offset, ins, 1);
4829bd727173SJosef Bacik if (ret)
4830ab9b2c7bSJosef Bacik btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
4831b50c6e25SJosef Bacik btrfs_put_block_group(block_group);
4832e02119d5SChris Mason return ret;
4833e02119d5SChris Mason }
4834e02119d5SChris Mason
483548a3b636SEric Sandeen static struct extent_buffer *
btrfs_init_new_buffer(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytenr,int level,u64 owner,enum btrfs_lock_nesting nest)483648a3b636SEric Sandeen btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
48379631e4ccSJosef Bacik u64 bytenr, int level, u64 owner,
48389631e4ccSJosef Bacik enum btrfs_lock_nesting nest)
483965b51a00SChris Mason {
48400b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
484165b51a00SChris Mason struct extent_buffer *buf;
4842b40130b2SJosef Bacik u64 lockdep_owner = owner;
484365b51a00SChris Mason
48443fbaf258SJosef Bacik buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
4845c871b0f2SLiu Bo if (IS_ERR(buf))
4846c871b0f2SLiu Bo return buf;
4847c871b0f2SLiu Bo
4848b72c3abaSQu Wenruo /*
4849b72c3abaSQu Wenruo * Extra safety check in case the extent tree is corrupted and extent
4850b72c3abaSQu Wenruo * allocator chooses to use a tree block which is already used and
4851b72c3abaSQu Wenruo * locked.
4852b72c3abaSQu Wenruo */
4853b72c3abaSQu Wenruo if (buf->lock_owner == current->pid) {
4854b72c3abaSQu Wenruo btrfs_err_rl(fs_info,
4855b72c3abaSQu Wenruo "tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
4856b72c3abaSQu Wenruo buf->start, btrfs_header_owner(buf), current->pid);
4857b72c3abaSQu Wenruo free_extent_buffer(buf);
4858b72c3abaSQu Wenruo return ERR_PTR(-EUCLEAN);
4859b72c3abaSQu Wenruo }
4860b72c3abaSQu Wenruo
4861e114c545SJosef Bacik /*
4862b40130b2SJosef Bacik * The reloc trees are just snapshots, so we need them to appear to be
4863b40130b2SJosef Bacik * just like any other fs tree WRT lockdep.
4864b40130b2SJosef Bacik *
4865b40130b2SJosef Bacik * The exception however is in replace_path() in relocation, where we
4866b40130b2SJosef Bacik * hold the lock on the original fs root and then search for the reloc
4867b40130b2SJosef Bacik * root. At that point we need to make sure any reloc root buffers are
4868b40130b2SJosef Bacik * set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make
4869b40130b2SJosef Bacik * lockdep happy.
4870b40130b2SJosef Bacik */
4871b40130b2SJosef Bacik if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID &&
4872b40130b2SJosef Bacik !test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
4873b40130b2SJosef Bacik lockdep_owner = BTRFS_FS_TREE_OBJECTID;
4874b40130b2SJosef Bacik
4875618d1d7dSFilipe Manana /* btrfs_clear_buffer_dirty() accesses generation field. */
4876cbddcc4fSTetsuo Handa btrfs_set_header_generation(buf, trans->transid);
4877cbddcc4fSTetsuo Handa
4878b40130b2SJosef Bacik /*
4879e114c545SJosef Bacik * This needs to stay, because we could allocate a freed block from an
4880e114c545SJosef Bacik * old tree into a new tree, so we need to make sure this new block is
4881e114c545SJosef Bacik * set to the appropriate level and owner.
4882e114c545SJosef Bacik */
4883b40130b2SJosef Bacik btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level);
4884b40130b2SJosef Bacik
48859631e4ccSJosef Bacik __btrfs_tree_lock(buf, nest);
4886190a8339SJosef Bacik btrfs_clear_buffer_dirty(trans, buf);
48873083ee2eSJosef Bacik clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
4888d3575156SNaohiro Aota clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);
4889b4ce94deSChris Mason
48904db8c528SDavid Sterba set_extent_buffer_uptodate(buf);
4891b4ce94deSChris Mason
4892bc877d28SNikolay Borisov memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
4893bc877d28SNikolay Borisov btrfs_set_header_level(buf, level);
4894bc877d28SNikolay Borisov btrfs_set_header_bytenr(buf, buf->start);
4895bc877d28SNikolay Borisov btrfs_set_header_generation(buf, trans->transid);
4896bc877d28SNikolay Borisov btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
4897bc877d28SNikolay Borisov btrfs_set_header_owner(buf, owner);
4898de37aa51SNikolay Borisov write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
4899bc877d28SNikolay Borisov write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
4900d0c803c4SChris Mason if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
4901656f30dbSFilipe Manana buf->log_index = root->log_transid % 2;
49028cef4e16SYan, Zheng /*
49038cef4e16SYan, Zheng * we allow two log transactions at a time, use different
490452042d8eSAndrea Gelmini * EXTENT bit to differentiate dirty pages.
49058cef4e16SYan, Zheng */
4906656f30dbSFilipe Manana if (buf->log_index == 0)
4907fe1a598cSDavid Sterba set_extent_bit(&root->dirty_log_pages, buf->start,
4908fe1a598cSDavid Sterba buf->start + buf->len - 1,
49091d126800SDavid Sterba EXTENT_DIRTY, NULL);
49108cef4e16SYan, Zheng else
4911eea8686eSDavid Sterba set_extent_bit(&root->dirty_log_pages, buf->start,
4912eea8686eSDavid Sterba buf->start + buf->len - 1,
49131d126800SDavid Sterba EXTENT_NEW, NULL);
4914d0c803c4SChris Mason } else {
4915656f30dbSFilipe Manana buf->log_index = -1;
4916fe1a598cSDavid Sterba set_extent_bit(&trans->transaction->dirty_pages, buf->start,
49171d126800SDavid Sterba buf->start + buf->len - 1, EXTENT_DIRTY, NULL);
4918d0c803c4SChris Mason }
4919b4ce94deSChris Mason /* this returns a buffer locked for blocking */
492065b51a00SChris Mason return buf;
492165b51a00SChris Mason }
492265b51a00SChris Mason
4923fec577fbSChris Mason /*
4924f0486c68SYan, Zheng * finds a free extent and does all the dirty work required for allocation
492567b7859eSOmar Sandoval * returns the tree buffer or an ERR_PTR on error.
4926fec577fbSChris Mason */
btrfs_alloc_tree_block(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 parent,u64 root_objectid,const struct btrfs_disk_key * key,int level,u64 hint,u64 empty_size,enum btrfs_lock_nesting nest)49274d75f8a9SDavid Sterba struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
49284d75f8a9SDavid Sterba struct btrfs_root *root,
49295d4f98a2SYan Zheng u64 parent, u64 root_objectid,
4930310712b2SOmar Sandoval const struct btrfs_disk_key *key,
4931310712b2SOmar Sandoval int level, u64 hint,
49329631e4ccSJosef Bacik u64 empty_size,
49339631e4ccSJosef Bacik enum btrfs_lock_nesting nest)
4934fec577fbSChris Mason {
49350b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
4936e2fa7227SChris Mason struct btrfs_key ins;
4937f0486c68SYan, Zheng struct btrfs_block_rsv *block_rsv;
49385f39d397SChris Mason struct extent_buffer *buf;
493967b7859eSOmar Sandoval struct btrfs_delayed_extent_op *extent_op;
4940ed4f255bSQu Wenruo struct btrfs_ref generic_ref = { 0 };
4941f0486c68SYan, Zheng u64 flags = 0;
4942f0486c68SYan, Zheng int ret;
49430b246afaSJeff Mahoney u32 blocksize = fs_info->nodesize;
49440b246afaSJeff Mahoney bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
4945f0486c68SYan, Zheng
494605653ef3SDavid Sterba #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
49470b246afaSJeff Mahoney if (btrfs_is_testing(fs_info)) {
4948faa2dbf0SJosef Bacik buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
49499631e4ccSJosef Bacik level, root_objectid, nest);
4950faa2dbf0SJosef Bacik if (!IS_ERR(buf))
4951faa2dbf0SJosef Bacik root->alloc_bytenr += blocksize;
4952faa2dbf0SJosef Bacik return buf;
4953faa2dbf0SJosef Bacik }
495405653ef3SDavid Sterba #endif
4955fccb84c9SDavid Sterba
495667f9c220SJosef Bacik block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
4957f0486c68SYan, Zheng if (IS_ERR(block_rsv))
4958f0486c68SYan, Zheng return ERR_CAST(block_rsv);
4959f0486c68SYan, Zheng
496018513091SWang Xiaoguang ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
4961e570fd27SMiao Xie empty_size, hint, &ins, 0, 0);
496267b7859eSOmar Sandoval if (ret)
496367b7859eSOmar Sandoval goto out_unuse;
496455c69072SChris Mason
4965bc877d28SNikolay Borisov buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
49669631e4ccSJosef Bacik root_objectid, nest);
496767b7859eSOmar Sandoval if (IS_ERR(buf)) {
496867b7859eSOmar Sandoval ret = PTR_ERR(buf);
496967b7859eSOmar Sandoval goto out_free_reserved;
497067b7859eSOmar Sandoval }
4971f0486c68SYan, Zheng
4972f0486c68SYan, Zheng if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4973f0486c68SYan, Zheng if (parent == 0)
4974f0486c68SYan, Zheng parent = ins.objectid;
4975f0486c68SYan, Zheng flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
4976f0486c68SYan, Zheng } else
4977f0486c68SYan, Zheng BUG_ON(parent > 0);
4978f0486c68SYan, Zheng
4979f0486c68SYan, Zheng if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
498078a6184aSMiao Xie extent_op = btrfs_alloc_delayed_extent_op();
498167b7859eSOmar Sandoval if (!extent_op) {
498267b7859eSOmar Sandoval ret = -ENOMEM;
498367b7859eSOmar Sandoval goto out_free_buf;
498467b7859eSOmar Sandoval }
4985f0486c68SYan, Zheng if (key)
4986f0486c68SYan, Zheng memcpy(&extent_op->key, key, sizeof(extent_op->key));
4987f0486c68SYan, Zheng else
4988f0486c68SYan, Zheng memset(&extent_op->key, 0, sizeof(extent_op->key));
4989f0486c68SYan, Zheng extent_op->flags_to_set = flags;
499035b3ad50SDavid Sterba extent_op->update_key = skinny_metadata ? false : true;
499135b3ad50SDavid Sterba extent_op->update_flags = true;
4992b1c79e09SJosef Bacik extent_op->level = level;
4993f0486c68SYan, Zheng
4994ed4f255bSQu Wenruo btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
4995ed4f255bSQu Wenruo ins.objectid, ins.offset, parent);
4996f42c5da6SNikolay Borisov btrfs_init_tree_ref(&generic_ref, level, root_objectid,
4997f42c5da6SNikolay Borisov root->root_key.objectid, false);
49988a5040f7SQu Wenruo btrfs_ref_tree_mod(fs_info, &generic_ref);
49992187374fSJosef Bacik ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
500067b7859eSOmar Sandoval if (ret)
500167b7859eSOmar Sandoval goto out_free_delayed;
5002f0486c68SYan, Zheng }
5003fec577fbSChris Mason return buf;
500467b7859eSOmar Sandoval
500567b7859eSOmar Sandoval out_free_delayed:
500667b7859eSOmar Sandoval btrfs_free_delayed_extent_op(extent_op);
500767b7859eSOmar Sandoval out_free_buf:
500819ea40ddSQu Wenruo btrfs_tree_unlock(buf);
500967b7859eSOmar Sandoval free_extent_buffer(buf);
501067b7859eSOmar Sandoval out_free_reserved:
50112ff7e61eSJeff Mahoney btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
501267b7859eSOmar Sandoval out_unuse:
501367f9c220SJosef Bacik btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
501467b7859eSOmar Sandoval return ERR_PTR(ret);
5015fec577fbSChris Mason }
5016a28ec197SChris Mason
50172c47e605SYan Zheng struct walk_control {
50182c47e605SYan Zheng u64 refs[BTRFS_MAX_LEVEL];
50192c47e605SYan Zheng u64 flags[BTRFS_MAX_LEVEL];
50202c47e605SYan Zheng struct btrfs_key update_progress;
5021aea6f028SJosef Bacik struct btrfs_key drop_progress;
5022aea6f028SJosef Bacik int drop_level;
50232c47e605SYan Zheng int stage;
50242c47e605SYan Zheng int level;
50252c47e605SYan Zheng int shared_level;
50262c47e605SYan Zheng int update_ref;
50272c47e605SYan Zheng int keep_locks;
50281c4850e2SYan, Zheng int reada_slot;
50291c4850e2SYan, Zheng int reada_count;
503078c52d9eSJosef Bacik int restarted;
50312c47e605SYan Zheng };
50322c47e605SYan Zheng
50332c47e605SYan Zheng #define DROP_REFERENCE 1
50342c47e605SYan Zheng #define UPDATE_BACKREF 2
50352c47e605SYan Zheng
reada_walk_down(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct walk_control * wc,struct btrfs_path * path)50361c4850e2SYan, Zheng static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
50371c4850e2SYan, Zheng struct btrfs_root *root,
50381c4850e2SYan, Zheng struct walk_control *wc,
50391c4850e2SYan, Zheng struct btrfs_path *path)
50401c4850e2SYan, Zheng {
50410b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
50421c4850e2SYan, Zheng u64 bytenr;
50431c4850e2SYan, Zheng u64 generation;
50441c4850e2SYan, Zheng u64 refs;
504594fcca9fSYan, Zheng u64 flags;
50461c4850e2SYan, Zheng u32 nritems;
50471c4850e2SYan, Zheng struct btrfs_key key;
50481c4850e2SYan, Zheng struct extent_buffer *eb;
50491c4850e2SYan, Zheng int ret;
50501c4850e2SYan, Zheng int slot;
50511c4850e2SYan, Zheng int nread = 0;
50521c4850e2SYan, Zheng
50531c4850e2SYan, Zheng if (path->slots[wc->level] < wc->reada_slot) {
50541c4850e2SYan, Zheng wc->reada_count = wc->reada_count * 2 / 3;
50551c4850e2SYan, Zheng wc->reada_count = max(wc->reada_count, 2);
50561c4850e2SYan, Zheng } else {
50571c4850e2SYan, Zheng wc->reada_count = wc->reada_count * 3 / 2;
50581c4850e2SYan, Zheng wc->reada_count = min_t(int, wc->reada_count,
50590b246afaSJeff Mahoney BTRFS_NODEPTRS_PER_BLOCK(fs_info));
50601c4850e2SYan, Zheng }
50611c4850e2SYan, Zheng
50621c4850e2SYan, Zheng eb = path->nodes[wc->level];
50631c4850e2SYan, Zheng nritems = btrfs_header_nritems(eb);
50641c4850e2SYan, Zheng
50651c4850e2SYan, Zheng for (slot = path->slots[wc->level]; slot < nritems; slot++) {
50661c4850e2SYan, Zheng if (nread >= wc->reada_count)
50671c4850e2SYan, Zheng break;
50681c4850e2SYan, Zheng
50691c4850e2SYan, Zheng cond_resched();
50701c4850e2SYan, Zheng bytenr = btrfs_node_blockptr(eb, slot);
50711c4850e2SYan, Zheng generation = btrfs_node_ptr_generation(eb, slot);
50721c4850e2SYan, Zheng
50731c4850e2SYan, Zheng if (slot == path->slots[wc->level])
50741c4850e2SYan, Zheng goto reada;
50751c4850e2SYan, Zheng
50761c4850e2SYan, Zheng if (wc->stage == UPDATE_BACKREF &&
50771c4850e2SYan, Zheng generation <= root->root_key.offset)
50781c4850e2SYan, Zheng continue;
50791c4850e2SYan, Zheng
508094fcca9fSYan, Zheng /* We don't lock the tree block, it's OK to be racy here */
50812ff7e61eSJeff Mahoney ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
50823173a18fSJosef Bacik wc->level - 1, 1, &refs,
50833173a18fSJosef Bacik &flags);
508479787eaaSJeff Mahoney /* We don't care about errors in readahead. */
508579787eaaSJeff Mahoney if (ret < 0)
508679787eaaSJeff Mahoney continue;
50871c4850e2SYan, Zheng BUG_ON(refs == 0);
508894fcca9fSYan, Zheng
508994fcca9fSYan, Zheng if (wc->stage == DROP_REFERENCE) {
50901c4850e2SYan, Zheng if (refs == 1)
50911c4850e2SYan, Zheng goto reada;
50921c4850e2SYan, Zheng
509394fcca9fSYan, Zheng if (wc->level == 1 &&
509494fcca9fSYan, Zheng (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
509594fcca9fSYan, Zheng continue;
50961c4850e2SYan, Zheng if (!wc->update_ref ||
50971c4850e2SYan, Zheng generation <= root->root_key.offset)
50981c4850e2SYan, Zheng continue;
50991c4850e2SYan, Zheng btrfs_node_key_to_cpu(eb, &key, slot);
51001c4850e2SYan, Zheng ret = btrfs_comp_cpu_keys(&key,
51011c4850e2SYan, Zheng &wc->update_progress);
51021c4850e2SYan, Zheng if (ret < 0)
51031c4850e2SYan, Zheng continue;
510494fcca9fSYan, Zheng } else {
510594fcca9fSYan, Zheng if (wc->level == 1 &&
510694fcca9fSYan, Zheng (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
510794fcca9fSYan, Zheng continue;
51081c4850e2SYan, Zheng }
51091c4850e2SYan, Zheng reada:
5110bfb484d9SJosef Bacik btrfs_readahead_node_child(eb, slot);
51111c4850e2SYan, Zheng nread++;
51121c4850e2SYan, Zheng }
51131c4850e2SYan, Zheng wc->reada_slot = slot;
51141c4850e2SYan, Zheng }
51151c4850e2SYan, Zheng
51169aca1d51SChris Mason /*
51172c016dc2SLiu Bo * helper to process tree block while walking down the tree.
51182c47e605SYan Zheng *
51192c47e605SYan Zheng * when wc->stage == UPDATE_BACKREF, this function updates
51202c47e605SYan Zheng * back refs for pointers in the block.
51212c47e605SYan Zheng *
51222c47e605SYan Zheng * NOTE: return value 1 means we should stop walking down.
5123f82d02d9SYan Zheng */
walk_down_proc(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc,int lookup_info)51242c47e605SYan Zheng static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
51252c47e605SYan Zheng struct btrfs_root *root,
51262c47e605SYan Zheng struct btrfs_path *path,
512794fcca9fSYan, Zheng struct walk_control *wc, int lookup_info)
51282c47e605SYan Zheng {
51292ff7e61eSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
51302c47e605SYan Zheng int level = wc->level;
51312c47e605SYan Zheng struct extent_buffer *eb = path->nodes[level];
51322c47e605SYan Zheng u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
51332c47e605SYan Zheng int ret;
51342c47e605SYan Zheng
51352c47e605SYan Zheng if (wc->stage == UPDATE_BACKREF &&
51362c47e605SYan Zheng btrfs_header_owner(eb) != root->root_key.objectid)
51372c47e605SYan Zheng return 1;
51382c47e605SYan Zheng
51392c47e605SYan Zheng /*
51402c47e605SYan Zheng * when reference count of tree block is 1, it won't increase
51412c47e605SYan Zheng * again. once full backref flag is set, we never clear it.
51422c47e605SYan Zheng */
514394fcca9fSYan, Zheng if (lookup_info &&
514494fcca9fSYan, Zheng ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
514594fcca9fSYan, Zheng (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
51462c47e605SYan Zheng BUG_ON(!path->locks[level]);
51472ff7e61eSJeff Mahoney ret = btrfs_lookup_extent_info(trans, fs_info,
51483173a18fSJosef Bacik eb->start, level, 1,
51492c47e605SYan Zheng &wc->refs[level],
51502c47e605SYan Zheng &wc->flags[level]);
515179787eaaSJeff Mahoney BUG_ON(ret == -ENOMEM);
515279787eaaSJeff Mahoney if (ret)
515379787eaaSJeff Mahoney return ret;
51542c47e605SYan Zheng BUG_ON(wc->refs[level] == 0);
51552c47e605SYan Zheng }
51562c47e605SYan Zheng
51572c47e605SYan Zheng if (wc->stage == DROP_REFERENCE) {
51582c47e605SYan Zheng if (wc->refs[level] > 1)
51592c47e605SYan Zheng return 1;
51602c47e605SYan Zheng
51612c47e605SYan Zheng if (path->locks[level] && !wc->keep_locks) {
5162bd681513SChris Mason btrfs_tree_unlock_rw(eb, path->locks[level]);
51632c47e605SYan Zheng path->locks[level] = 0;
51642c47e605SYan Zheng }
51652c47e605SYan Zheng return 0;
51662c47e605SYan Zheng }
51672c47e605SYan Zheng
51682c47e605SYan Zheng /* wc->stage == UPDATE_BACKREF */
51692c47e605SYan Zheng if (!(wc->flags[level] & flag)) {
51702c47e605SYan Zheng BUG_ON(!path->locks[level]);
5171e339a6b0SJosef Bacik ret = btrfs_inc_ref(trans, root, eb, 1);
517279787eaaSJeff Mahoney BUG_ON(ret); /* -ENOMEM */
5173e339a6b0SJosef Bacik ret = btrfs_dec_ref(trans, root, eb, 0);
517479787eaaSJeff Mahoney BUG_ON(ret); /* -ENOMEM */
51754aec05faSJosef Bacik ret = btrfs_set_disk_extent_flags(trans, eb, flag);
517679787eaaSJeff Mahoney BUG_ON(ret); /* -ENOMEM */
51772c47e605SYan Zheng wc->flags[level] |= flag;
51782c47e605SYan Zheng }
51792c47e605SYan Zheng
51802c47e605SYan Zheng /*
51812c47e605SYan Zheng * the block is shared by multiple trees, so it's not good to
51822c47e605SYan Zheng * keep the tree lock
51832c47e605SYan Zheng */
51842c47e605SYan Zheng if (path->locks[level] && level > 0) {
5185bd681513SChris Mason btrfs_tree_unlock_rw(eb, path->locks[level]);
51862c47e605SYan Zheng path->locks[level] = 0;
51872c47e605SYan Zheng }
51882c47e605SYan Zheng return 0;
51892c47e605SYan Zheng }
51902c47e605SYan Zheng
51912c47e605SYan Zheng /*
519278c52d9eSJosef Bacik * This is used to verify a ref exists for this root to deal with a bug where we
519378c52d9eSJosef Bacik * would have a drop_progress key that hadn't been updated properly.
519478c52d9eSJosef Bacik */
check_ref_exists(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytenr,u64 parent,int level)519578c52d9eSJosef Bacik static int check_ref_exists(struct btrfs_trans_handle *trans,
519678c52d9eSJosef Bacik struct btrfs_root *root, u64 bytenr, u64 parent,
519778c52d9eSJosef Bacik int level)
519878c52d9eSJosef Bacik {
519978c52d9eSJosef Bacik struct btrfs_path *path;
520078c52d9eSJosef Bacik struct btrfs_extent_inline_ref *iref;
520178c52d9eSJosef Bacik int ret;
520278c52d9eSJosef Bacik
520378c52d9eSJosef Bacik path = btrfs_alloc_path();
520478c52d9eSJosef Bacik if (!path)
520578c52d9eSJosef Bacik return -ENOMEM;
520678c52d9eSJosef Bacik
520778c52d9eSJosef Bacik ret = lookup_extent_backref(trans, path, &iref, bytenr,
520878c52d9eSJosef Bacik root->fs_info->nodesize, parent,
520978c52d9eSJosef Bacik root->root_key.objectid, level, 0);
521078c52d9eSJosef Bacik btrfs_free_path(path);
521178c52d9eSJosef Bacik if (ret == -ENOENT)
521278c52d9eSJosef Bacik return 0;
521378c52d9eSJosef Bacik if (ret < 0)
521478c52d9eSJosef Bacik return ret;
521578c52d9eSJosef Bacik return 1;
521678c52d9eSJosef Bacik }
521778c52d9eSJosef Bacik
521878c52d9eSJosef Bacik /*
52192c016dc2SLiu Bo * helper to process tree block pointer.
52201c4850e2SYan, Zheng *
52211c4850e2SYan, Zheng * when wc->stage == DROP_REFERENCE, this function checks
52221c4850e2SYan, Zheng * reference count of the block pointed to. if the block
52231c4850e2SYan, Zheng * is shared and we need update back refs for the subtree
52241c4850e2SYan, Zheng * rooted at the block, this function changes wc->stage to
52251c4850e2SYan, Zheng * UPDATE_BACKREF. if the block is shared and there is no
52261c4850e2SYan, Zheng * need to update back, this function drops the reference
52271c4850e2SYan, Zheng * to the block.
52281c4850e2SYan, Zheng *
52291c4850e2SYan, Zheng * NOTE: return value 1 means we should stop walking down.
52301c4850e2SYan, Zheng */
do_walk_down(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc,int * lookup_info)52311c4850e2SYan, Zheng static noinline int do_walk_down(struct btrfs_trans_handle *trans,
52321c4850e2SYan, Zheng struct btrfs_root *root,
52331c4850e2SYan, Zheng struct btrfs_path *path,
523494fcca9fSYan, Zheng struct walk_control *wc, int *lookup_info)
52351c4850e2SYan, Zheng {
52360b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
52371c4850e2SYan, Zheng u64 bytenr;
52381c4850e2SYan, Zheng u64 generation;
52391c4850e2SYan, Zheng u64 parent;
5240789d6a3aSQu Wenruo struct btrfs_tree_parent_check check = { 0 };
52411c4850e2SYan, Zheng struct btrfs_key key;
5242ffd4bb2aSQu Wenruo struct btrfs_ref ref = { 0 };
52431c4850e2SYan, Zheng struct extent_buffer *next;
52441c4850e2SYan, Zheng int level = wc->level;
52451c4850e2SYan, Zheng int reada = 0;
52461c4850e2SYan, Zheng int ret = 0;
52471152651aSMark Fasheh bool need_account = false;
52481c4850e2SYan, Zheng
52491c4850e2SYan, Zheng generation = btrfs_node_ptr_generation(path->nodes[level],
52501c4850e2SYan, Zheng path->slots[level]);
52511c4850e2SYan, Zheng /*
52521c4850e2SYan, Zheng * if the lower level block was created before the snapshot
52531c4850e2SYan, Zheng * was created, we know there is no need to update back refs
52541c4850e2SYan, Zheng * for the subtree
52551c4850e2SYan, Zheng */
52561c4850e2SYan, Zheng if (wc->stage == UPDATE_BACKREF &&
525794fcca9fSYan, Zheng generation <= root->root_key.offset) {
525894fcca9fSYan, Zheng *lookup_info = 1;
52591c4850e2SYan, Zheng return 1;
526094fcca9fSYan, Zheng }
52611c4850e2SYan, Zheng
52621c4850e2SYan, Zheng bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
5263789d6a3aSQu Wenruo
5264789d6a3aSQu Wenruo check.level = level - 1;
5265789d6a3aSQu Wenruo check.transid = generation;
5266789d6a3aSQu Wenruo check.owner_root = root->root_key.objectid;
5267789d6a3aSQu Wenruo check.has_first_key = true;
5268789d6a3aSQu Wenruo btrfs_node_key_to_cpu(path->nodes[level], &check.first_key,
5269581c1760SQu Wenruo path->slots[level]);
52701c4850e2SYan, Zheng
52710b246afaSJeff Mahoney next = find_extent_buffer(fs_info, bytenr);
52721c4850e2SYan, Zheng if (!next) {
52733fbaf258SJosef Bacik next = btrfs_find_create_tree_block(fs_info, bytenr,
52743fbaf258SJosef Bacik root->root_key.objectid, level - 1);
5275c871b0f2SLiu Bo if (IS_ERR(next))
5276c871b0f2SLiu Bo return PTR_ERR(next);
52771c4850e2SYan, Zheng reada = 1;
52781c4850e2SYan, Zheng }
52791c4850e2SYan, Zheng btrfs_tree_lock(next);
52801c4850e2SYan, Zheng
52812ff7e61eSJeff Mahoney ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
52821c4850e2SYan, Zheng &wc->refs[level - 1],
52831c4850e2SYan, Zheng &wc->flags[level - 1]);
52844867268cSJosef Bacik if (ret < 0)
52854867268cSJosef Bacik goto out_unlock;
528679787eaaSJeff Mahoney
5287c2cf52ebSSimon Kirby if (unlikely(wc->refs[level - 1] == 0)) {
52880b246afaSJeff Mahoney btrfs_err(fs_info, "Missing references.");
52894867268cSJosef Bacik ret = -EIO;
52904867268cSJosef Bacik goto out_unlock;
5291c2cf52ebSSimon Kirby }
529294fcca9fSYan, Zheng *lookup_info = 0;
52931c4850e2SYan, Zheng
529494fcca9fSYan, Zheng if (wc->stage == DROP_REFERENCE) {
52951c4850e2SYan, Zheng if (wc->refs[level - 1] > 1) {
52961152651aSMark Fasheh need_account = true;
529794fcca9fSYan, Zheng if (level == 1 &&
529894fcca9fSYan, Zheng (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
529994fcca9fSYan, Zheng goto skip;
530094fcca9fSYan, Zheng
53011c4850e2SYan, Zheng if (!wc->update_ref ||
53021c4850e2SYan, Zheng generation <= root->root_key.offset)
53031c4850e2SYan, Zheng goto skip;
53041c4850e2SYan, Zheng
53051c4850e2SYan, Zheng btrfs_node_key_to_cpu(path->nodes[level], &key,
53061c4850e2SYan, Zheng path->slots[level]);
53071c4850e2SYan, Zheng ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
53081c4850e2SYan, Zheng if (ret < 0)
53091c4850e2SYan, Zheng goto skip;
53101c4850e2SYan, Zheng
53111c4850e2SYan, Zheng wc->stage = UPDATE_BACKREF;
53121c4850e2SYan, Zheng wc->shared_level = level - 1;
53131c4850e2SYan, Zheng }
531494fcca9fSYan, Zheng } else {
531594fcca9fSYan, Zheng if (level == 1 &&
531694fcca9fSYan, Zheng (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
531794fcca9fSYan, Zheng goto skip;
53181c4850e2SYan, Zheng }
53191c4850e2SYan, Zheng
5320b9fab919SChris Mason if (!btrfs_buffer_uptodate(next, generation, 0)) {
53211c4850e2SYan, Zheng btrfs_tree_unlock(next);
53221c4850e2SYan, Zheng free_extent_buffer(next);
53231c4850e2SYan, Zheng next = NULL;
532494fcca9fSYan, Zheng *lookup_info = 1;
53251c4850e2SYan, Zheng }
53261c4850e2SYan, Zheng
53271c4850e2SYan, Zheng if (!next) {
53281c4850e2SYan, Zheng if (reada && level == 1)
53291c4850e2SYan, Zheng reada_walk_down(trans, root, wc, path);
5330789d6a3aSQu Wenruo next = read_tree_block(fs_info, bytenr, &check);
533164c043deSLiu Bo if (IS_ERR(next)) {
533264c043deSLiu Bo return PTR_ERR(next);
533364c043deSLiu Bo } else if (!extent_buffer_uptodate(next)) {
5334416bc658SJosef Bacik free_extent_buffer(next);
533597d9a8a4STsutomu Itoh return -EIO;
5336416bc658SJosef Bacik }
53371c4850e2SYan, Zheng btrfs_tree_lock(next);
53381c4850e2SYan, Zheng }
53391c4850e2SYan, Zheng
53401c4850e2SYan, Zheng level--;
53414867268cSJosef Bacik ASSERT(level == btrfs_header_level(next));
53424867268cSJosef Bacik if (level != btrfs_header_level(next)) {
53434867268cSJosef Bacik btrfs_err(root->fs_info, "mismatched level");
53444867268cSJosef Bacik ret = -EIO;
53454867268cSJosef Bacik goto out_unlock;
53464867268cSJosef Bacik }
53471c4850e2SYan, Zheng path->nodes[level] = next;
53481c4850e2SYan, Zheng path->slots[level] = 0;
5349ac5887c8SJosef Bacik path->locks[level] = BTRFS_WRITE_LOCK;
53501c4850e2SYan, Zheng wc->level = level;
53511c4850e2SYan, Zheng if (wc->level == 1)
53521c4850e2SYan, Zheng wc->reada_slot = 0;
53531c4850e2SYan, Zheng return 0;
53541c4850e2SYan, Zheng skip:
53551c4850e2SYan, Zheng wc->refs[level - 1] = 0;
53561c4850e2SYan, Zheng wc->flags[level - 1] = 0;
535794fcca9fSYan, Zheng if (wc->stage == DROP_REFERENCE) {
53581c4850e2SYan, Zheng if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
53591c4850e2SYan, Zheng parent = path->nodes[level]->start;
53601c4850e2SYan, Zheng } else {
53614867268cSJosef Bacik ASSERT(root->root_key.objectid ==
53621c4850e2SYan, Zheng btrfs_header_owner(path->nodes[level]));
53634867268cSJosef Bacik if (root->root_key.objectid !=
53644867268cSJosef Bacik btrfs_header_owner(path->nodes[level])) {
53654867268cSJosef Bacik btrfs_err(root->fs_info,
53664867268cSJosef Bacik "mismatched block owner");
53674867268cSJosef Bacik ret = -EIO;
53684867268cSJosef Bacik goto out_unlock;
53694867268cSJosef Bacik }
53701c4850e2SYan, Zheng parent = 0;
53711c4850e2SYan, Zheng }
53721c4850e2SYan, Zheng
53732cd86d30SQu Wenruo /*
537478c52d9eSJosef Bacik * If we had a drop_progress we need to verify the refs are set
537578c52d9eSJosef Bacik * as expected. If we find our ref then we know that from here
537678c52d9eSJosef Bacik * on out everything should be correct, and we can clear the
537778c52d9eSJosef Bacik * ->restarted flag.
537878c52d9eSJosef Bacik */
537978c52d9eSJosef Bacik if (wc->restarted) {
538078c52d9eSJosef Bacik ret = check_ref_exists(trans, root, bytenr, parent,
538178c52d9eSJosef Bacik level - 1);
538278c52d9eSJosef Bacik if (ret < 0)
538378c52d9eSJosef Bacik goto out_unlock;
538478c52d9eSJosef Bacik if (ret == 0)
538578c52d9eSJosef Bacik goto no_delete;
538678c52d9eSJosef Bacik ret = 0;
538778c52d9eSJosef Bacik wc->restarted = 0;
538878c52d9eSJosef Bacik }
538978c52d9eSJosef Bacik
539078c52d9eSJosef Bacik /*
53912cd86d30SQu Wenruo * Reloc tree doesn't contribute to qgroup numbers, and we have
53922cd86d30SQu Wenruo * already accounted them at merge time (replace_path),
53932cd86d30SQu Wenruo * thus we could skip expensive subtree trace here.
53942cd86d30SQu Wenruo */
53952cd86d30SQu Wenruo if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
53962cd86d30SQu Wenruo need_account) {
5397deb40627SLu Fengqi ret = btrfs_qgroup_trace_subtree(trans, next,
53981152651aSMark Fasheh generation, level - 1);
53991152651aSMark Fasheh if (ret) {
54000b246afaSJeff Mahoney btrfs_err_rl(fs_info,
54015d163e0eSJeff Mahoney "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
540294647322SDavid Sterba ret);
54031152651aSMark Fasheh }
54041152651aSMark Fasheh }
5405aea6f028SJosef Bacik
5406aea6f028SJosef Bacik /*
5407aea6f028SJosef Bacik * We need to update the next key in our walk control so we can
5408aea6f028SJosef Bacik * update the drop_progress key accordingly. We don't care if
5409aea6f028SJosef Bacik * find_next_key doesn't find a key because that means we're at
5410aea6f028SJosef Bacik * the end and are going to clean up now.
5411aea6f028SJosef Bacik */
5412aea6f028SJosef Bacik wc->drop_level = level;
5413aea6f028SJosef Bacik find_next_key(path, level, &wc->drop_progress);
5414aea6f028SJosef Bacik
5415ffd4bb2aSQu Wenruo btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
5416ffd4bb2aSQu Wenruo fs_info->nodesize, parent);
5417f42c5da6SNikolay Borisov btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
5418f42c5da6SNikolay Borisov 0, false);
5419ffd4bb2aSQu Wenruo ret = btrfs_free_extent(trans, &ref);
54204867268cSJosef Bacik if (ret)
54214867268cSJosef Bacik goto out_unlock;
542294fcca9fSYan, Zheng }
542378c52d9eSJosef Bacik no_delete:
54244867268cSJosef Bacik *lookup_info = 1;
54254867268cSJosef Bacik ret = 1;
54264867268cSJosef Bacik
54274867268cSJosef Bacik out_unlock:
54281c4850e2SYan, Zheng btrfs_tree_unlock(next);
54291c4850e2SYan, Zheng free_extent_buffer(next);
54304867268cSJosef Bacik
54314867268cSJosef Bacik return ret;
54321c4850e2SYan, Zheng }
54331c4850e2SYan, Zheng
54341c4850e2SYan, Zheng /*
54352c016dc2SLiu Bo * helper to process tree block while walking up the tree.
54362c47e605SYan Zheng *
54372c47e605SYan Zheng * when wc->stage == DROP_REFERENCE, this function drops
54382c47e605SYan Zheng * reference count on the block.
54392c47e605SYan Zheng *
54402c47e605SYan Zheng * when wc->stage == UPDATE_BACKREF, this function changes
54412c47e605SYan Zheng * wc->stage back to DROP_REFERENCE if we changed wc->stage
54422c47e605SYan Zheng * to UPDATE_BACKREF previously while processing the block.
54432c47e605SYan Zheng *
54442c47e605SYan Zheng * NOTE: return value 1 means we should stop walking up.
54452c47e605SYan Zheng */
walk_up_proc(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc)54462c47e605SYan Zheng static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
54472c47e605SYan Zheng struct btrfs_root *root,
54482c47e605SYan Zheng struct btrfs_path *path,
54492c47e605SYan Zheng struct walk_control *wc)
54502c47e605SYan Zheng {
54510b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
5452f0486c68SYan, Zheng int ret;
54532c47e605SYan Zheng int level = wc->level;
54542c47e605SYan Zheng struct extent_buffer *eb = path->nodes[level];
54552c47e605SYan Zheng u64 parent = 0;
54562c47e605SYan Zheng
54572c47e605SYan Zheng if (wc->stage == UPDATE_BACKREF) {
54582c47e605SYan Zheng BUG_ON(wc->shared_level < level);
54592c47e605SYan Zheng if (level < wc->shared_level)
54602c47e605SYan Zheng goto out;
54612c47e605SYan Zheng
54622c47e605SYan Zheng ret = find_next_key(path, level + 1, &wc->update_progress);
54632c47e605SYan Zheng if (ret > 0)
54642c47e605SYan Zheng wc->update_ref = 0;
54652c47e605SYan Zheng
54662c47e605SYan Zheng wc->stage = DROP_REFERENCE;
54672c47e605SYan Zheng wc->shared_level = -1;
54682c47e605SYan Zheng path->slots[level] = 0;
54692c47e605SYan Zheng
54702c47e605SYan Zheng /*
54712c47e605SYan Zheng * check reference count again if the block isn't locked.
54722c47e605SYan Zheng * we should start walking down the tree again if reference
54732c47e605SYan Zheng * count is one.
54742c47e605SYan Zheng */
54752c47e605SYan Zheng if (!path->locks[level]) {
54762c47e605SYan Zheng BUG_ON(level == 0);
54772c47e605SYan Zheng btrfs_tree_lock(eb);
5478ac5887c8SJosef Bacik path->locks[level] = BTRFS_WRITE_LOCK;
54792c47e605SYan Zheng
54802ff7e61eSJeff Mahoney ret = btrfs_lookup_extent_info(trans, fs_info,
54813173a18fSJosef Bacik eb->start, level, 1,
54822c47e605SYan Zheng &wc->refs[level],
54832c47e605SYan Zheng &wc->flags[level]);
548479787eaaSJeff Mahoney if (ret < 0) {
548579787eaaSJeff Mahoney btrfs_tree_unlock_rw(eb, path->locks[level]);
54863268a246SLiu Bo path->locks[level] = 0;
548779787eaaSJeff Mahoney return ret;
548879787eaaSJeff Mahoney }
54892c47e605SYan Zheng BUG_ON(wc->refs[level] == 0);
54902c47e605SYan Zheng if (wc->refs[level] == 1) {
5491bd681513SChris Mason btrfs_tree_unlock_rw(eb, path->locks[level]);
54923268a246SLiu Bo path->locks[level] = 0;
54932c47e605SYan Zheng return 1;
54942c47e605SYan Zheng }
54952c47e605SYan Zheng }
54962c47e605SYan Zheng }
54972c47e605SYan Zheng
54982c47e605SYan Zheng /* wc->stage == DROP_REFERENCE */
54992c47e605SYan Zheng BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
55002c47e605SYan Zheng
55012c47e605SYan Zheng if (wc->refs[level] == 1) {
55022c47e605SYan Zheng if (level == 0) {
55032c47e605SYan Zheng if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
5504e339a6b0SJosef Bacik ret = btrfs_dec_ref(trans, root, eb, 1);
55052c47e605SYan Zheng else
5506e339a6b0SJosef Bacik ret = btrfs_dec_ref(trans, root, eb, 0);
550779787eaaSJeff Mahoney BUG_ON(ret); /* -ENOMEM */
5508c4140cbfSQu Wenruo if (is_fstree(root->root_key.objectid)) {
55098d38d7ebSLu Fengqi ret = btrfs_qgroup_trace_leaf_items(trans, eb);
55101152651aSMark Fasheh if (ret) {
55110b246afaSJeff Mahoney btrfs_err_rl(fs_info,
5512c4140cbfSQu Wenruo "error %d accounting leaf items, quota is out of sync, rescan required",
551394647322SDavid Sterba ret);
55141152651aSMark Fasheh }
55152c47e605SYan Zheng }
5516c4140cbfSQu Wenruo }
5517190a8339SJosef Bacik /* Make block locked assertion in btrfs_clear_buffer_dirty happy. */
5518d3fb6615SJosef Bacik if (!path->locks[level]) {
55192c47e605SYan Zheng btrfs_tree_lock(eb);
5520ac5887c8SJosef Bacik path->locks[level] = BTRFS_WRITE_LOCK;
55212c47e605SYan Zheng }
5522190a8339SJosef Bacik btrfs_clear_buffer_dirty(trans, eb);
55232c47e605SYan Zheng }
55242c47e605SYan Zheng
55252c47e605SYan Zheng if (eb == root->node) {
55262c47e605SYan Zheng if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
55272c47e605SYan Zheng parent = eb->start;
552865c6e82bSQu Wenruo else if (root->root_key.objectid != btrfs_header_owner(eb))
552965c6e82bSQu Wenruo goto owner_mismatch;
55302c47e605SYan Zheng } else {
55312c47e605SYan Zheng if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
55322c47e605SYan Zheng parent = path->nodes[level + 1]->start;
553365c6e82bSQu Wenruo else if (root->root_key.objectid !=
553465c6e82bSQu Wenruo btrfs_header_owner(path->nodes[level + 1]))
553565c6e82bSQu Wenruo goto owner_mismatch;
55362c47e605SYan Zheng }
55372c47e605SYan Zheng
55387a163608SFilipe Manana btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent,
55397a163608SFilipe Manana wc->refs[level] == 1);
55402c47e605SYan Zheng out:
55412c47e605SYan Zheng wc->refs[level] = 0;
55422c47e605SYan Zheng wc->flags[level] = 0;
5543f0486c68SYan, Zheng return 0;
554465c6e82bSQu Wenruo
554565c6e82bSQu Wenruo owner_mismatch:
554665c6e82bSQu Wenruo btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
554765c6e82bSQu Wenruo btrfs_header_owner(eb), root->root_key.objectid);
554865c6e82bSQu Wenruo return -EUCLEAN;
55492c47e605SYan Zheng }
55502c47e605SYan Zheng
walk_down_tree(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc)55515d4f98a2SYan Zheng static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5552f82d02d9SYan Zheng struct btrfs_root *root,
55532c47e605SYan Zheng struct btrfs_path *path,
55542c47e605SYan Zheng struct walk_control *wc)
5555f82d02d9SYan Zheng {
55562c47e605SYan Zheng int level = wc->level;
555794fcca9fSYan, Zheng int lookup_info = 1;
55584e194384SJosef Bacik int ret = 0;
5559f82d02d9SYan Zheng
55602c47e605SYan Zheng while (level >= 0) {
556194fcca9fSYan, Zheng ret = walk_down_proc(trans, root, path, wc, lookup_info);
55624e194384SJosef Bacik if (ret)
5563f82d02d9SYan Zheng break;
5564f82d02d9SYan Zheng
55652c47e605SYan Zheng if (level == 0)
55662c47e605SYan Zheng break;
55672c47e605SYan Zheng
55687a7965f8SYan, Zheng if (path->slots[level] >=
55697a7965f8SYan, Zheng btrfs_header_nritems(path->nodes[level]))
55707a7965f8SYan, Zheng break;
55717a7965f8SYan, Zheng
557294fcca9fSYan, Zheng ret = do_walk_down(trans, root, path, wc, &lookup_info);
55731c4850e2SYan, Zheng if (ret > 0) {
55741c4850e2SYan, Zheng path->slots[level]++;
55751c4850e2SYan, Zheng continue;
557690d2c51dSMiao Xie } else if (ret < 0)
55774e194384SJosef Bacik break;
55781c4850e2SYan, Zheng level = wc->level;
5579f82d02d9SYan Zheng }
55804e194384SJosef Bacik return (ret == 1) ? 0 : ret;
5581f82d02d9SYan Zheng }
5582f82d02d9SYan Zheng
walk_up_tree(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc,int max_level)5583d397712bSChris Mason static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
558498ed5174SChris Mason struct btrfs_root *root,
5585f82d02d9SYan Zheng struct btrfs_path *path,
55862c47e605SYan Zheng struct walk_control *wc, int max_level)
558720524f02SChris Mason {
55882c47e605SYan Zheng int level = wc->level;
558920524f02SChris Mason int ret;
55909f3a7427SChris Mason
55912c47e605SYan Zheng path->slots[level] = btrfs_header_nritems(path->nodes[level]);
55922c47e605SYan Zheng while (level < max_level && path->nodes[level]) {
55932c47e605SYan Zheng wc->level = level;
55942c47e605SYan Zheng if (path->slots[level] + 1 <
55952c47e605SYan Zheng btrfs_header_nritems(path->nodes[level])) {
55962c47e605SYan Zheng path->slots[level]++;
559720524f02SChris Mason return 0;
559820524f02SChris Mason } else {
55992c47e605SYan Zheng ret = walk_up_proc(trans, root, path, wc);
56002c47e605SYan Zheng if (ret > 0)
56012c47e605SYan Zheng return 0;
560265c6e82bSQu Wenruo if (ret < 0)
560365c6e82bSQu Wenruo return ret;
5604bd56b302SChris Mason
56052c47e605SYan Zheng if (path->locks[level]) {
5606bd681513SChris Mason btrfs_tree_unlock_rw(path->nodes[level],
5607bd681513SChris Mason path->locks[level]);
56082c47e605SYan Zheng path->locks[level] = 0;
5609f82d02d9SYan Zheng }
56102c47e605SYan Zheng free_extent_buffer(path->nodes[level]);
56112c47e605SYan Zheng path->nodes[level] = NULL;
56122c47e605SYan Zheng level++;
561320524f02SChris Mason }
561420524f02SChris Mason }
561520524f02SChris Mason return 1;
561620524f02SChris Mason }
561720524f02SChris Mason
56189aca1d51SChris Mason /*
56192c47e605SYan Zheng * drop a subvolume tree.
56202c47e605SYan Zheng *
56212c47e605SYan Zheng * this function traverses the tree freeing any blocks that only
56222c47e605SYan Zheng * referenced by the tree.
56232c47e605SYan Zheng *
56242c47e605SYan Zheng * when a shared tree block is found. this function decreases its
56252c47e605SYan Zheng * reference count by one. if update_ref is true, this function
56262c47e605SYan Zheng * also make sure backrefs for the shared block and all lower level
56272c47e605SYan Zheng * blocks are properly updated.
56289d1a2a3aSDavid Sterba *
56299d1a2a3aSDavid Sterba * If called with for_reloc == 0, may exit early with -EAGAIN
56309aca1d51SChris Mason */
btrfs_drop_snapshot(struct btrfs_root * root,int update_ref,int for_reloc)56310078a9f9SNikolay Borisov int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
563220524f02SChris Mason {
563312a824dcSFilipe Manana const bool is_reloc_root = (root->root_key.objectid ==
563412a824dcSFilipe Manana BTRFS_TREE_RELOC_OBJECTID);
5635ab8d0fc4SJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
56365caf2a00SChris Mason struct btrfs_path *path;
56372c47e605SYan Zheng struct btrfs_trans_handle *trans;
5638ab8d0fc4SJeff Mahoney struct btrfs_root *tree_root = fs_info->tree_root;
56399f3a7427SChris Mason struct btrfs_root_item *root_item = &root->root_item;
56402c47e605SYan Zheng struct walk_control *wc;
56412c47e605SYan Zheng struct btrfs_key key;
56422c47e605SYan Zheng int err = 0;
56432c47e605SYan Zheng int ret;
56442c47e605SYan Zheng int level;
5645d29a9f62SJosef Bacik bool root_dropped = false;
5646b4be6aefSJosef Bacik bool unfinished_drop = false;
564720524f02SChris Mason
56484fd786e6SMisono Tomohiro btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
56491152651aSMark Fasheh
56505caf2a00SChris Mason path = btrfs_alloc_path();
5651cb1b69f4STsutomu Itoh if (!path) {
5652cb1b69f4STsutomu Itoh err = -ENOMEM;
5653cb1b69f4STsutomu Itoh goto out;
5654cb1b69f4STsutomu Itoh }
565520524f02SChris Mason
56562c47e605SYan Zheng wc = kzalloc(sizeof(*wc), GFP_NOFS);
565738a1a919SMark Fasheh if (!wc) {
565838a1a919SMark Fasheh btrfs_free_path(path);
5659cb1b69f4STsutomu Itoh err = -ENOMEM;
5660cb1b69f4STsutomu Itoh goto out;
566138a1a919SMark Fasheh }
56622c47e605SYan Zheng
5663f3e3d9ccSQu Wenruo /*
5664f3e3d9ccSQu Wenruo * Use join to avoid potential EINTR from transaction start. See
5665f3e3d9ccSQu Wenruo * wait_reserve_ticket and the whole reservation callchain.
5666f3e3d9ccSQu Wenruo */
5667f3e3d9ccSQu Wenruo if (for_reloc)
5668f3e3d9ccSQu Wenruo trans = btrfs_join_transaction(tree_root);
5669f3e3d9ccSQu Wenruo else
5670a22285a6SYan, Zheng trans = btrfs_start_transaction(tree_root, 0);
567179787eaaSJeff Mahoney if (IS_ERR(trans)) {
567279787eaaSJeff Mahoney err = PTR_ERR(trans);
567379787eaaSJeff Mahoney goto out_free;
567479787eaaSJeff Mahoney }
567598d5dc13STsutomu Itoh
56760568e82dSJosef Bacik err = btrfs_run_delayed_items(trans);
56770568e82dSJosef Bacik if (err)
56780568e82dSJosef Bacik goto out_end_trans;
56790568e82dSJosef Bacik
568083354f07SJosef Bacik /*
568183354f07SJosef Bacik * This will help us catch people modifying the fs tree while we're
568283354f07SJosef Bacik * dropping it. It is unsafe to mess with the fs tree while it's being
568383354f07SJosef Bacik * dropped as we unlock the root node and parent nodes as we walk down
568483354f07SJosef Bacik * the tree, assuming nothing will change. If something does change
568583354f07SJosef Bacik * then we'll have stale information and drop references to blocks we've
568683354f07SJosef Bacik * already dropped.
568783354f07SJosef Bacik */
568883354f07SJosef Bacik set_bit(BTRFS_ROOT_DELETING, &root->state);
5689b4be6aefSJosef Bacik unfinished_drop = test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
5690b4be6aefSJosef Bacik
56919f3a7427SChris Mason if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
56922c47e605SYan Zheng level = btrfs_header_level(root->node);
56935d4f98a2SYan Zheng path->nodes[level] = btrfs_lock_root_node(root);
56945caf2a00SChris Mason path->slots[level] = 0;
5695ac5887c8SJosef Bacik path->locks[level] = BTRFS_WRITE_LOCK;
56962c47e605SYan Zheng memset(&wc->update_progress, 0,
56972c47e605SYan Zheng sizeof(wc->update_progress));
56989f3a7427SChris Mason } else {
56999f3a7427SChris Mason btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
57002c47e605SYan Zheng memcpy(&wc->update_progress, &key,
57012c47e605SYan Zheng sizeof(wc->update_progress));
57022c47e605SYan Zheng
5703c8422684SDavid Sterba level = btrfs_root_drop_level(root_item);
57042c47e605SYan Zheng BUG_ON(level == 0);
57056702ed49SChris Mason path->lowest_level = level;
57062c47e605SYan Zheng ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
57072c47e605SYan Zheng path->lowest_level = 0;
57082c47e605SYan Zheng if (ret < 0) {
57092c47e605SYan Zheng err = ret;
571079787eaaSJeff Mahoney goto out_end_trans;
57119f3a7427SChris Mason }
57121c4850e2SYan, Zheng WARN_ON(ret > 0);
57132c47e605SYan Zheng
57147d9eb12cSChris Mason /*
57157d9eb12cSChris Mason * unlock our path, this is safe because only this
57167d9eb12cSChris Mason * function is allowed to delete this snapshot
57177d9eb12cSChris Mason */
57185d4f98a2SYan Zheng btrfs_unlock_up_safe(path, 0);
57199aca1d51SChris Mason
57202c47e605SYan Zheng level = btrfs_header_level(root->node);
57212c47e605SYan Zheng while (1) {
57222c47e605SYan Zheng btrfs_tree_lock(path->nodes[level]);
5723ac5887c8SJosef Bacik path->locks[level] = BTRFS_WRITE_LOCK;
57242c47e605SYan Zheng
57252ff7e61eSJeff Mahoney ret = btrfs_lookup_extent_info(trans, fs_info,
57262c47e605SYan Zheng path->nodes[level]->start,
57273173a18fSJosef Bacik level, 1, &wc->refs[level],
57282c47e605SYan Zheng &wc->flags[level]);
572979787eaaSJeff Mahoney if (ret < 0) {
573079787eaaSJeff Mahoney err = ret;
573179787eaaSJeff Mahoney goto out_end_trans;
573279787eaaSJeff Mahoney }
57332c47e605SYan Zheng BUG_ON(wc->refs[level] == 0);
57342c47e605SYan Zheng
5735c8422684SDavid Sterba if (level == btrfs_root_drop_level(root_item))
573620524f02SChris Mason break;
57372c47e605SYan Zheng
57382c47e605SYan Zheng btrfs_tree_unlock(path->nodes[level]);
5739fec386acSJosef Bacik path->locks[level] = 0;
57402c47e605SYan Zheng WARN_ON(wc->refs[level] != 1);
57412c47e605SYan Zheng level--;
57422c47e605SYan Zheng }
57432c47e605SYan Zheng }
57442c47e605SYan Zheng
574578c52d9eSJosef Bacik wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
57462c47e605SYan Zheng wc->level = level;
57472c47e605SYan Zheng wc->shared_level = -1;
57482c47e605SYan Zheng wc->stage = DROP_REFERENCE;
57492c47e605SYan Zheng wc->update_ref = update_ref;
57502c47e605SYan Zheng wc->keep_locks = 0;
57510b246afaSJeff Mahoney wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
57522c47e605SYan Zheng
57532c47e605SYan Zheng while (1) {
57549d1a2a3aSDavid Sterba
57552c47e605SYan Zheng ret = walk_down_tree(trans, root, path, wc);
57562c47e605SYan Zheng if (ret < 0) {
57579a93b5a3SJosef Bacik btrfs_abort_transaction(trans, ret);
57582c47e605SYan Zheng err = ret;
57592c47e605SYan Zheng break;
57602c47e605SYan Zheng }
57612c47e605SYan Zheng
57622c47e605SYan Zheng ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
57632c47e605SYan Zheng if (ret < 0) {
57649a93b5a3SJosef Bacik btrfs_abort_transaction(trans, ret);
57652c47e605SYan Zheng err = ret;
57662c47e605SYan Zheng break;
57672c47e605SYan Zheng }
57682c47e605SYan Zheng
57692c47e605SYan Zheng if (ret > 0) {
57702c47e605SYan Zheng BUG_ON(wc->stage != DROP_REFERENCE);
57712c47e605SYan Zheng break;
57722c47e605SYan Zheng }
57732c47e605SYan Zheng
57742c47e605SYan Zheng if (wc->stage == DROP_REFERENCE) {
5775aea6f028SJosef Bacik wc->drop_level = wc->level;
5776aea6f028SJosef Bacik btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
5777aea6f028SJosef Bacik &wc->drop_progress,
5778aea6f028SJosef Bacik path->slots[wc->drop_level]);
57792c47e605SYan Zheng }
5780aea6f028SJosef Bacik btrfs_cpu_key_to_disk(&root_item->drop_progress,
5781aea6f028SJosef Bacik &wc->drop_progress);
5782c8422684SDavid Sterba btrfs_set_root_drop_level(root_item, wc->drop_level);
57832c47e605SYan Zheng
57842c47e605SYan Zheng BUG_ON(wc->level == 0);
57853a45bb20SJeff Mahoney if (btrfs_should_end_transaction(trans) ||
57862ff7e61eSJeff Mahoney (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
57872c47e605SYan Zheng ret = btrfs_update_root(trans, tree_root,
57882c47e605SYan Zheng &root->root_key,
57892c47e605SYan Zheng root_item);
579079787eaaSJeff Mahoney if (ret) {
579166642832SJeff Mahoney btrfs_abort_transaction(trans, ret);
579279787eaaSJeff Mahoney err = ret;
579379787eaaSJeff Mahoney goto out_end_trans;
579479787eaaSJeff Mahoney }
57952c47e605SYan Zheng
579612a824dcSFilipe Manana if (!is_reloc_root)
579712a824dcSFilipe Manana btrfs_set_last_root_drop_gen(fs_info, trans->transid);
579812a824dcSFilipe Manana
57993a45bb20SJeff Mahoney btrfs_end_transaction_throttle(trans);
58002ff7e61eSJeff Mahoney if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
5801ab8d0fc4SJeff Mahoney btrfs_debug(fs_info,
5802ab8d0fc4SJeff Mahoney "drop snapshot early exit");
58033c8f2422SJosef Bacik err = -EAGAIN;
58043c8f2422SJosef Bacik goto out_free;
58053c8f2422SJosef Bacik }
58063c8f2422SJosef Bacik
580718d3bff4SJosef Bacik /*
580818d3bff4SJosef Bacik * Use join to avoid potential EINTR from transaction
580918d3bff4SJosef Bacik * start. See wait_reserve_ticket and the whole
581018d3bff4SJosef Bacik * reservation callchain.
581118d3bff4SJosef Bacik */
581218d3bff4SJosef Bacik if (for_reloc)
581318d3bff4SJosef Bacik trans = btrfs_join_transaction(tree_root);
581418d3bff4SJosef Bacik else
5815a22285a6SYan, Zheng trans = btrfs_start_transaction(tree_root, 0);
581679787eaaSJeff Mahoney if (IS_ERR(trans)) {
581779787eaaSJeff Mahoney err = PTR_ERR(trans);
581879787eaaSJeff Mahoney goto out_free;
581979787eaaSJeff Mahoney }
5820c3e69d58SChris Mason }
58219f3a7427SChris Mason }
5822b3b4aa74SDavid Sterba btrfs_release_path(path);
582379787eaaSJeff Mahoney if (err)
582479787eaaSJeff Mahoney goto out_end_trans;
58252c47e605SYan Zheng
5826ab9ce7d4SLu Fengqi ret = btrfs_del_root(trans, &root->root_key);
582779787eaaSJeff Mahoney if (ret) {
582866642832SJeff Mahoney btrfs_abort_transaction(trans, ret);
5829e19182c0SJeff Mahoney err = ret;
583079787eaaSJeff Mahoney goto out_end_trans;
583179787eaaSJeff Mahoney }
58322c47e605SYan Zheng
583312a824dcSFilipe Manana if (!is_reloc_root) {
5834cb517eabSMiao Xie ret = btrfs_find_root(tree_root, &root->root_key, path,
583576dda93cSYan, Zheng NULL, NULL);
583679787eaaSJeff Mahoney if (ret < 0) {
583766642832SJeff Mahoney btrfs_abort_transaction(trans, ret);
583879787eaaSJeff Mahoney err = ret;
583979787eaaSJeff Mahoney goto out_end_trans;
584079787eaaSJeff Mahoney } else if (ret > 0) {
584184cd948cSJosef Bacik /* if we fail to delete the orphan item this time
584284cd948cSJosef Bacik * around, it'll get picked up the next time.
584384cd948cSJosef Bacik *
584484cd948cSJosef Bacik * The most common failure here is just -ENOENT.
584584cd948cSJosef Bacik */
584684cd948cSJosef Bacik btrfs_del_orphan_item(trans, tree_root,
584776dda93cSYan, Zheng root->root_key.objectid);
584876dda93cSYan, Zheng }
584976dda93cSYan, Zheng }
585076dda93cSYan, Zheng
5851a3cf0e43SQu Wenruo /*
5852a3cf0e43SQu Wenruo * This subvolume is going to be completely dropped, and won't be
5853a3cf0e43SQu Wenruo * recorded as dirty roots, thus pertrans meta rsv will not be freed at
5854a3cf0e43SQu Wenruo * commit transaction time. So free it here manually.
5855a3cf0e43SQu Wenruo */
5856a3cf0e43SQu Wenruo btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
5857a3cf0e43SQu Wenruo btrfs_qgroup_free_meta_all_pertrans(root);
5858a3cf0e43SQu Wenruo
5859fc7cbcd4SDavid Sterba if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
58602b9dbef2SJosef Bacik btrfs_add_dropped_root(trans, root);
58618c38938cSJosef Bacik else
586200246528SJosef Bacik btrfs_put_root(root);
5863d29a9f62SJosef Bacik root_dropped = true;
586479787eaaSJeff Mahoney out_end_trans:
586512a824dcSFilipe Manana if (!is_reloc_root)
586612a824dcSFilipe Manana btrfs_set_last_root_drop_gen(fs_info, trans->transid);
586712a824dcSFilipe Manana
58683a45bb20SJeff Mahoney btrfs_end_transaction_throttle(trans);
586979787eaaSJeff Mahoney out_free:
58702c47e605SYan Zheng kfree(wc);
58715caf2a00SChris Mason btrfs_free_path(path);
5872cb1b69f4STsutomu Itoh out:
5873d29a9f62SJosef Bacik /*
5874b4be6aefSJosef Bacik * We were an unfinished drop root, check to see if there are any
5875b4be6aefSJosef Bacik * pending, and if not clear and wake up any waiters.
5876b4be6aefSJosef Bacik */
5877b4be6aefSJosef Bacik if (!err && unfinished_drop)
5878b4be6aefSJosef Bacik btrfs_maybe_wake_unfinished_drop(fs_info);
5879b4be6aefSJosef Bacik
5880b4be6aefSJosef Bacik /*
5881d29a9f62SJosef Bacik * So if we need to stop dropping the snapshot for whatever reason we
5882d29a9f62SJosef Bacik * need to make sure to add it back to the dead root list so that we
5883d29a9f62SJosef Bacik * keep trying to do the work later. This also cleans up roots if we
5884d29a9f62SJosef Bacik * don't have it in the radix (like when we recover after a power fail
5885d29a9f62SJosef Bacik * or unmount) so we don't leak memory.
5886d29a9f62SJosef Bacik */
5887897ca819SThomas Meyer if (!for_reloc && !root_dropped)
5888d29a9f62SJosef Bacik btrfs_add_dead_root(root);
58892c536799SJeff Mahoney return err;
589020524f02SChris Mason }
58919078a3e1SChris Mason
58922c47e605SYan Zheng /*
58932c47e605SYan Zheng * drop subtree rooted at tree block 'node'.
58942c47e605SYan Zheng *
58952c47e605SYan Zheng * NOTE: this function will unlock and release tree block 'node'
589666d7e7f0SArne Jansen * only used by relocation code
58972c47e605SYan Zheng */
btrfs_drop_subtree(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * node,struct extent_buffer * parent)5898f82d02d9SYan Zheng int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
5899f82d02d9SYan Zheng struct btrfs_root *root,
5900f82d02d9SYan Zheng struct extent_buffer *node,
5901f82d02d9SYan Zheng struct extent_buffer *parent)
5902f82d02d9SYan Zheng {
59030b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
5904f82d02d9SYan Zheng struct btrfs_path *path;
59052c47e605SYan Zheng struct walk_control *wc;
5906f82d02d9SYan Zheng int level;
5907f82d02d9SYan Zheng int parent_level;
5908f82d02d9SYan Zheng int ret = 0;
5909f82d02d9SYan Zheng int wret;
5910f82d02d9SYan Zheng
59112c47e605SYan Zheng BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
59122c47e605SYan Zheng
5913f82d02d9SYan Zheng path = btrfs_alloc_path();
5914db5b493aSTsutomu Itoh if (!path)
5915db5b493aSTsutomu Itoh return -ENOMEM;
5916f82d02d9SYan Zheng
59172c47e605SYan Zheng wc = kzalloc(sizeof(*wc), GFP_NOFS);
5918db5b493aSTsutomu Itoh if (!wc) {
5919db5b493aSTsutomu Itoh btrfs_free_path(path);
5920db5b493aSTsutomu Itoh return -ENOMEM;
5921db5b493aSTsutomu Itoh }
59222c47e605SYan Zheng
592349d0c642SFilipe Manana btrfs_assert_tree_write_locked(parent);
5924f82d02d9SYan Zheng parent_level = btrfs_header_level(parent);
592567439dadSDavid Sterba atomic_inc(&parent->refs);
5926f82d02d9SYan Zheng path->nodes[parent_level] = parent;
5927f82d02d9SYan Zheng path->slots[parent_level] = btrfs_header_nritems(parent);
5928f82d02d9SYan Zheng
592949d0c642SFilipe Manana btrfs_assert_tree_write_locked(node);
5930f82d02d9SYan Zheng level = btrfs_header_level(node);
5931f82d02d9SYan Zheng path->nodes[level] = node;
5932f82d02d9SYan Zheng path->slots[level] = 0;
5933ac5887c8SJosef Bacik path->locks[level] = BTRFS_WRITE_LOCK;
59342c47e605SYan Zheng
59352c47e605SYan Zheng wc->refs[parent_level] = 1;
59362c47e605SYan Zheng wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
59372c47e605SYan Zheng wc->level = level;
59382c47e605SYan Zheng wc->shared_level = -1;
59392c47e605SYan Zheng wc->stage = DROP_REFERENCE;
59402c47e605SYan Zheng wc->update_ref = 0;
59412c47e605SYan Zheng wc->keep_locks = 1;
59420b246afaSJeff Mahoney wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
5943f82d02d9SYan Zheng
5944f82d02d9SYan Zheng while (1) {
59452c47e605SYan Zheng wret = walk_down_tree(trans, root, path, wc);
59462c47e605SYan Zheng if (wret < 0) {
5947f82d02d9SYan Zheng ret = wret;
5948f82d02d9SYan Zheng break;
59492c47e605SYan Zheng }
5950f82d02d9SYan Zheng
59512c47e605SYan Zheng wret = walk_up_tree(trans, root, path, wc, parent_level);
5952f82d02d9SYan Zheng if (wret < 0)
5953f82d02d9SYan Zheng ret = wret;
5954f82d02d9SYan Zheng if (wret != 0)
5955f82d02d9SYan Zheng break;
5956f82d02d9SYan Zheng }
5957f82d02d9SYan Zheng
59582c47e605SYan Zheng kfree(wc);
5959f82d02d9SYan Zheng btrfs_free_path(path);
5960f82d02d9SYan Zheng return ret;
5961f82d02d9SYan Zheng }
5962f82d02d9SYan Zheng
/*
 * Unpin the extent range [start, end] by calling unpin_extent_range() with
 * its last argument set to false.
 *
 * Return: the value returned by unpin_extent_range().
 */
int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
				   u64 start, u64 end)
{
	int ret;

	ret = unpin_extent_range(fs_info, start, end, false);

	return ret;
}
5968acce952bSliubo
5969499f377fSJeff Mahoney /*
5970499f377fSJeff Mahoney * It used to be that old block groups would be left around forever.
5971499f377fSJeff Mahoney * Iterating over them would be enough to trim unused space. Since we
5972499f377fSJeff Mahoney * now automatically remove them, we also need to iterate over unallocated
5973499f377fSJeff Mahoney * space.
5974499f377fSJeff Mahoney *
5975499f377fSJeff Mahoney * We don't want a transaction for this since the discard may take a
5976499f377fSJeff Mahoney * substantial amount of time. We don't require that a transaction be
5977499f377fSJeff Mahoney * running, but we do need to take a running transaction into account
5978fee7acc3SJeff Mahoney * to ensure that we're not discarding chunks that were released or
5979fee7acc3SJeff Mahoney * allocated in the current transaction.
5980499f377fSJeff Mahoney *
5981499f377fSJeff Mahoney * Holding the chunks lock will prevent other threads from allocating
5982499f377fSJeff Mahoney * or releasing chunks, but it won't prevent a running transaction
5983499f377fSJeff Mahoney * from committing and releasing the memory that the pending chunks
5984499f377fSJeff Mahoney * list head uses. For that, we need to take a reference to the
5985fee7acc3SJeff Mahoney * transaction and hold the commit root sem. We only need to hold
5986fee7acc3SJeff Mahoney * it while performing the free space search since we have already
5987fee7acc3SJeff Mahoney * held back allocations.
5988499f377fSJeff Mahoney */
btrfs_trim_free_extents(struct btrfs_device * device,u64 * trimmed)59898103d10bSNikolay Borisov static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
5990499f377fSJeff Mahoney {
599137f85ec3SQu Wenruo u64 start = BTRFS_DEVICE_RANGE_RESERVED, len = 0, end = 0;
5992499f377fSJeff Mahoney int ret;
5993499f377fSJeff Mahoney
5994499f377fSJeff Mahoney *trimmed = 0;
5995499f377fSJeff Mahoney
59960be88e36SJeff Mahoney /* Discard not supported = nothing to do. */
599770200574SChristoph Hellwig if (!bdev_max_discard_sectors(device->bdev))
59980be88e36SJeff Mahoney return 0;
59990be88e36SJeff Mahoney
600052042d8eSAndrea Gelmini /* Not writable = nothing to do. */
6001ebbede42SAnand Jain if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
6002499f377fSJeff Mahoney return 0;
6003499f377fSJeff Mahoney
6004499f377fSJeff Mahoney /* No free space = nothing to do. */
6005499f377fSJeff Mahoney if (device->total_bytes <= device->bytes_used)
6006499f377fSJeff Mahoney return 0;
6007499f377fSJeff Mahoney
6008499f377fSJeff Mahoney ret = 0;
6009499f377fSJeff Mahoney
6010499f377fSJeff Mahoney while (1) {
6011fb456252SJeff Mahoney struct btrfs_fs_info *fs_info = device->fs_info;
6012499f377fSJeff Mahoney u64 bytes;
6013499f377fSJeff Mahoney
6014499f377fSJeff Mahoney ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
6015499f377fSJeff Mahoney if (ret)
6016fee7acc3SJeff Mahoney break;
6017499f377fSJeff Mahoney
6018929be17aSNikolay Borisov find_first_clear_extent_bit(&device->alloc_state, start,
6019929be17aSNikolay Borisov &start, &end,
6020929be17aSNikolay Borisov CHUNK_TRIMMED | CHUNK_ALLOCATED);
602153460a45SNikolay Borisov
6022c57dd1f2SQu Wenruo /* Check if there are any CHUNK_* bits left */
6023c57dd1f2SQu Wenruo if (start > device->total_bytes) {
6024c57dd1f2SQu Wenruo WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
6025c57dd1f2SQu Wenruo btrfs_warn_in_rcu(fs_info,
6026c57dd1f2SQu Wenruo "ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
6027c57dd1f2SQu Wenruo start, end - start + 1,
6028cb3e217bSQu Wenruo btrfs_dev_name(device),
6029c57dd1f2SQu Wenruo device->total_bytes);
6030c57dd1f2SQu Wenruo mutex_unlock(&fs_info->chunk_mutex);
6031c57dd1f2SQu Wenruo ret = 0;
6032c57dd1f2SQu Wenruo break;
6033c57dd1f2SQu Wenruo }
6034c57dd1f2SQu Wenruo
603537f85ec3SQu Wenruo /* Ensure we skip the reserved space on each device. */
603637f85ec3SQu Wenruo start = max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
603753460a45SNikolay Borisov
6038929be17aSNikolay Borisov /*
6039929be17aSNikolay Borisov * If find_first_clear_extent_bit find a range that spans the
6040929be17aSNikolay Borisov * end of the device it will set end to -1, in this case it's up
6041929be17aSNikolay Borisov * to the caller to trim the value to the size of the device.
6042929be17aSNikolay Borisov */
6043929be17aSNikolay Borisov end = min(end, device->total_bytes - 1);
604453460a45SNikolay Borisov
6045929be17aSNikolay Borisov len = end - start + 1;
6046499f377fSJeff Mahoney
6047929be17aSNikolay Borisov /* We didn't find any extents */
6048929be17aSNikolay Borisov if (!len) {
6049499f377fSJeff Mahoney mutex_unlock(&fs_info->chunk_mutex);
6050499f377fSJeff Mahoney ret = 0;
6051499f377fSJeff Mahoney break;
6052499f377fSJeff Mahoney }
6053499f377fSJeff Mahoney
60548811133dSNikolay Borisov ret = btrfs_issue_discard(device->bdev, start, len,
60558811133dSNikolay Borisov &bytes);
60568811133dSNikolay Borisov if (!ret)
60570acd32c2SDavid Sterba set_extent_bit(&device->alloc_state, start,
60581d126800SDavid Sterba start + bytes - 1, CHUNK_TRIMMED, NULL);
6059499f377fSJeff Mahoney mutex_unlock(&fs_info->chunk_mutex);
6060499f377fSJeff Mahoney
6061499f377fSJeff Mahoney if (ret)
6062499f377fSJeff Mahoney break;
6063499f377fSJeff Mahoney
6064499f377fSJeff Mahoney start += len;
6065499f377fSJeff Mahoney *trimmed += bytes;
6066499f377fSJeff Mahoney
6067499f377fSJeff Mahoney if (fatal_signal_pending(current)) {
6068499f377fSJeff Mahoney ret = -ERESTARTSYS;
6069499f377fSJeff Mahoney break;
6070499f377fSJeff Mahoney }
6071499f377fSJeff Mahoney
6072499f377fSJeff Mahoney cond_resched();
6073499f377fSJeff Mahoney }
6074499f377fSJeff Mahoney
6075499f377fSJeff Mahoney return ret;
6076499f377fSJeff Mahoney }
6077499f377fSJeff Mahoney
607893bba24dSQu Wenruo /*
607993bba24dSQu Wenruo * Trim the whole filesystem by:
608093bba24dSQu Wenruo * 1) trimming the free space in each block group
608193bba24dSQu Wenruo * 2) trimming the unallocated space on each device
608293bba24dSQu Wenruo *
608393bba24dSQu Wenruo * This will also continue trimming even if a block group or device encounters
608493bba24dSQu Wenruo * an error. The return value will be the last error, or 0 if nothing bad
608593bba24dSQu Wenruo * happens.
608693bba24dSQu Wenruo */
btrfs_trim_fs(struct btrfs_fs_info * fs_info,struct fstrim_range * range)60872ff7e61eSJeff Mahoney int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
6088f7039b1dSLi Dongyang {
608923608d51SAnand Jain struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
609032da5386SDavid Sterba struct btrfs_block_group *cache = NULL;
6091499f377fSJeff Mahoney struct btrfs_device *device;
6092f7039b1dSLi Dongyang u64 group_trimmed;
609307301df7SQu Wenruo u64 range_end = U64_MAX;
6094f7039b1dSLi Dongyang u64 start;
6095f7039b1dSLi Dongyang u64 end;
6096f7039b1dSLi Dongyang u64 trimmed = 0;
609793bba24dSQu Wenruo u64 bg_failed = 0;
609893bba24dSQu Wenruo u64 dev_failed = 0;
609993bba24dSQu Wenruo int bg_ret = 0;
610093bba24dSQu Wenruo int dev_ret = 0;
6101f7039b1dSLi Dongyang int ret = 0;
6102f7039b1dSLi Dongyang
6103f981fec1SJosef Bacik if (range->start == U64_MAX)
6104f981fec1SJosef Bacik return -EINVAL;
6105f981fec1SJosef Bacik
610607301df7SQu Wenruo /*
610707301df7SQu Wenruo * Check range overflow if range->len is set.
610807301df7SQu Wenruo * The default range->len is U64_MAX.
610907301df7SQu Wenruo */
611007301df7SQu Wenruo if (range->len != U64_MAX &&
611107301df7SQu Wenruo check_add_overflow(range->start, range->len, &range_end))
611207301df7SQu Wenruo return -EINVAL;
611307301df7SQu Wenruo
61142cac13e4SLiu Bo cache = btrfs_lookup_first_block_group(fs_info, range->start);
61152e405ad8SJosef Bacik for (; cache; cache = btrfs_next_block_group(cache)) {
6116b3470b5dSDavid Sterba if (cache->start >= range_end) {
6117f7039b1dSLi Dongyang btrfs_put_block_group(cache);
6118f7039b1dSLi Dongyang break;
6119f7039b1dSLi Dongyang }
6120f7039b1dSLi Dongyang
6121b3470b5dSDavid Sterba start = max(range->start, cache->start);
6122b3470b5dSDavid Sterba end = min(range_end, cache->start + cache->length);
6123f7039b1dSLi Dongyang
6124f7039b1dSLi Dongyang if (end - start >= range->minlen) {
612532da5386SDavid Sterba if (!btrfs_block_group_done(cache)) {
6126ced8ecf0SOmar Sandoval ret = btrfs_cache_block_group(cache, true);
61271be41b78SJosef Bacik if (ret) {
612893bba24dSQu Wenruo bg_failed++;
612993bba24dSQu Wenruo bg_ret = ret;
613093bba24dSQu Wenruo continue;
61311be41b78SJosef Bacik }
6132f7039b1dSLi Dongyang }
6133f7039b1dSLi Dongyang ret = btrfs_trim_block_group(cache,
6134f7039b1dSLi Dongyang &group_trimmed,
6135f7039b1dSLi Dongyang start,
6136f7039b1dSLi Dongyang end,
6137f7039b1dSLi Dongyang range->minlen);
6138f7039b1dSLi Dongyang
6139f7039b1dSLi Dongyang trimmed += group_trimmed;
6140f7039b1dSLi Dongyang if (ret) {
614193bba24dSQu Wenruo bg_failed++;
614293bba24dSQu Wenruo bg_ret = ret;
614393bba24dSQu Wenruo continue;
614493bba24dSQu Wenruo }
6145f7039b1dSLi Dongyang }
6146f7039b1dSLi Dongyang }
6147f7039b1dSLi Dongyang
614893bba24dSQu Wenruo if (bg_failed)
614993bba24dSQu Wenruo btrfs_warn(fs_info,
615093bba24dSQu Wenruo "failed to trim %llu block group(s), last error %d",
615193bba24dSQu Wenruo bg_failed, bg_ret);
615223608d51SAnand Jain
615323608d51SAnand Jain mutex_lock(&fs_devices->device_list_mutex);
615423608d51SAnand Jain list_for_each_entry(device, &fs_devices->devices, dev_list) {
615516a200f6SAnand Jain if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
615616a200f6SAnand Jain continue;
615716a200f6SAnand Jain
61588103d10bSNikolay Borisov ret = btrfs_trim_free_extents(device, &group_trimmed);
615993bba24dSQu Wenruo if (ret) {
616093bba24dSQu Wenruo dev_failed++;
616193bba24dSQu Wenruo dev_ret = ret;
6162499f377fSJeff Mahoney break;
616393bba24dSQu Wenruo }
6164499f377fSJeff Mahoney
6165499f377fSJeff Mahoney trimmed += group_trimmed;
6166499f377fSJeff Mahoney }
616723608d51SAnand Jain mutex_unlock(&fs_devices->device_list_mutex);
6168499f377fSJeff Mahoney
616993bba24dSQu Wenruo if (dev_failed)
617093bba24dSQu Wenruo btrfs_warn(fs_info,
617193bba24dSQu Wenruo "failed to trim %llu device(s), last error %d",
617293bba24dSQu Wenruo dev_failed, dev_ret);
6173f7039b1dSLi Dongyang range->len = trimmed;
617493bba24dSQu Wenruo if (bg_ret)
617593bba24dSQu Wenruo return bg_ret;
617693bba24dSQu Wenruo return dev_ret;
6177f7039b1dSLi Dongyang }
6178