1c1d7c514SDavid Sterba // SPDX-License-Identifier: GPL-2.0
25d4f98a2SYan Zheng /*
35d4f98a2SYan Zheng * Copyright (C) 2009 Oracle. All rights reserved.
45d4f98a2SYan Zheng */
55d4f98a2SYan Zheng
65d4f98a2SYan Zheng #include <linux/sched.h>
75d4f98a2SYan Zheng #include <linux/pagemap.h>
85d4f98a2SYan Zheng #include <linux/writeback.h>
95d4f98a2SYan Zheng #include <linux/blkdev.h>
105d4f98a2SYan Zheng #include <linux/rbtree.h>
115a0e3ad6STejun Heo #include <linux/slab.h>
12726a3421SQu Wenruo #include <linux/error-injection.h>
135d4f98a2SYan Zheng #include "ctree.h"
145d4f98a2SYan Zheng #include "disk-io.h"
155d4f98a2SYan Zheng #include "transaction.h"
165d4f98a2SYan Zheng #include "volumes.h"
175d4f98a2SYan Zheng #include "locking.h"
185d4f98a2SYan Zheng #include "btrfs_inode.h"
195d4f98a2SYan Zheng #include "async-thread.h"
200af3d00bSJosef Bacik #include "free-space-cache.h"
2162b99540SQu Wenruo #include "qgroup.h"
22cdccee99SLiu Bo #include "print-tree.h"
2386736342SJosef Bacik #include "delalloc-space.h"
24aac0023cSJosef Bacik #include "block-group.h"
2519b546d7SQu Wenruo #include "backref.h"
26e9a28dc5SQu Wenruo #include "misc.h"
27c2832898SQu Wenruo #include "subpage.h"
287ae9bd18SNaohiro Aota #include "zoned.h"
2926c2c454SJosef Bacik #include "inode-item.h"
30f1e5c618SJosef Bacik #include "space-info.h"
31c7f13d42SJosef Bacik #include "fs.h"
3207e81dc9SJosef Bacik #include "accessors.h"
33a0231804SJosef Bacik #include "extent-tree.h"
3445c40c8fSJosef Bacik #include "root-tree.h"
357c8ede16SJosef Bacik #include "file-item.h"
3667707479SJosef Bacik #include "relocation.h"
377f0add25SJosef Bacik #include "super.h"
38103c1972SChristoph Hellwig #include "tree-checker.h"
395d4f98a2SYan Zheng
405d4f98a2SYan Zheng /*
410c891389SQu Wenruo * Relocation overview
420c891389SQu Wenruo *
430c891389SQu Wenruo * [What does relocation do]
440c891389SQu Wenruo *
450c891389SQu Wenruo * The objective of relocation is to relocate all extents of the target block
460c891389SQu Wenruo * group to other block groups.
470c891389SQu Wenruo * This is utilized by resize (shrink only), profile converting, compacting
480c891389SQu Wenruo * space, or balance routine to spread chunks over devices.
490c891389SQu Wenruo *
500c891389SQu Wenruo * Before | After
510c891389SQu Wenruo * ------------------------------------------------------------------
520c891389SQu Wenruo * BG A: 10 data extents | BG A: deleted
530c891389SQu Wenruo * BG B: 2 data extents | BG B: 10 data extents (2 old + 8 relocated)
540c891389SQu Wenruo * BG C: 1 extents | BG C: 3 data extents (1 old + 2 relocated)
550c891389SQu Wenruo *
560c891389SQu Wenruo * [How does relocation work]
570c891389SQu Wenruo *
580c891389SQu Wenruo * 1. Mark the target block group read-only
590c891389SQu Wenruo * New extents won't be allocated from the target block group.
600c891389SQu Wenruo *
610c891389SQu Wenruo * 2.1 Record each extent in the target block group
620c891389SQu Wenruo * To build a proper map of extents to be relocated.
630c891389SQu Wenruo *
640c891389SQu Wenruo * 2.2 Build data reloc tree and reloc trees
650c891389SQu Wenruo * Data reloc tree will contain an inode, recording all newly relocated
660c891389SQu Wenruo * data extents.
670c891389SQu Wenruo * There will be only one data reloc tree for one data block group.
680c891389SQu Wenruo *
690c891389SQu Wenruo * Reloc tree will be a special snapshot of its source tree, containing
700c891389SQu Wenruo * relocated tree blocks.
710c891389SQu Wenruo * Each tree referring to a tree block in target block group will get its
720c891389SQu Wenruo * reloc tree built.
730c891389SQu Wenruo *
740c891389SQu Wenruo * 2.3 Swap source tree with its corresponding reloc tree
750c891389SQu Wenruo * Each involved tree only refers to new extents after swap.
760c891389SQu Wenruo *
770c891389SQu Wenruo * 3. Cleanup reloc trees and data reloc tree.
780c891389SQu Wenruo * As old extents in the target block group are still referenced by reloc
790c891389SQu Wenruo * trees, we need to clean them up before really freeing the target block
800c891389SQu Wenruo * group.
810c891389SQu Wenruo *
820c891389SQu Wenruo * The main complexity is in steps 2.2 and 2.3.
830c891389SQu Wenruo *
840c891389SQu Wenruo * The entry point of relocation is relocate_block_group() function.
850c891389SQu Wenruo */
860c891389SQu Wenruo
/*
 * NOTE(review): presumably the number of tree nodes accounted for when
 * sizing a relocation reservation; its users are outside this excerpt, so
 * confirm before relying on that reading.
 */
#define RELOCATION_RESERVED_NODES	256
882a979612SQu Wenruo /*
895d4f98a2SYan Zheng * map address of tree root to tree
905d4f98a2SYan Zheng */
915d4f98a2SYan Zheng struct mapping_node {
92e9a28dc5SQu Wenruo struct {
935d4f98a2SYan Zheng struct rb_node rb_node;
945d4f98a2SYan Zheng u64 bytenr;
95e9a28dc5SQu Wenruo }; /* Use rb_simle_node for search/insert */
965d4f98a2SYan Zheng void *data;
975d4f98a2SYan Zheng };
985d4f98a2SYan Zheng
995d4f98a2SYan Zheng struct mapping_tree {
1005d4f98a2SYan Zheng struct rb_root rb_root;
1015d4f98a2SYan Zheng spinlock_t lock;
1025d4f98a2SYan Zheng };
1035d4f98a2SYan Zheng
1045d4f98a2SYan Zheng /*
1055d4f98a2SYan Zheng * present a tree block to process
1065d4f98a2SYan Zheng */
1075d4f98a2SYan Zheng struct tree_block {
108e9a28dc5SQu Wenruo struct {
1095d4f98a2SYan Zheng struct rb_node rb_node;
1105d4f98a2SYan Zheng u64 bytenr;
111e9a28dc5SQu Wenruo }; /* Use rb_simple_node for search/insert */
112f7ba2d37SJosef Bacik u64 owner;
1135d4f98a2SYan Zheng struct btrfs_key key;
1145d4f98a2SYan Zheng unsigned int level:8;
1155d4f98a2SYan Zheng unsigned int key_ready:1;
1165d4f98a2SYan Zheng };
1175d4f98a2SYan Zheng
1180257bb82SYan, Zheng #define MAX_EXTENTS 128
1190257bb82SYan, Zheng
1200257bb82SYan, Zheng struct file_extent_cluster {
1210257bb82SYan, Zheng u64 start;
1220257bb82SYan, Zheng u64 end;
1230257bb82SYan, Zheng u64 boundary[MAX_EXTENTS];
1240257bb82SYan, Zheng unsigned int nr;
1250257bb82SYan, Zheng };
1260257bb82SYan, Zheng
1275d4f98a2SYan Zheng struct reloc_control {
1285d4f98a2SYan Zheng /* block group to relocate */
12932da5386SDavid Sterba struct btrfs_block_group *block_group;
1305d4f98a2SYan Zheng /* extent tree */
1315d4f98a2SYan Zheng struct btrfs_root *extent_root;
1325d4f98a2SYan Zheng /* inode for moving data */
1335d4f98a2SYan Zheng struct inode *data_inode;
1343fd0a558SYan, Zheng
1353fd0a558SYan, Zheng struct btrfs_block_rsv *block_rsv;
1363fd0a558SYan, Zheng
137a26195a5SQu Wenruo struct btrfs_backref_cache backref_cache;
1383fd0a558SYan, Zheng
1393fd0a558SYan, Zheng struct file_extent_cluster cluster;
1405d4f98a2SYan Zheng /* tree blocks have been processed */
1415d4f98a2SYan Zheng struct extent_io_tree processed_blocks;
1425d4f98a2SYan Zheng /* map start of tree root to corresponding reloc tree */
1435d4f98a2SYan Zheng struct mapping_tree reloc_root_tree;
1445d4f98a2SYan Zheng /* list of reloc trees */
1455d4f98a2SYan Zheng struct list_head reloc_roots;
146d2311e69SQu Wenruo /* list of subvolume trees that get relocated */
147d2311e69SQu Wenruo struct list_head dirty_subvol_roots;
1483fd0a558SYan, Zheng /* size of metadata reservation for merging reloc trees */
1493fd0a558SYan, Zheng u64 merging_rsv_size;
1503fd0a558SYan, Zheng /* size of relocated tree nodes */
1513fd0a558SYan, Zheng u64 nodes_relocated;
1520647bf56SWang Shilong /* reserved size for block group relocation*/
1530647bf56SWang Shilong u64 reserved_bytes;
1543fd0a558SYan, Zheng
1555d4f98a2SYan Zheng u64 search_start;
1565d4f98a2SYan Zheng u64 extents_found;
1573fd0a558SYan, Zheng
1583fd0a558SYan, Zheng unsigned int stage:8;
1593fd0a558SYan, Zheng unsigned int create_reloc_tree:1;
1603fd0a558SYan, Zheng unsigned int merge_reloc_tree:1;
1615d4f98a2SYan Zheng unsigned int found_file_extent:1;
1625d4f98a2SYan Zheng };
1635d4f98a2SYan Zheng
/*
 * Stages of data relocation.
 * NOTE(review): presumably the values held in reloc_control::stage - confirm.
 */
#define MOVE_DATA_EXTENTS	0
#define UPDATE_DATA_PTRS	1
1675d4f98a2SYan Zheng
mark_block_processed(struct reloc_control * rc,struct btrfs_backref_node * node)1689569cc20SQu Wenruo static void mark_block_processed(struct reloc_control *rc,
169a26195a5SQu Wenruo struct btrfs_backref_node *node)
1709569cc20SQu Wenruo {
1719569cc20SQu Wenruo u32 blocksize;
1729569cc20SQu Wenruo
1739569cc20SQu Wenruo if (node->level == 0 ||
1749569cc20SQu Wenruo in_range(node->bytenr, rc->block_group->start,
1759569cc20SQu Wenruo rc->block_group->length)) {
1769569cc20SQu Wenruo blocksize = rc->extent_root->fs_info->nodesize;
1770acd32c2SDavid Sterba set_extent_bit(&rc->processed_blocks, node->bytenr,
1781d126800SDavid Sterba node->bytenr + blocksize - 1, EXTENT_DIRTY, NULL);
1799569cc20SQu Wenruo }
1809569cc20SQu Wenruo node->processed = 1;
1819569cc20SQu Wenruo }
1829569cc20SQu Wenruo
1835d4f98a2SYan Zheng
mapping_tree_init(struct mapping_tree * tree)1845d4f98a2SYan Zheng static void mapping_tree_init(struct mapping_tree *tree)
1855d4f98a2SYan Zheng {
1866bef4d31SEric Paris tree->rb_root = RB_ROOT;
1875d4f98a2SYan Zheng spin_lock_init(&tree->lock);
1885d4f98a2SYan Zheng }
1895d4f98a2SYan Zheng
1905d4f98a2SYan Zheng /*
1915d4f98a2SYan Zheng * walk up backref nodes until reach node presents tree root
1925d4f98a2SYan Zheng */
walk_up_backref(struct btrfs_backref_node * node,struct btrfs_backref_edge * edges[],int * index)193a26195a5SQu Wenruo static struct btrfs_backref_node *walk_up_backref(
194a26195a5SQu Wenruo struct btrfs_backref_node *node,
195a26195a5SQu Wenruo struct btrfs_backref_edge *edges[], int *index)
1965d4f98a2SYan Zheng {
197a26195a5SQu Wenruo struct btrfs_backref_edge *edge;
1985d4f98a2SYan Zheng int idx = *index;
1995d4f98a2SYan Zheng
2005d4f98a2SYan Zheng while (!list_empty(&node->upper)) {
2015d4f98a2SYan Zheng edge = list_entry(node->upper.next,
202a26195a5SQu Wenruo struct btrfs_backref_edge, list[LOWER]);
2035d4f98a2SYan Zheng edges[idx++] = edge;
2045d4f98a2SYan Zheng node = edge->node[UPPER];
2055d4f98a2SYan Zheng }
2063fd0a558SYan, Zheng BUG_ON(node->detached);
2075d4f98a2SYan Zheng *index = idx;
2085d4f98a2SYan Zheng return node;
2095d4f98a2SYan Zheng }
2105d4f98a2SYan Zheng
2115d4f98a2SYan Zheng /*
2125d4f98a2SYan Zheng * walk down backref nodes to find start of next reference path
2135d4f98a2SYan Zheng */
walk_down_backref(struct btrfs_backref_edge * edges[],int * index)214a26195a5SQu Wenruo static struct btrfs_backref_node *walk_down_backref(
215a26195a5SQu Wenruo struct btrfs_backref_edge *edges[], int *index)
2165d4f98a2SYan Zheng {
217a26195a5SQu Wenruo struct btrfs_backref_edge *edge;
218a26195a5SQu Wenruo struct btrfs_backref_node *lower;
2195d4f98a2SYan Zheng int idx = *index;
2205d4f98a2SYan Zheng
2215d4f98a2SYan Zheng while (idx > 0) {
2225d4f98a2SYan Zheng edge = edges[idx - 1];
2235d4f98a2SYan Zheng lower = edge->node[LOWER];
2245d4f98a2SYan Zheng if (list_is_last(&edge->list[LOWER], &lower->upper)) {
2255d4f98a2SYan Zheng idx--;
2265d4f98a2SYan Zheng continue;
2275d4f98a2SYan Zheng }
2285d4f98a2SYan Zheng edge = list_entry(edge->list[LOWER].next,
229a26195a5SQu Wenruo struct btrfs_backref_edge, list[LOWER]);
2305d4f98a2SYan Zheng edges[idx - 1] = edge;
2315d4f98a2SYan Zheng *index = idx;
2325d4f98a2SYan Zheng return edge->node[UPPER];
2335d4f98a2SYan Zheng }
2345d4f98a2SYan Zheng *index = 0;
2355d4f98a2SYan Zheng return NULL;
2365d4f98a2SYan Zheng }
2375d4f98a2SYan Zheng
reloc_root_is_dead(const struct btrfs_root * root)2384dc6ea8bSDavid Sterba static bool reloc_root_is_dead(const struct btrfs_root *root)
2396282675eSQu Wenruo {
2406282675eSQu Wenruo /*
2416282675eSQu Wenruo * Pair with set_bit/clear_bit in clean_dirty_subvols and
2426282675eSQu Wenruo * btrfs_update_reloc_root. We need to see the updated bit before
2436282675eSQu Wenruo * trying to access reloc_root
2446282675eSQu Wenruo */
2456282675eSQu Wenruo smp_rmb();
2466282675eSQu Wenruo if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
2476282675eSQu Wenruo return true;
2486282675eSQu Wenruo return false;
2496282675eSQu Wenruo }
2506282675eSQu Wenruo
2516282675eSQu Wenruo /*
2526282675eSQu Wenruo * Check if this subvolume tree has valid reloc tree.
2536282675eSQu Wenruo *
2546282675eSQu Wenruo * Reloc tree after swap is considered dead, thus not considered as valid.
2556282675eSQu Wenruo * This is enough for most callers, as they don't distinguish dead reloc root
25655465730SQu Wenruo * from no reloc root. But btrfs_should_ignore_reloc_root() below is a
25755465730SQu Wenruo * special case.
2586282675eSQu Wenruo */
have_reloc_root(const struct btrfs_root * root)2594dc6ea8bSDavid Sterba static bool have_reloc_root(const struct btrfs_root *root)
2606282675eSQu Wenruo {
2616282675eSQu Wenruo if (reloc_root_is_dead(root))
2626282675eSQu Wenruo return false;
2636282675eSQu Wenruo if (!root->reloc_root)
2646282675eSQu Wenruo return false;
2656282675eSQu Wenruo return true;
2666282675eSQu Wenruo }
267f2a97a9dSDavid Sterba
btrfs_should_ignore_reloc_root(const struct btrfs_root * root)2684dc6ea8bSDavid Sterba bool btrfs_should_ignore_reloc_root(const struct btrfs_root *root)
2693fd0a558SYan, Zheng {
2703fd0a558SYan, Zheng struct btrfs_root *reloc_root;
2713fd0a558SYan, Zheng
27292a7cc42SQu Wenruo if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
2735ae94c63SDavid Sterba return false;
2743fd0a558SYan, Zheng
2756282675eSQu Wenruo /* This root has been merged with its reloc tree, we can ignore it */
2766282675eSQu Wenruo if (reloc_root_is_dead(root))
2775ae94c63SDavid Sterba return true;
2786282675eSQu Wenruo
2793fd0a558SYan, Zheng reloc_root = root->reloc_root;
2803fd0a558SYan, Zheng if (!reloc_root)
2815ae94c63SDavid Sterba return false;
2823fd0a558SYan, Zheng
2834d4225fcSJosef Bacik if (btrfs_header_generation(reloc_root->commit_root) ==
2844d4225fcSJosef Bacik root->fs_info->running_transaction->transid)
2855ae94c63SDavid Sterba return false;
2863fd0a558SYan, Zheng /*
2875ae94c63SDavid Sterba * If there is reloc tree and it was created in previous transaction
2885ae94c63SDavid Sterba * backref lookup can find the reloc tree, so backref node for the fs
2895ae94c63SDavid Sterba * tree root is useless for relocation.
2903fd0a558SYan, Zheng */
2915ae94c63SDavid Sterba return true;
2923fd0a558SYan, Zheng }
29355465730SQu Wenruo
2945d4f98a2SYan Zheng /*
2955d4f98a2SYan Zheng * find reloc tree by address of tree root
2965d4f98a2SYan Zheng */
find_reloc_root(struct btrfs_fs_info * fs_info,u64 bytenr)2972433bea5SQu Wenruo struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr)
2985d4f98a2SYan Zheng {
2992433bea5SQu Wenruo struct reloc_control *rc = fs_info->reloc_ctl;
3005d4f98a2SYan Zheng struct rb_node *rb_node;
3015d4f98a2SYan Zheng struct mapping_node *node;
3025d4f98a2SYan Zheng struct btrfs_root *root = NULL;
3035d4f98a2SYan Zheng
3042433bea5SQu Wenruo ASSERT(rc);
3055d4f98a2SYan Zheng spin_lock(&rc->reloc_root_tree.lock);
306e9a28dc5SQu Wenruo rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root, bytenr);
3075d4f98a2SYan Zheng if (rb_node) {
3085d4f98a2SYan Zheng node = rb_entry(rb_node, struct mapping_node, rb_node);
3090d031dc4SYu Zhe root = node->data;
3105d4f98a2SYan Zheng }
3115d4f98a2SYan Zheng spin_unlock(&rc->reloc_root_tree.lock);
31200246528SJosef Bacik return btrfs_grab_root(root);
3135d4f98a2SYan Zheng }
3145d4f98a2SYan Zheng
3155d4f98a2SYan Zheng /*
31629db137bSQu Wenruo * For useless nodes, do two major clean ups:
31729db137bSQu Wenruo *
31829db137bSQu Wenruo * - Cleanup the children edges and nodes
31929db137bSQu Wenruo * If child node is also orphan (no parent) during cleanup, then the child
32029db137bSQu Wenruo * node will also be cleaned up.
32129db137bSQu Wenruo *
32229db137bSQu Wenruo * - Freeing up leaves (level 0), keeps nodes detached
32329db137bSQu Wenruo * For nodes, the node is still cached as "detached"
32429db137bSQu Wenruo *
32529db137bSQu Wenruo * Return false if @node is not in the @useless_nodes list.
32629db137bSQu Wenruo * Return true if @node is in the @useless_nodes list.
32729db137bSQu Wenruo */
handle_useless_nodes(struct reloc_control * rc,struct btrfs_backref_node * node)32829db137bSQu Wenruo static bool handle_useless_nodes(struct reloc_control *rc,
329a26195a5SQu Wenruo struct btrfs_backref_node *node)
33029db137bSQu Wenruo {
331a26195a5SQu Wenruo struct btrfs_backref_cache *cache = &rc->backref_cache;
33229db137bSQu Wenruo struct list_head *useless_node = &cache->useless_node;
33329db137bSQu Wenruo bool ret = false;
33429db137bSQu Wenruo
33529db137bSQu Wenruo while (!list_empty(useless_node)) {
336a26195a5SQu Wenruo struct btrfs_backref_node *cur;
33729db137bSQu Wenruo
338a26195a5SQu Wenruo cur = list_first_entry(useless_node, struct btrfs_backref_node,
33929db137bSQu Wenruo list);
34029db137bSQu Wenruo list_del_init(&cur->list);
34129db137bSQu Wenruo
34229db137bSQu Wenruo /* Only tree root nodes can be added to @useless_nodes */
34329db137bSQu Wenruo ASSERT(list_empty(&cur->upper));
34429db137bSQu Wenruo
34529db137bSQu Wenruo if (cur == node)
34629db137bSQu Wenruo ret = true;
34729db137bSQu Wenruo
34829db137bSQu Wenruo /* The node is the lowest node */
34929db137bSQu Wenruo if (cur->lowest) {
35029db137bSQu Wenruo list_del_init(&cur->lower);
35129db137bSQu Wenruo cur->lowest = 0;
35229db137bSQu Wenruo }
35329db137bSQu Wenruo
35429db137bSQu Wenruo /* Cleanup the lower edges */
35529db137bSQu Wenruo while (!list_empty(&cur->lower)) {
356a26195a5SQu Wenruo struct btrfs_backref_edge *edge;
357a26195a5SQu Wenruo struct btrfs_backref_node *lower;
35829db137bSQu Wenruo
35929db137bSQu Wenruo edge = list_entry(cur->lower.next,
360a26195a5SQu Wenruo struct btrfs_backref_edge, list[UPPER]);
36129db137bSQu Wenruo list_del(&edge->list[UPPER]);
36229db137bSQu Wenruo list_del(&edge->list[LOWER]);
36329db137bSQu Wenruo lower = edge->node[LOWER];
364741188d3SQu Wenruo btrfs_backref_free_edge(cache, edge);
36529db137bSQu Wenruo
36629db137bSQu Wenruo /* Child node is also orphan, queue for cleanup */
36729db137bSQu Wenruo if (list_empty(&lower->upper))
36829db137bSQu Wenruo list_add(&lower->list, useless_node);
36929db137bSQu Wenruo }
37029db137bSQu Wenruo /* Mark this block processed for relocation */
37129db137bSQu Wenruo mark_block_processed(rc, cur);
37229db137bSQu Wenruo
37329db137bSQu Wenruo /*
37429db137bSQu Wenruo * Backref nodes for tree leaves are deleted from the cache.
37529db137bSQu Wenruo * Backref nodes for upper level tree blocks are left in the
37629db137bSQu Wenruo * cache to avoid unnecessary backref lookup.
37729db137bSQu Wenruo */
37829db137bSQu Wenruo if (cur->level > 0) {
37929db137bSQu Wenruo list_add(&cur->list, &cache->detached);
38029db137bSQu Wenruo cur->detached = 1;
38129db137bSQu Wenruo } else {
38229db137bSQu Wenruo rb_erase(&cur->rb_node, &cache->rb_root);
383741188d3SQu Wenruo btrfs_backref_free_node(cache, cur);
38429db137bSQu Wenruo }
38529db137bSQu Wenruo }
38629db137bSQu Wenruo return ret;
38729db137bSQu Wenruo }
38829db137bSQu Wenruo
38929db137bSQu Wenruo /*
390e7d571c7SQu Wenruo * Build backref tree for a given tree block. Root of the backref tree
391e7d571c7SQu Wenruo * corresponds the tree block, leaves of the backref tree correspond roots of
392e7d571c7SQu Wenruo * b-trees that reference the tree block.
393e7d571c7SQu Wenruo *
394e7d571c7SQu Wenruo * The basic idea of this function is check backrefs of a given block to find
395e7d571c7SQu Wenruo * upper level blocks that reference the block, and then check backrefs of
396e7d571c7SQu Wenruo * these upper level blocks recursively. The recursion stops when tree root is
397e7d571c7SQu Wenruo * reached or backrefs for the block is cached.
398e7d571c7SQu Wenruo *
399e7d571c7SQu Wenruo * NOTE: if we find that backrefs for a block are cached, we know backrefs for
400e7d571c7SQu Wenruo * all upper level blocks that directly/indirectly reference the block are also
401e7d571c7SQu Wenruo * cached.
402e7d571c7SQu Wenruo */
build_backref_tree(struct btrfs_trans_handle * trans,struct reloc_control * rc,struct btrfs_key * node_key,int level,u64 bytenr)403a26195a5SQu Wenruo static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
404eb96e221SFilipe Manana struct btrfs_trans_handle *trans,
405e7d571c7SQu Wenruo struct reloc_control *rc, struct btrfs_key *node_key,
406e7d571c7SQu Wenruo int level, u64 bytenr)
407e7d571c7SQu Wenruo {
408e7d571c7SQu Wenruo struct btrfs_backref_iter *iter;
409a26195a5SQu Wenruo struct btrfs_backref_cache *cache = &rc->backref_cache;
410e7d571c7SQu Wenruo /* For searching parent of TREE_BLOCK_REF */
411e7d571c7SQu Wenruo struct btrfs_path *path;
412a26195a5SQu Wenruo struct btrfs_backref_node *cur;
413a26195a5SQu Wenruo struct btrfs_backref_node *node = NULL;
414a26195a5SQu Wenruo struct btrfs_backref_edge *edge;
415e7d571c7SQu Wenruo int ret;
416e7d571c7SQu Wenruo int err = 0;
417e7d571c7SQu Wenruo
418d68194b2SDavid Sterba iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info);
419e7d571c7SQu Wenruo if (!iter)
420e7d571c7SQu Wenruo return ERR_PTR(-ENOMEM);
421e7d571c7SQu Wenruo path = btrfs_alloc_path();
422e7d571c7SQu Wenruo if (!path) {
423e7d571c7SQu Wenruo err = -ENOMEM;
424e7d571c7SQu Wenruo goto out;
425e7d571c7SQu Wenruo }
426e7d571c7SQu Wenruo
427b1818dabSQu Wenruo node = btrfs_backref_alloc_node(cache, bytenr, level);
428e7d571c7SQu Wenruo if (!node) {
429e7d571c7SQu Wenruo err = -ENOMEM;
430e7d571c7SQu Wenruo goto out;
431e7d571c7SQu Wenruo }
432e7d571c7SQu Wenruo
433e7d571c7SQu Wenruo node->lowest = 1;
434e7d571c7SQu Wenruo cur = node;
435e7d571c7SQu Wenruo
436e7d571c7SQu Wenruo /* Breadth-first search to build backref cache */
437e7d571c7SQu Wenruo do {
438eb96e221SFilipe Manana ret = btrfs_backref_add_tree_node(trans, cache, path, iter,
439eb96e221SFilipe Manana node_key, cur);
440e7d571c7SQu Wenruo if (ret < 0) {
441e7d571c7SQu Wenruo err = ret;
442e7d571c7SQu Wenruo goto out;
443e7d571c7SQu Wenruo }
444e7d571c7SQu Wenruo edge = list_first_entry_or_null(&cache->pending_edge,
445a26195a5SQu Wenruo struct btrfs_backref_edge, list[UPPER]);
446e7d571c7SQu Wenruo /*
447e7d571c7SQu Wenruo * The pending list isn't empty, take the first block to
448e7d571c7SQu Wenruo * process
449e7d571c7SQu Wenruo */
450e7d571c7SQu Wenruo if (edge) {
4515d4f98a2SYan Zheng list_del_init(&edge->list[UPPER]);
4525d4f98a2SYan Zheng cur = edge->node[UPPER];
4535d4f98a2SYan Zheng }
454e7d571c7SQu Wenruo } while (edge);
4555d4f98a2SYan Zheng
4561f872924SQu Wenruo /* Finish the upper linkage of newly added edges/nodes */
457fc997ed0SQu Wenruo ret = btrfs_backref_finish_upper_links(cache, node);
4581f872924SQu Wenruo if (ret < 0) {
4591f872924SQu Wenruo err = ret;
46075bfb9afSJosef Bacik goto out;
46175bfb9afSJosef Bacik }
46275bfb9afSJosef Bacik
46329db137bSQu Wenruo if (handle_useless_nodes(rc, node))
4643fd0a558SYan, Zheng node = NULL;
4655d4f98a2SYan Zheng out:
46671f572a9SQu Wenruo btrfs_backref_iter_free(iter);
46771f572a9SQu Wenruo btrfs_free_path(path);
4685d4f98a2SYan Zheng if (err) {
4691b23ea18SQu Wenruo btrfs_backref_error_cleanup(cache, node);
4705d4f98a2SYan Zheng return ERR_PTR(err);
4715d4f98a2SYan Zheng }
47275bfb9afSJosef Bacik ASSERT(!node || !node->detached);
47384780289SQu Wenruo ASSERT(list_empty(&cache->useless_node) &&
47484780289SQu Wenruo list_empty(&cache->pending_edge));
4755d4f98a2SYan Zheng return node;
4765d4f98a2SYan Zheng }
4775d4f98a2SYan Zheng
4785d4f98a2SYan Zheng /*
4793fd0a558SYan, Zheng * helper to add backref node for the newly created snapshot.
4803fd0a558SYan, Zheng * the backref node is created by cloning backref node that
4813fd0a558SYan, Zheng * corresponds to root of source tree
4823fd0a558SYan, Zheng */
clone_backref_node(struct btrfs_trans_handle * trans,struct reloc_control * rc,const struct btrfs_root * src,struct btrfs_root * dest)4833fd0a558SYan, Zheng static int clone_backref_node(struct btrfs_trans_handle *trans,
4843fd0a558SYan, Zheng struct reloc_control *rc,
4854dc6ea8bSDavid Sterba const struct btrfs_root *src,
4863fd0a558SYan, Zheng struct btrfs_root *dest)
4873fd0a558SYan, Zheng {
4883fd0a558SYan, Zheng struct btrfs_root *reloc_root = src->reloc_root;
489a26195a5SQu Wenruo struct btrfs_backref_cache *cache = &rc->backref_cache;
490a26195a5SQu Wenruo struct btrfs_backref_node *node = NULL;
491a26195a5SQu Wenruo struct btrfs_backref_node *new_node;
492a26195a5SQu Wenruo struct btrfs_backref_edge *edge;
493a26195a5SQu Wenruo struct btrfs_backref_edge *new_edge;
4943fd0a558SYan, Zheng struct rb_node *rb_node;
4953fd0a558SYan, Zheng
496e9a28dc5SQu Wenruo rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
4973fd0a558SYan, Zheng if (rb_node) {
498a26195a5SQu Wenruo node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
4993fd0a558SYan, Zheng if (node->detached)
5003fd0a558SYan, Zheng node = NULL;
5013fd0a558SYan, Zheng else
5023fd0a558SYan, Zheng BUG_ON(node->new_bytenr != reloc_root->node->start);
5033fd0a558SYan, Zheng }
5043fd0a558SYan, Zheng
5053fd0a558SYan, Zheng if (!node) {
506e9a28dc5SQu Wenruo rb_node = rb_simple_search(&cache->rb_root,
5073fd0a558SYan, Zheng reloc_root->commit_root->start);
5083fd0a558SYan, Zheng if (rb_node) {
509a26195a5SQu Wenruo node = rb_entry(rb_node, struct btrfs_backref_node,
5103fd0a558SYan, Zheng rb_node);
5113fd0a558SYan, Zheng BUG_ON(node->detached);
5123fd0a558SYan, Zheng }
5133fd0a558SYan, Zheng }
5143fd0a558SYan, Zheng
5153fd0a558SYan, Zheng if (!node)
5163fd0a558SYan, Zheng return 0;
5173fd0a558SYan, Zheng
518b1818dabSQu Wenruo new_node = btrfs_backref_alloc_node(cache, dest->node->start,
519b1818dabSQu Wenruo node->level);
5203fd0a558SYan, Zheng if (!new_node)
5213fd0a558SYan, Zheng return -ENOMEM;
5223fd0a558SYan, Zheng
5233fd0a558SYan, Zheng new_node->lowest = node->lowest;
5246848ad64SYan, Zheng new_node->checked = 1;
52500246528SJosef Bacik new_node->root = btrfs_grab_root(dest);
5260b530bc5SJosef Bacik ASSERT(new_node->root);
5273fd0a558SYan, Zheng
5283fd0a558SYan, Zheng if (!node->lowest) {
5293fd0a558SYan, Zheng list_for_each_entry(edge, &node->lower, list[UPPER]) {
53047254d07SQu Wenruo new_edge = btrfs_backref_alloc_edge(cache);
5313fd0a558SYan, Zheng if (!new_edge)
5323fd0a558SYan, Zheng goto fail;
5333fd0a558SYan, Zheng
534f39911e5SQu Wenruo btrfs_backref_link_edge(new_edge, edge->node[LOWER],
535f39911e5SQu Wenruo new_node, LINK_UPPER);
5363fd0a558SYan, Zheng }
53776b9e23dSMiao Xie } else {
53876b9e23dSMiao Xie list_add_tail(&new_node->lower, &cache->leaves);
5393fd0a558SYan, Zheng }
5403fd0a558SYan, Zheng
541e9a28dc5SQu Wenruo rb_node = rb_simple_insert(&cache->rb_root, new_node->bytenr,
5423fd0a558SYan, Zheng &new_node->rb_node);
54343c04fb1SJeff Mahoney if (rb_node)
544982c92cbSQu Wenruo btrfs_backref_panic(trans->fs_info, new_node->bytenr, -EEXIST);
5453fd0a558SYan, Zheng
5463fd0a558SYan, Zheng if (!new_node->lowest) {
5473fd0a558SYan, Zheng list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
5483fd0a558SYan, Zheng list_add_tail(&new_edge->list[LOWER],
5493fd0a558SYan, Zheng &new_edge->node[LOWER]->upper);
5503fd0a558SYan, Zheng }
5513fd0a558SYan, Zheng }
5523fd0a558SYan, Zheng return 0;
5533fd0a558SYan, Zheng fail:
5543fd0a558SYan, Zheng while (!list_empty(&new_node->lower)) {
5553fd0a558SYan, Zheng new_edge = list_entry(new_node->lower.next,
556a26195a5SQu Wenruo struct btrfs_backref_edge, list[UPPER]);
5573fd0a558SYan, Zheng list_del(&new_edge->list[UPPER]);
558741188d3SQu Wenruo btrfs_backref_free_edge(cache, new_edge);
5593fd0a558SYan, Zheng }
560741188d3SQu Wenruo btrfs_backref_free_node(cache, new_node);
5613fd0a558SYan, Zheng return -ENOMEM;
5623fd0a558SYan, Zheng }
5633fd0a558SYan, Zheng
5643fd0a558SYan, Zheng /*
5655d4f98a2SYan Zheng * helper to add 'address of tree root -> reloc tree' mapping
5665d4f98a2SYan Zheng */
__add_reloc_root(struct btrfs_root * root)567ffd7b339SJeff Mahoney static int __must_check __add_reloc_root(struct btrfs_root *root)
5685d4f98a2SYan Zheng {
5690b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
5705d4f98a2SYan Zheng struct rb_node *rb_node;
5715d4f98a2SYan Zheng struct mapping_node *node;
5720b246afaSJeff Mahoney struct reloc_control *rc = fs_info->reloc_ctl;
5735d4f98a2SYan Zheng
5745d4f98a2SYan Zheng node = kmalloc(sizeof(*node), GFP_NOFS);
575ffd7b339SJeff Mahoney if (!node)
576ffd7b339SJeff Mahoney return -ENOMEM;
5775d4f98a2SYan Zheng
578ea287ab1SJosef Bacik node->bytenr = root->commit_root->start;
5795d4f98a2SYan Zheng node->data = root;
5805d4f98a2SYan Zheng
5815d4f98a2SYan Zheng spin_lock(&rc->reloc_root_tree.lock);
582e9a28dc5SQu Wenruo rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
5835d4f98a2SYan Zheng node->bytenr, &node->rb_node);
5845d4f98a2SYan Zheng spin_unlock(&rc->reloc_root_tree.lock);
585ffd7b339SJeff Mahoney if (rb_node) {
58657a304cfSJosef Bacik btrfs_err(fs_info,
5875d163e0eSJeff Mahoney "Duplicate root found for start=%llu while inserting into relocation tree",
5885d163e0eSJeff Mahoney node->bytenr);
58957a304cfSJosef Bacik return -EEXIST;
590ffd7b339SJeff Mahoney }
5915d4f98a2SYan Zheng
5925d4f98a2SYan Zheng list_add_tail(&root->root_list, &rc->reloc_roots);
5935d4f98a2SYan Zheng return 0;
5945d4f98a2SYan Zheng }
5955d4f98a2SYan Zheng
5965d4f98a2SYan Zheng /*
597c974c464SWang Shilong * helper to delete the 'address of tree root -> reloc tree'
5985d4f98a2SYan Zheng * mapping
5995d4f98a2SYan Zheng */
__del_reloc_root(struct btrfs_root * root)600c974c464SWang Shilong static void __del_reloc_root(struct btrfs_root *root)
6015d4f98a2SYan Zheng {
6020b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
6035d4f98a2SYan Zheng struct rb_node *rb_node;
6045d4f98a2SYan Zheng struct mapping_node *node = NULL;
6050b246afaSJeff Mahoney struct reloc_control *rc = fs_info->reloc_ctl;
606f44deb74SJosef Bacik bool put_ref = false;
6075d4f98a2SYan Zheng
60865c6e82bSQu Wenruo if (rc && root->node) {
6095d4f98a2SYan Zheng spin_lock(&rc->reloc_root_tree.lock);
610e9a28dc5SQu Wenruo rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
611ea287ab1SJosef Bacik root->commit_root->start);
612c974c464SWang Shilong if (rb_node) {
613c974c464SWang Shilong node = rb_entry(rb_node, struct mapping_node, rb_node);
614c974c464SWang Shilong rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
615ea287ab1SJosef Bacik RB_CLEAR_NODE(&node->rb_node);
616c974c464SWang Shilong }
617c974c464SWang Shilong spin_unlock(&rc->reloc_root_tree.lock);
618c78a10aeSJosef Bacik ASSERT(!node || (struct btrfs_root *)node->data == root);
619389305b2SQu Wenruo }
620c974c464SWang Shilong
621f44deb74SJosef Bacik /*
622f44deb74SJosef Bacik * We only put the reloc root here if it's on the list. There's a lot
623f44deb74SJosef Bacik * of places where the pattern is to splice the rc->reloc_roots, process
624f44deb74SJosef Bacik * the reloc roots, and then add the reloc root back onto
625f44deb74SJosef Bacik * rc->reloc_roots. If we call __del_reloc_root while it's off of the
626f44deb74SJosef Bacik * list we don't want the reference being dropped, because the guy
627f44deb74SJosef Bacik * messing with the list is in charge of the reference.
628f44deb74SJosef Bacik */
6290b246afaSJeff Mahoney spin_lock(&fs_info->trans_lock);
630f44deb74SJosef Bacik if (!list_empty(&root->root_list)) {
631f44deb74SJosef Bacik put_ref = true;
632c974c464SWang Shilong list_del_init(&root->root_list);
633f44deb74SJosef Bacik }
6340b246afaSJeff Mahoney spin_unlock(&fs_info->trans_lock);
635f44deb74SJosef Bacik if (put_ref)
636f44deb74SJosef Bacik btrfs_put_root(root);
637c974c464SWang Shilong kfree(node);
638c974c464SWang Shilong }
639c974c464SWang Shilong
640c974c464SWang Shilong /*
641c974c464SWang Shilong * helper to update the 'address of tree root -> reloc tree'
642c974c464SWang Shilong * mapping
643c974c464SWang Shilong */
__update_reloc_root(struct btrfs_root * root)644ea287ab1SJosef Bacik static int __update_reloc_root(struct btrfs_root *root)
645c974c464SWang Shilong {
6460b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
647c974c464SWang Shilong struct rb_node *rb_node;
648c974c464SWang Shilong struct mapping_node *node = NULL;
6490b246afaSJeff Mahoney struct reloc_control *rc = fs_info->reloc_ctl;
650c974c464SWang Shilong
651c974c464SWang Shilong spin_lock(&rc->reloc_root_tree.lock);
652e9a28dc5SQu Wenruo rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
653ea287ab1SJosef Bacik root->commit_root->start);
6545d4f98a2SYan Zheng if (rb_node) {
6555d4f98a2SYan Zheng node = rb_entry(rb_node, struct mapping_node, rb_node);
6565d4f98a2SYan Zheng rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
6575d4f98a2SYan Zheng }
6585d4f98a2SYan Zheng spin_unlock(&rc->reloc_root_tree.lock);
6595d4f98a2SYan Zheng
6608f71f3e0SLiu Bo if (!node)
6618f71f3e0SLiu Bo return 0;
6625d4f98a2SYan Zheng BUG_ON((struct btrfs_root *)node->data != root);
6635d4f98a2SYan Zheng
6645d4f98a2SYan Zheng spin_lock(&rc->reloc_root_tree.lock);
665ea287ab1SJosef Bacik node->bytenr = root->node->start;
666e9a28dc5SQu Wenruo rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
6675d4f98a2SYan Zheng node->bytenr, &node->rb_node);
6685d4f98a2SYan Zheng spin_unlock(&rc->reloc_root_tree.lock);
66943c04fb1SJeff Mahoney if (rb_node)
670982c92cbSQu Wenruo btrfs_backref_panic(fs_info, node->bytenr, -EEXIST);
6715d4f98a2SYan Zheng return 0;
6725d4f98a2SYan Zheng }
6735d4f98a2SYan Zheng
/*
 * Create a reloc tree root for @root.
 *
 * The new root item is inserted into the tree root under the key
 * (BTRFS_TREE_RELOC_OBJECTID, BTRFS_ROOT_ITEM_KEY, @objectid) and then read
 * back with btrfs_read_tree_root().
 *
 * Returns the new reloc root, or an ERR_PTR() on failure.  Once the root has
 * been copied, any later failure also aborts the transaction.
 */
static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
					    struct btrfs_root *root, u64 objectid)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	/*
	 * True when called by btrfs_init_reloc_root, false when called by
	 * btrfs_reloc_post_snapshot_hook (the source tree is a reloc tree).
	 */
	const bool from_fs_root = (root->root_key.objectid == objectid);
	struct btrfs_key reloc_key = {
		.objectid = BTRFS_TREE_RELOC_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = objectid,
	};
	struct btrfs_root_item *item;
	struct btrfs_root *reloc_root;
	struct extent_buffer *copy;
	bool must_abort = false;
	int ret;

	item = kmalloc(sizeof(*item), GFP_NOFS);
	if (!item)
		return ERR_PTR(-ENOMEM);

	/*
	 * For a fs tree the copy is taken from the commit root, for a reloc
	 * tree it is taken from the current root node.
	 */
	ret = btrfs_copy_root(trans, root,
			      from_fs_root ? root->commit_root : root->node,
			      &copy, BTRFS_TREE_RELOC_OBJECTID);
	if (ret)
		goto free_item;

	if (from_fs_root) {
		/*
		 * Set the last_snapshot field to the generation of the commit
		 * root - like this ctree.c:btrfs_block_can_be_shared() behaves
		 * correctly (returns true) when the relocation root is created
		 * either inside the critical section of a transaction commit
		 * (through transaction.c:qgroup_account_snapshot()) and when
		 * it's created before the transaction commit is started.
		 *
		 * When the source tree is a reloc tree, all tree blocks
		 * modified after it was created have the RELOC flag set in
		 * their headers, so it's OK to not update 'last_snapshot'.
		 */
		btrfs_set_root_last_snapshot(&root->root_item,
				btrfs_header_generation(root->commit_root));
	}

	/*
	 * We have changed references at this point, we must abort the
	 * transaction if anything fails.
	 */
	must_abort = true;

	/* Start from the source root item and point it at the copied block. */
	memcpy(item, &root->root_item, sizeof(*item));
	btrfs_set_root_bytenr(item, copy->start);
	btrfs_set_root_level(item, btrfs_header_level(copy));
	btrfs_set_root_generation(item, trans->transid);

	if (from_fs_root) {
		btrfs_set_root_refs(item, 0);
		memset(&item->drop_progress, 0,
		       sizeof(struct btrfs_disk_key));
		btrfs_set_root_drop_level(item, 0);
	}

	btrfs_tree_unlock(copy);
	free_extent_buffer(copy);

	ret = btrfs_insert_root(trans, fs_info->tree_root, &reloc_key, item);
	if (ret)
		goto free_item;

	kfree(item);

	reloc_root = btrfs_read_tree_root(fs_info->tree_root, &reloc_key);
	if (IS_ERR(reloc_root)) {
		/* The root item was already freed above. */
		ret = PTR_ERR(reloc_root);
		goto maybe_abort;
	}
	set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
	reloc_root->last_trans = trans->transid;
	return reloc_root;

free_item:
	kfree(item);
maybe_abort:
	if (must_abort)
		btrfs_abort_transaction(trans, ret);
	return ERR_PTR(ret);
}
7693fd0a558SYan, Zheng
7703fd0a558SYan, Zheng /*
7713fd0a558SYan, Zheng * create reloc tree for a given fs tree. reloc tree is just a
7723fd0a558SYan, Zheng * snapshot of the fs tree with special root objectid.
773f44deb74SJosef Bacik *
774f44deb74SJosef Bacik * The reloc_root comes out of here with two references, one for
775f44deb74SJosef Bacik * root->reloc_root, and another for being on the rc->reloc_roots list.
7763fd0a558SYan, Zheng */
btrfs_init_reloc_root(struct btrfs_trans_handle * trans,struct btrfs_root * root)7773fd0a558SYan, Zheng int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
7783fd0a558SYan, Zheng struct btrfs_root *root)
7793fd0a558SYan, Zheng {
7800b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
7813fd0a558SYan, Zheng struct btrfs_root *reloc_root;
7820b246afaSJeff Mahoney struct reloc_control *rc = fs_info->reloc_ctl;
78320dd2cbfSMiao Xie struct btrfs_block_rsv *rsv;
7843fd0a558SYan, Zheng int clear_rsv = 0;
785ffd7b339SJeff Mahoney int ret;
7863fd0a558SYan, Zheng
787aec7db3bSJosef Bacik if (!rc)
7882abc726aSJosef Bacik return 0;
7892abc726aSJosef Bacik
7901fac4a54SQu Wenruo /*
7911fac4a54SQu Wenruo * The subvolume has reloc tree but the swap is finished, no need to
7921fac4a54SQu Wenruo * create/update the dead reloc tree
7931fac4a54SQu Wenruo */
7946282675eSQu Wenruo if (reloc_root_is_dead(root))
7951fac4a54SQu Wenruo return 0;
7961fac4a54SQu Wenruo
797aec7db3bSJosef Bacik /*
798aec7db3bSJosef Bacik * This is subtle but important. We do not do
799aec7db3bSJosef Bacik * record_root_in_transaction for reloc roots, instead we record their
800aec7db3bSJosef Bacik * corresponding fs root, and then here we update the last trans for the
801aec7db3bSJosef Bacik * reloc root. This means that we have to do this for the entire life
802aec7db3bSJosef Bacik * of the reloc root, regardless of which stage of the relocation we are
803aec7db3bSJosef Bacik * in.
804aec7db3bSJosef Bacik */
8053fd0a558SYan, Zheng if (root->reloc_root) {
8063fd0a558SYan, Zheng reloc_root = root->reloc_root;
8073fd0a558SYan, Zheng reloc_root->last_trans = trans->transid;
8083fd0a558SYan, Zheng return 0;
8093fd0a558SYan, Zheng }
8103fd0a558SYan, Zheng
811aec7db3bSJosef Bacik /*
812aec7db3bSJosef Bacik * We are merging reloc roots, we do not need new reloc trees. Also
813aec7db3bSJosef Bacik * reloc trees never need their own reloc tree.
814aec7db3bSJosef Bacik */
815aec7db3bSJosef Bacik if (!rc->create_reloc_tree ||
816aec7db3bSJosef Bacik root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
817aec7db3bSJosef Bacik return 0;
818aec7db3bSJosef Bacik
81920dd2cbfSMiao Xie if (!trans->reloc_reserved) {
82020dd2cbfSMiao Xie rsv = trans->block_rsv;
8213fd0a558SYan, Zheng trans->block_rsv = rc->block_rsv;
8223fd0a558SYan, Zheng clear_rsv = 1;
8233fd0a558SYan, Zheng }
8243fd0a558SYan, Zheng reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
8253fd0a558SYan, Zheng if (clear_rsv)
82620dd2cbfSMiao Xie trans->block_rsv = rsv;
82700bb36a0SJosef Bacik if (IS_ERR(reloc_root))
82800bb36a0SJosef Bacik return PTR_ERR(reloc_root);
8295d4f98a2SYan Zheng
830ffd7b339SJeff Mahoney ret = __add_reloc_root(reloc_root);
83157a304cfSJosef Bacik ASSERT(ret != -EEXIST);
83200bb36a0SJosef Bacik if (ret) {
83300bb36a0SJosef Bacik /* Pairs with create_reloc_root */
83400bb36a0SJosef Bacik btrfs_put_root(reloc_root);
83500bb36a0SJosef Bacik return ret;
83600bb36a0SJosef Bacik }
837f44deb74SJosef Bacik root->reloc_root = btrfs_grab_root(reloc_root);
8385d4f98a2SYan Zheng return 0;
8395d4f98a2SYan Zheng }
8405d4f98a2SYan Zheng
8415d4f98a2SYan Zheng /*
8425d4f98a2SYan Zheng * update root item of reloc tree
8435d4f98a2SYan Zheng */
btrfs_update_reloc_root(struct btrfs_trans_handle * trans,struct btrfs_root * root)8445d4f98a2SYan Zheng int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
8455d4f98a2SYan Zheng struct btrfs_root *root)
8465d4f98a2SYan Zheng {
8470b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
8485d4f98a2SYan Zheng struct btrfs_root *reloc_root;
8495d4f98a2SYan Zheng struct btrfs_root_item *root_item;
8505d4f98a2SYan Zheng int ret;
8515d4f98a2SYan Zheng
8526282675eSQu Wenruo if (!have_reloc_root(root))
853592fbcd5SJosef Bacik return 0;
8545d4f98a2SYan Zheng
8555d4f98a2SYan Zheng reloc_root = root->reloc_root;
8565d4f98a2SYan Zheng root_item = &reloc_root->root_item;
8575d4f98a2SYan Zheng
858f44deb74SJosef Bacik /*
859f44deb74SJosef Bacik * We are probably ok here, but __del_reloc_root() will drop its ref of
860f44deb74SJosef Bacik * the root. We have the ref for root->reloc_root, but just in case
861f44deb74SJosef Bacik * hold it while we update the reloc root.
862f44deb74SJosef Bacik */
863f44deb74SJosef Bacik btrfs_grab_root(reloc_root);
864f44deb74SJosef Bacik
865d2311e69SQu Wenruo /* root->reloc_root will stay until current relocation finished */
8667ad0c586SQu Wenruo if (fs_info->reloc_ctl && fs_info->reloc_ctl->merge_reloc_tree &&
8673fd0a558SYan, Zheng btrfs_root_refs(root_item) == 0) {
868d2311e69SQu Wenruo set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
8696282675eSQu Wenruo /*
8706282675eSQu Wenruo * Mark the tree as dead before we change reloc_root so
8716282675eSQu Wenruo * have_reloc_root will not touch it from now on.
8726282675eSQu Wenruo */
8736282675eSQu Wenruo smp_wmb();
874c974c464SWang Shilong __del_reloc_root(reloc_root);
8755d4f98a2SYan Zheng }
8765d4f98a2SYan Zheng
8775d4f98a2SYan Zheng if (reloc_root->commit_root != reloc_root->node) {
878ea287ab1SJosef Bacik __update_reloc_root(reloc_root);
8795d4f98a2SYan Zheng btrfs_set_root_node(root_item, reloc_root->node);
8805d4f98a2SYan Zheng free_extent_buffer(reloc_root->commit_root);
8815d4f98a2SYan Zheng reloc_root->commit_root = btrfs_root_node(reloc_root);
8825d4f98a2SYan Zheng }
8835d4f98a2SYan Zheng
8840b246afaSJeff Mahoney ret = btrfs_update_root(trans, fs_info->tree_root,
8855d4f98a2SYan Zheng &reloc_root->root_key, root_item);
886f44deb74SJosef Bacik btrfs_put_root(reloc_root);
887592fbcd5SJosef Bacik return ret;
8885d4f98a2SYan Zheng }
8895d4f98a2SYan Zheng
8905d4f98a2SYan Zheng /*
8915d4f98a2SYan Zheng * helper to find first cached inode with inode number >= objectid
8925d4f98a2SYan Zheng * in a subvolume
8935d4f98a2SYan Zheng */
find_next_inode(struct btrfs_root * root,u64 objectid)8945d4f98a2SYan Zheng static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
8955d4f98a2SYan Zheng {
8965d4f98a2SYan Zheng struct rb_node *node;
8975d4f98a2SYan Zheng struct rb_node *prev;
8985d4f98a2SYan Zheng struct btrfs_inode *entry;
8995d4f98a2SYan Zheng struct inode *inode;
9005d4f98a2SYan Zheng
9015d4f98a2SYan Zheng spin_lock(&root->inode_lock);
9025d4f98a2SYan Zheng again:
9035d4f98a2SYan Zheng node = root->inode_tree.rb_node;
9045d4f98a2SYan Zheng prev = NULL;
9055d4f98a2SYan Zheng while (node) {
9065d4f98a2SYan Zheng prev = node;
9075d4f98a2SYan Zheng entry = rb_entry(node, struct btrfs_inode, rb_node);
9085d4f98a2SYan Zheng
9094a0cc7caSNikolay Borisov if (objectid < btrfs_ino(entry))
9105d4f98a2SYan Zheng node = node->rb_left;
9114a0cc7caSNikolay Borisov else if (objectid > btrfs_ino(entry))
9125d4f98a2SYan Zheng node = node->rb_right;
9135d4f98a2SYan Zheng else
9145d4f98a2SYan Zheng break;
9155d4f98a2SYan Zheng }
9165d4f98a2SYan Zheng if (!node) {
9175d4f98a2SYan Zheng while (prev) {
9185d4f98a2SYan Zheng entry = rb_entry(prev, struct btrfs_inode, rb_node);
9194a0cc7caSNikolay Borisov if (objectid <= btrfs_ino(entry)) {
9205d4f98a2SYan Zheng node = prev;
9215d4f98a2SYan Zheng break;
9225d4f98a2SYan Zheng }
9235d4f98a2SYan Zheng prev = rb_next(prev);
9245d4f98a2SYan Zheng }
9255d4f98a2SYan Zheng }
9265d4f98a2SYan Zheng while (node) {
9275d4f98a2SYan Zheng entry = rb_entry(node, struct btrfs_inode, rb_node);
9285d4f98a2SYan Zheng inode = igrab(&entry->vfs_inode);
9295d4f98a2SYan Zheng if (inode) {
9305d4f98a2SYan Zheng spin_unlock(&root->inode_lock);
9315d4f98a2SYan Zheng return inode;
9325d4f98a2SYan Zheng }
9335d4f98a2SYan Zheng
9344a0cc7caSNikolay Borisov objectid = btrfs_ino(entry) + 1;
9355d4f98a2SYan Zheng if (cond_resched_lock(&root->inode_lock))
9365d4f98a2SYan Zheng goto again;
9375d4f98a2SYan Zheng
9385d4f98a2SYan Zheng node = rb_next(node);
9395d4f98a2SYan Zheng }
9405d4f98a2SYan Zheng spin_unlock(&root->inode_lock);
9415d4f98a2SYan Zheng return NULL;
9425d4f98a2SYan Zheng }
9435d4f98a2SYan Zheng
9445d4f98a2SYan Zheng /*
9455d4f98a2SYan Zheng * get new location of data
9465d4f98a2SYan Zheng */
get_new_location(struct inode * reloc_inode,u64 * new_bytenr,u64 bytenr,u64 num_bytes)9475d4f98a2SYan Zheng static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
9485d4f98a2SYan Zheng u64 bytenr, u64 num_bytes)
9495d4f98a2SYan Zheng {
9505d4f98a2SYan Zheng struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
9515d4f98a2SYan Zheng struct btrfs_path *path;
9525d4f98a2SYan Zheng struct btrfs_file_extent_item *fi;
9535d4f98a2SYan Zheng struct extent_buffer *leaf;
9545d4f98a2SYan Zheng int ret;
9555d4f98a2SYan Zheng
9565d4f98a2SYan Zheng path = btrfs_alloc_path();
9575d4f98a2SYan Zheng if (!path)
9585d4f98a2SYan Zheng return -ENOMEM;
9595d4f98a2SYan Zheng
9605d4f98a2SYan Zheng bytenr -= BTRFS_I(reloc_inode)->index_cnt;
961f85b7379SDavid Sterba ret = btrfs_lookup_file_extent(NULL, root, path,
962f85b7379SDavid Sterba btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
9635d4f98a2SYan Zheng if (ret < 0)
9645d4f98a2SYan Zheng goto out;
9655d4f98a2SYan Zheng if (ret > 0) {
9665d4f98a2SYan Zheng ret = -ENOENT;
9675d4f98a2SYan Zheng goto out;
9685d4f98a2SYan Zheng }
9695d4f98a2SYan Zheng
9705d4f98a2SYan Zheng leaf = path->nodes[0];
9715d4f98a2SYan Zheng fi = btrfs_item_ptr(leaf, path->slots[0],
9725d4f98a2SYan Zheng struct btrfs_file_extent_item);
9735d4f98a2SYan Zheng
9745d4f98a2SYan Zheng BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
9755d4f98a2SYan Zheng btrfs_file_extent_compression(leaf, fi) ||
9765d4f98a2SYan Zheng btrfs_file_extent_encryption(leaf, fi) ||
9775d4f98a2SYan Zheng btrfs_file_extent_other_encoding(leaf, fi));
9785d4f98a2SYan Zheng
9795d4f98a2SYan Zheng if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
98083d4cfd4SJosef Bacik ret = -EINVAL;
9815d4f98a2SYan Zheng goto out;
9825d4f98a2SYan Zheng }
9835d4f98a2SYan Zheng
9845d4f98a2SYan Zheng *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
9855d4f98a2SYan Zheng ret = 0;
9865d4f98a2SYan Zheng out:
9875d4f98a2SYan Zheng btrfs_free_path(path);
9885d4f98a2SYan Zheng return ret;
9895d4f98a2SYan Zheng }
9905d4f98a2SYan Zheng
9915d4f98a2SYan Zheng /*
9925d4f98a2SYan Zheng * update file extent items in the tree leaf to point to
9935d4f98a2SYan Zheng * the new locations.
9945d4f98a2SYan Zheng */
9953fd0a558SYan, Zheng static noinline_for_stack
replace_file_extents(struct btrfs_trans_handle * trans,struct reloc_control * rc,struct btrfs_root * root,struct extent_buffer * leaf)9963fd0a558SYan, Zheng int replace_file_extents(struct btrfs_trans_handle *trans,
9975d4f98a2SYan Zheng struct reloc_control *rc,
9985d4f98a2SYan Zheng struct btrfs_root *root,
9993fd0a558SYan, Zheng struct extent_buffer *leaf)
10005d4f98a2SYan Zheng {
10010b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
10025d4f98a2SYan Zheng struct btrfs_key key;
10035d4f98a2SYan Zheng struct btrfs_file_extent_item *fi;
10045d4f98a2SYan Zheng struct inode *inode = NULL;
10055d4f98a2SYan Zheng u64 parent;
10065d4f98a2SYan Zheng u64 bytenr;
10073fd0a558SYan, Zheng u64 new_bytenr = 0;
10085d4f98a2SYan Zheng u64 num_bytes;
10095d4f98a2SYan Zheng u64 end;
10105d4f98a2SYan Zheng u32 nritems;
10115d4f98a2SYan Zheng u32 i;
101283d4cfd4SJosef Bacik int ret = 0;
10135d4f98a2SYan Zheng int first = 1;
10145d4f98a2SYan Zheng int dirty = 0;
10155d4f98a2SYan Zheng
10165d4f98a2SYan Zheng if (rc->stage != UPDATE_DATA_PTRS)
10175d4f98a2SYan Zheng return 0;
10185d4f98a2SYan Zheng
10195d4f98a2SYan Zheng /* reloc trees always use full backref */
10205d4f98a2SYan Zheng if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
10215d4f98a2SYan Zheng parent = leaf->start;
10225d4f98a2SYan Zheng else
10235d4f98a2SYan Zheng parent = 0;
10245d4f98a2SYan Zheng
10255d4f98a2SYan Zheng nritems = btrfs_header_nritems(leaf);
10265d4f98a2SYan Zheng for (i = 0; i < nritems; i++) {
102782fa113fSQu Wenruo struct btrfs_ref ref = { 0 };
102882fa113fSQu Wenruo
10295d4f98a2SYan Zheng cond_resched();
10305d4f98a2SYan Zheng btrfs_item_key_to_cpu(leaf, &key, i);
10315d4f98a2SYan Zheng if (key.type != BTRFS_EXTENT_DATA_KEY)
10325d4f98a2SYan Zheng continue;
10335d4f98a2SYan Zheng fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
10345d4f98a2SYan Zheng if (btrfs_file_extent_type(leaf, fi) ==
10355d4f98a2SYan Zheng BTRFS_FILE_EXTENT_INLINE)
10365d4f98a2SYan Zheng continue;
10375d4f98a2SYan Zheng bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
10385d4f98a2SYan Zheng num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
10395d4f98a2SYan Zheng if (bytenr == 0)
10405d4f98a2SYan Zheng continue;
10419569cc20SQu Wenruo if (!in_range(bytenr, rc->block_group->start,
10429569cc20SQu Wenruo rc->block_group->length))
10435d4f98a2SYan Zheng continue;
10445d4f98a2SYan Zheng
10455d4f98a2SYan Zheng /*
1046fb12489bSMatthew Wilcox (Oracle) * if we are modifying block in fs tree, wait for read_folio
10475d4f98a2SYan Zheng * to complete and drop the extent cache
10485d4f98a2SYan Zheng */
10495d4f98a2SYan Zheng if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
10505d4f98a2SYan Zheng if (first) {
10515d4f98a2SYan Zheng inode = find_next_inode(root, key.objectid);
10525d4f98a2SYan Zheng first = 0;
10534a0cc7caSNikolay Borisov } else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
1054e55cf7caSDavid Sterba btrfs_add_delayed_iput(BTRFS_I(inode));
10555d4f98a2SYan Zheng inode = find_next_inode(root, key.objectid);
10565d4f98a2SYan Zheng }
10574a0cc7caSNikolay Borisov if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
10589c5c9604SJosef Bacik struct extent_state *cached_state = NULL;
10599c5c9604SJosef Bacik
10605d4f98a2SYan Zheng end = key.offset +
10615d4f98a2SYan Zheng btrfs_file_extent_num_bytes(leaf, fi);
10625d4f98a2SYan Zheng WARN_ON(!IS_ALIGNED(key.offset,
10630b246afaSJeff Mahoney fs_info->sectorsize));
10640b246afaSJeff Mahoney WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
10655d4f98a2SYan Zheng end--;
10665d4f98a2SYan Zheng ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
10679c5c9604SJosef Bacik key.offset, end,
10689c5c9604SJosef Bacik &cached_state);
10695d4f98a2SYan Zheng if (!ret)
10705d4f98a2SYan Zheng continue;
10715d4f98a2SYan Zheng
10724c0c8cfcSFilipe Manana btrfs_drop_extent_map_range(BTRFS_I(inode),
10734c0c8cfcSFilipe Manana key.offset, end, true);
10745d4f98a2SYan Zheng unlock_extent(&BTRFS_I(inode)->io_tree,
10759c5c9604SJosef Bacik key.offset, end, &cached_state);
10765d4f98a2SYan Zheng }
10775d4f98a2SYan Zheng }
10785d4f98a2SYan Zheng
10795d4f98a2SYan Zheng ret = get_new_location(rc->data_inode, &new_bytenr,
10805d4f98a2SYan Zheng bytenr, num_bytes);
108183d4cfd4SJosef Bacik if (ret) {
108283d4cfd4SJosef Bacik /*
108383d4cfd4SJosef Bacik * Don't have to abort since we've not changed anything
108483d4cfd4SJosef Bacik * in the file extent yet.
108583d4cfd4SJosef Bacik */
108683d4cfd4SJosef Bacik break;
10873fd0a558SYan, Zheng }
10885d4f98a2SYan Zheng
10895d4f98a2SYan Zheng btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
10905d4f98a2SYan Zheng dirty = 1;
10915d4f98a2SYan Zheng
10925d4f98a2SYan Zheng key.offset -= btrfs_file_extent_offset(leaf, fi);
109382fa113fSQu Wenruo btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
109482fa113fSQu Wenruo num_bytes, parent);
109582fa113fSQu Wenruo btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1096f42c5da6SNikolay Borisov key.objectid, key.offset,
1097f42c5da6SNikolay Borisov root->root_key.objectid, false);
109882fa113fSQu Wenruo ret = btrfs_inc_extent_ref(trans, &ref);
109983d4cfd4SJosef Bacik if (ret) {
110066642832SJeff Mahoney btrfs_abort_transaction(trans, ret);
110183d4cfd4SJosef Bacik break;
110283d4cfd4SJosef Bacik }
11035d4f98a2SYan Zheng
1104ffd4bb2aSQu Wenruo btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
1105ffd4bb2aSQu Wenruo num_bytes, parent);
1106ffd4bb2aSQu Wenruo btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1107f42c5da6SNikolay Borisov key.objectid, key.offset,
1108f42c5da6SNikolay Borisov root->root_key.objectid, false);
1109ffd4bb2aSQu Wenruo ret = btrfs_free_extent(trans, &ref);
111083d4cfd4SJosef Bacik if (ret) {
111166642832SJeff Mahoney btrfs_abort_transaction(trans, ret);
111283d4cfd4SJosef Bacik break;
111383d4cfd4SJosef Bacik }
11145d4f98a2SYan Zheng }
11155d4f98a2SYan Zheng if (dirty)
1116d5e09e38SFilipe Manana btrfs_mark_buffer_dirty(trans, leaf);
11173fd0a558SYan, Zheng if (inode)
1118e55cf7caSDavid Sterba btrfs_add_delayed_iput(BTRFS_I(inode));
111983d4cfd4SJosef Bacik return ret;
11205d4f98a2SYan Zheng }
11215d4f98a2SYan Zheng
memcmp_node_keys(const struct extent_buffer * eb,int slot,const struct btrfs_path * path,int level)11224dc6ea8bSDavid Sterba static noinline_for_stack int memcmp_node_keys(const struct extent_buffer *eb,
11234dc6ea8bSDavid Sterba int slot, const struct btrfs_path *path,
11244dc6ea8bSDavid Sterba int level)
11255d4f98a2SYan Zheng {
11265d4f98a2SYan Zheng struct btrfs_disk_key key1;
11275d4f98a2SYan Zheng struct btrfs_disk_key key2;
11285d4f98a2SYan Zheng btrfs_node_key(eb, &key1, slot);
11295d4f98a2SYan Zheng btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
11305d4f98a2SYan Zheng return memcmp(&key1, &key2, sizeof(key1));
11315d4f98a2SYan Zheng }
11325d4f98a2SYan Zheng
/*
 * try to replace tree blocks in fs tree with the new blocks
 * in reloc tree. tree blocks that haven't been modified since the
 * reloc tree was created can be replaced.
 *
 * if a block was replaced, level of the block + 1 is returned.
 * if no block got replaced, 0 is returned. if there are other
 * errors, a negative error number is returned.
 */
11423fd0a558SYan, Zheng static noinline_for_stack
replace_path(struct btrfs_trans_handle * trans,struct reloc_control * rc,struct btrfs_root * dest,struct btrfs_root * src,struct btrfs_path * path,struct btrfs_key * next_key,int lowest_level,int max_level)11433d0174f7SQu Wenruo int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
11445d4f98a2SYan Zheng struct btrfs_root *dest, struct btrfs_root *src,
11455d4f98a2SYan Zheng struct btrfs_path *path, struct btrfs_key *next_key,
11465d4f98a2SYan Zheng int lowest_level, int max_level)
11475d4f98a2SYan Zheng {
11480b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = dest->fs_info;
11495d4f98a2SYan Zheng struct extent_buffer *eb;
11505d4f98a2SYan Zheng struct extent_buffer *parent;
115182fa113fSQu Wenruo struct btrfs_ref ref = { 0 };
11525d4f98a2SYan Zheng struct btrfs_key key;
11535d4f98a2SYan Zheng u64 old_bytenr;
11545d4f98a2SYan Zheng u64 new_bytenr;
11555d4f98a2SYan Zheng u64 old_ptr_gen;
11565d4f98a2SYan Zheng u64 new_ptr_gen;
11575d4f98a2SYan Zheng u64 last_snapshot;
11585d4f98a2SYan Zheng u32 blocksize;
11593fd0a558SYan, Zheng int cow = 0;
11605d4f98a2SYan Zheng int level;
11615d4f98a2SYan Zheng int ret;
11625d4f98a2SYan Zheng int slot;
11635d4f98a2SYan Zheng
11647a9213a9SJosef Bacik ASSERT(src->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
11657a9213a9SJosef Bacik ASSERT(dest->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
11665d4f98a2SYan Zheng
11675d4f98a2SYan Zheng last_snapshot = btrfs_root_last_snapshot(&src->root_item);
11683fd0a558SYan, Zheng again:
11695d4f98a2SYan Zheng slot = path->slots[lowest_level];
11705d4f98a2SYan Zheng btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
11715d4f98a2SYan Zheng
11725d4f98a2SYan Zheng eb = btrfs_lock_root_node(dest);
11735d4f98a2SYan Zheng level = btrfs_header_level(eb);
11745d4f98a2SYan Zheng
11755d4f98a2SYan Zheng if (level < lowest_level) {
11765d4f98a2SYan Zheng btrfs_tree_unlock(eb);
11775d4f98a2SYan Zheng free_extent_buffer(eb);
11785d4f98a2SYan Zheng return 0;
11795d4f98a2SYan Zheng }
11805d4f98a2SYan Zheng
11813fd0a558SYan, Zheng if (cow) {
11829631e4ccSJosef Bacik ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb,
11839631e4ccSJosef Bacik BTRFS_NESTING_COW);
118445b87c5dSJosef Bacik if (ret) {
118545b87c5dSJosef Bacik btrfs_tree_unlock(eb);
118645b87c5dSJosef Bacik free_extent_buffer(eb);
118745b87c5dSJosef Bacik return ret;
118845b87c5dSJosef Bacik }
11893fd0a558SYan, Zheng }
11905d4f98a2SYan Zheng
11915d4f98a2SYan Zheng if (next_key) {
11925d4f98a2SYan Zheng next_key->objectid = (u64)-1;
11935d4f98a2SYan Zheng next_key->type = (u8)-1;
11945d4f98a2SYan Zheng next_key->offset = (u64)-1;
11955d4f98a2SYan Zheng }
11965d4f98a2SYan Zheng
11975d4f98a2SYan Zheng parent = eb;
11985d4f98a2SYan Zheng while (1) {
11995d4f98a2SYan Zheng level = btrfs_header_level(parent);
12007a9213a9SJosef Bacik ASSERT(level >= lowest_level);
12015d4f98a2SYan Zheng
1202fdf8d595SAnand Jain ret = btrfs_bin_search(parent, 0, &key, &slot);
1203cbca7d59SFilipe Manana if (ret < 0)
1204cbca7d59SFilipe Manana break;
12055d4f98a2SYan Zheng if (ret && slot > 0)
12065d4f98a2SYan Zheng slot--;
12075d4f98a2SYan Zheng
12085d4f98a2SYan Zheng if (next_key && slot + 1 < btrfs_header_nritems(parent))
12095d4f98a2SYan Zheng btrfs_node_key_to_cpu(parent, next_key, slot + 1);
12105d4f98a2SYan Zheng
12115d4f98a2SYan Zheng old_bytenr = btrfs_node_blockptr(parent, slot);
12120b246afaSJeff Mahoney blocksize = fs_info->nodesize;
12135d4f98a2SYan Zheng old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
12145d4f98a2SYan Zheng
12155d4f98a2SYan Zheng if (level <= max_level) {
12165d4f98a2SYan Zheng eb = path->nodes[level];
12175d4f98a2SYan Zheng new_bytenr = btrfs_node_blockptr(eb,
12185d4f98a2SYan Zheng path->slots[level]);
12195d4f98a2SYan Zheng new_ptr_gen = btrfs_node_ptr_generation(eb,
12205d4f98a2SYan Zheng path->slots[level]);
12215d4f98a2SYan Zheng } else {
12225d4f98a2SYan Zheng new_bytenr = 0;
12235d4f98a2SYan Zheng new_ptr_gen = 0;
12245d4f98a2SYan Zheng }
12255d4f98a2SYan Zheng
1226fae7f21cSDulshani Gunawardhana if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
12275d4f98a2SYan Zheng ret = level;
12285d4f98a2SYan Zheng break;
12295d4f98a2SYan Zheng }
12305d4f98a2SYan Zheng
12315d4f98a2SYan Zheng if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
12325d4f98a2SYan Zheng memcmp_node_keys(parent, slot, path, level)) {
12333fd0a558SYan, Zheng if (level <= lowest_level) {
12345d4f98a2SYan Zheng ret = 0;
12355d4f98a2SYan Zheng break;
12365d4f98a2SYan Zheng }
12375d4f98a2SYan Zheng
12386b3426beSJosef Bacik eb = btrfs_read_node_slot(parent, slot);
123964c043deSLiu Bo if (IS_ERR(eb)) {
124064c043deSLiu Bo ret = PTR_ERR(eb);
1241264813acSLiu Bo break;
1242416bc658SJosef Bacik }
12435d4f98a2SYan Zheng btrfs_tree_lock(eb);
12443fd0a558SYan, Zheng if (cow) {
12455d4f98a2SYan Zheng ret = btrfs_cow_block(trans, dest, eb, parent,
12469631e4ccSJosef Bacik slot, &eb,
12479631e4ccSJosef Bacik BTRFS_NESTING_COW);
124845b87c5dSJosef Bacik if (ret) {
124945b87c5dSJosef Bacik btrfs_tree_unlock(eb);
125045b87c5dSJosef Bacik free_extent_buffer(eb);
125145b87c5dSJosef Bacik break;
125245b87c5dSJosef Bacik }
12535d4f98a2SYan Zheng }
12545d4f98a2SYan Zheng
12555d4f98a2SYan Zheng btrfs_tree_unlock(parent);
12565d4f98a2SYan Zheng free_extent_buffer(parent);
12575d4f98a2SYan Zheng
12585d4f98a2SYan Zheng parent = eb;
12595d4f98a2SYan Zheng continue;
12605d4f98a2SYan Zheng }
12615d4f98a2SYan Zheng
12623fd0a558SYan, Zheng if (!cow) {
12633fd0a558SYan, Zheng btrfs_tree_unlock(parent);
12643fd0a558SYan, Zheng free_extent_buffer(parent);
12653fd0a558SYan, Zheng cow = 1;
12663fd0a558SYan, Zheng goto again;
12673fd0a558SYan, Zheng }
12683fd0a558SYan, Zheng
12695d4f98a2SYan Zheng btrfs_node_key_to_cpu(path->nodes[level], &key,
12705d4f98a2SYan Zheng path->slots[level]);
1271b3b4aa74SDavid Sterba btrfs_release_path(path);
12725d4f98a2SYan Zheng
12735d4f98a2SYan Zheng path->lowest_level = level;
1274b40130b2SJosef Bacik set_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state);
12755d4f98a2SYan Zheng ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
1276b40130b2SJosef Bacik clear_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state);
12775d4f98a2SYan Zheng path->lowest_level = 0;
12780e9873e2SJosef Bacik if (ret) {
12790e9873e2SJosef Bacik if (ret > 0)
12800e9873e2SJosef Bacik ret = -ENOENT;
12810e9873e2SJosef Bacik break;
12820e9873e2SJosef Bacik }
12835d4f98a2SYan Zheng
12845d4f98a2SYan Zheng /*
1285824d8dffSQu Wenruo * Info qgroup to trace both subtrees.
1286824d8dffSQu Wenruo *
1287824d8dffSQu Wenruo * We must trace both trees.
1288824d8dffSQu Wenruo * 1) Tree reloc subtree
1289824d8dffSQu Wenruo * If not traced, we will leak data numbers
1290824d8dffSQu Wenruo * 2) Fs subtree
1291824d8dffSQu Wenruo * If not traced, we will double count old data
1292f616f5cdSQu Wenruo *
1293f616f5cdSQu Wenruo * We don't scan the subtree right now, but only record
1294f616f5cdSQu Wenruo * the swapped tree blocks.
1295f616f5cdSQu Wenruo * The real subtree rescan is delayed until we have new
1296f616f5cdSQu Wenruo * CoW on the subtree root node before transaction commit.
1297824d8dffSQu Wenruo */
1298370a11b8SQu Wenruo ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
1299370a11b8SQu Wenruo rc->block_group, parent, slot,
1300370a11b8SQu Wenruo path->nodes[level], path->slots[level],
1301370a11b8SQu Wenruo last_snapshot);
1302370a11b8SQu Wenruo if (ret < 0)
1303370a11b8SQu Wenruo break;
1304824d8dffSQu Wenruo /*
13055d4f98a2SYan Zheng * swap blocks in fs tree and reloc tree.
13065d4f98a2SYan Zheng */
13075d4f98a2SYan Zheng btrfs_set_node_blockptr(parent, slot, new_bytenr);
13085d4f98a2SYan Zheng btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
1309d5e09e38SFilipe Manana btrfs_mark_buffer_dirty(trans, parent);
13105d4f98a2SYan Zheng
13115d4f98a2SYan Zheng btrfs_set_node_blockptr(path->nodes[level],
13125d4f98a2SYan Zheng path->slots[level], old_bytenr);
13135d4f98a2SYan Zheng btrfs_set_node_ptr_generation(path->nodes[level],
13145d4f98a2SYan Zheng path->slots[level], old_ptr_gen);
1315d5e09e38SFilipe Manana btrfs_mark_buffer_dirty(trans, path->nodes[level]);
13165d4f98a2SYan Zheng
131782fa113fSQu Wenruo btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
131882fa113fSQu Wenruo blocksize, path->nodes[level]->start);
1319f42c5da6SNikolay Borisov btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
1320f42c5da6SNikolay Borisov 0, true);
132182fa113fSQu Wenruo ret = btrfs_inc_extent_ref(trans, &ref);
1322253e258cSJosef Bacik if (ret) {
1323253e258cSJosef Bacik btrfs_abort_transaction(trans, ret);
1324253e258cSJosef Bacik break;
1325253e258cSJosef Bacik }
132682fa113fSQu Wenruo btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
132782fa113fSQu Wenruo blocksize, 0);
1328f42c5da6SNikolay Borisov btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid, 0,
1329f42c5da6SNikolay Borisov true);
133082fa113fSQu Wenruo ret = btrfs_inc_extent_ref(trans, &ref);
1331253e258cSJosef Bacik if (ret) {
1332253e258cSJosef Bacik btrfs_abort_transaction(trans, ret);
1333253e258cSJosef Bacik break;
1334253e258cSJosef Bacik }
13355d4f98a2SYan Zheng
1336ffd4bb2aSQu Wenruo btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
1337ffd4bb2aSQu Wenruo blocksize, path->nodes[level]->start);
1338f42c5da6SNikolay Borisov btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
1339f42c5da6SNikolay Borisov 0, true);
1340ffd4bb2aSQu Wenruo ret = btrfs_free_extent(trans, &ref);
1341253e258cSJosef Bacik if (ret) {
1342253e258cSJosef Bacik btrfs_abort_transaction(trans, ret);
1343253e258cSJosef Bacik break;
1344253e258cSJosef Bacik }
13455d4f98a2SYan Zheng
1346ffd4bb2aSQu Wenruo btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
1347ffd4bb2aSQu Wenruo blocksize, 0);
1348f42c5da6SNikolay Borisov btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid,
1349f42c5da6SNikolay Borisov 0, true);
1350ffd4bb2aSQu Wenruo ret = btrfs_free_extent(trans, &ref);
1351253e258cSJosef Bacik if (ret) {
1352253e258cSJosef Bacik btrfs_abort_transaction(trans, ret);
1353253e258cSJosef Bacik break;
1354253e258cSJosef Bacik }
13555d4f98a2SYan Zheng
13565d4f98a2SYan Zheng btrfs_unlock_up_safe(path, 0);
13575d4f98a2SYan Zheng
13585d4f98a2SYan Zheng ret = level;
13595d4f98a2SYan Zheng break;
13605d4f98a2SYan Zheng }
13615d4f98a2SYan Zheng btrfs_tree_unlock(parent);
13625d4f98a2SYan Zheng free_extent_buffer(parent);
13635d4f98a2SYan Zheng return ret;
13645d4f98a2SYan Zheng }
13655d4f98a2SYan Zheng
13665d4f98a2SYan Zheng /*
13675d4f98a2SYan Zheng * helper to find next relocated block in reloc tree
13685d4f98a2SYan Zheng */
13695d4f98a2SYan Zheng static noinline_for_stack
walk_up_reloc_tree(struct btrfs_root * root,struct btrfs_path * path,int * level)13705d4f98a2SYan Zheng int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
13715d4f98a2SYan Zheng int *level)
13725d4f98a2SYan Zheng {
13735d4f98a2SYan Zheng struct extent_buffer *eb;
13745d4f98a2SYan Zheng int i;
13755d4f98a2SYan Zheng u64 last_snapshot;
13765d4f98a2SYan Zheng u32 nritems;
13775d4f98a2SYan Zheng
13785d4f98a2SYan Zheng last_snapshot = btrfs_root_last_snapshot(&root->root_item);
13795d4f98a2SYan Zheng
13805d4f98a2SYan Zheng for (i = 0; i < *level; i++) {
13815d4f98a2SYan Zheng free_extent_buffer(path->nodes[i]);
13825d4f98a2SYan Zheng path->nodes[i] = NULL;
13835d4f98a2SYan Zheng }
13845d4f98a2SYan Zheng
13855d4f98a2SYan Zheng for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
13865d4f98a2SYan Zheng eb = path->nodes[i];
13875d4f98a2SYan Zheng nritems = btrfs_header_nritems(eb);
13885d4f98a2SYan Zheng while (path->slots[i] + 1 < nritems) {
13895d4f98a2SYan Zheng path->slots[i]++;
13905d4f98a2SYan Zheng if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
13915d4f98a2SYan Zheng last_snapshot)
13925d4f98a2SYan Zheng continue;
13935d4f98a2SYan Zheng
13945d4f98a2SYan Zheng *level = i;
13955d4f98a2SYan Zheng return 0;
13965d4f98a2SYan Zheng }
13975d4f98a2SYan Zheng free_extent_buffer(path->nodes[i]);
13985d4f98a2SYan Zheng path->nodes[i] = NULL;
13995d4f98a2SYan Zheng }
14005d4f98a2SYan Zheng return 1;
14015d4f98a2SYan Zheng }
14025d4f98a2SYan Zheng
14035d4f98a2SYan Zheng /*
14045d4f98a2SYan Zheng * walk down reloc tree to find relocated block of lowest level
14055d4f98a2SYan Zheng */
14065d4f98a2SYan Zheng static noinline_for_stack
walk_down_reloc_tree(struct btrfs_root * root,struct btrfs_path * path,int * level)14075d4f98a2SYan Zheng int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
14085d4f98a2SYan Zheng int *level)
14095d4f98a2SYan Zheng {
14105d4f98a2SYan Zheng struct extent_buffer *eb = NULL;
14115d4f98a2SYan Zheng int i;
14125d4f98a2SYan Zheng u64 ptr_gen = 0;
14135d4f98a2SYan Zheng u64 last_snapshot;
14145d4f98a2SYan Zheng u32 nritems;
14155d4f98a2SYan Zheng
14165d4f98a2SYan Zheng last_snapshot = btrfs_root_last_snapshot(&root->root_item);
14175d4f98a2SYan Zheng
14185d4f98a2SYan Zheng for (i = *level; i > 0; i--) {
14195d4f98a2SYan Zheng eb = path->nodes[i];
14205d4f98a2SYan Zheng nritems = btrfs_header_nritems(eb);
14215d4f98a2SYan Zheng while (path->slots[i] < nritems) {
14225d4f98a2SYan Zheng ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
14235d4f98a2SYan Zheng if (ptr_gen > last_snapshot)
14245d4f98a2SYan Zheng break;
14255d4f98a2SYan Zheng path->slots[i]++;
14265d4f98a2SYan Zheng }
14275d4f98a2SYan Zheng if (path->slots[i] >= nritems) {
14285d4f98a2SYan Zheng if (i == *level)
14295d4f98a2SYan Zheng break;
14305d4f98a2SYan Zheng *level = i + 1;
14315d4f98a2SYan Zheng return 0;
14325d4f98a2SYan Zheng }
14335d4f98a2SYan Zheng if (i == 1) {
14345d4f98a2SYan Zheng *level = i;
14355d4f98a2SYan Zheng return 0;
14365d4f98a2SYan Zheng }
14375d4f98a2SYan Zheng
14388ef385bbSJosef Bacik eb = btrfs_read_node_slot(eb, path->slots[i]);
14398ef385bbSJosef Bacik if (IS_ERR(eb))
144064c043deSLiu Bo return PTR_ERR(eb);
14415d4f98a2SYan Zheng BUG_ON(btrfs_header_level(eb) != i - 1);
14425d4f98a2SYan Zheng path->nodes[i - 1] = eb;
14435d4f98a2SYan Zheng path->slots[i - 1] = 0;
14445d4f98a2SYan Zheng }
14455d4f98a2SYan Zheng return 1;
14465d4f98a2SYan Zheng }
14475d4f98a2SYan Zheng
14485d4f98a2SYan Zheng /*
14495d4f98a2SYan Zheng * invalidate extent cache for file extents whose key in range of
14505d4f98a2SYan Zheng * [min_key, max_key)
14515d4f98a2SYan Zheng */
invalidate_extent_cache(struct btrfs_root * root,const struct btrfs_key * min_key,const struct btrfs_key * max_key)14525d4f98a2SYan Zheng static int invalidate_extent_cache(struct btrfs_root *root,
14534dc6ea8bSDavid Sterba const struct btrfs_key *min_key,
14544dc6ea8bSDavid Sterba const struct btrfs_key *max_key)
14555d4f98a2SYan Zheng {
14560b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
14575d4f98a2SYan Zheng struct inode *inode = NULL;
14585d4f98a2SYan Zheng u64 objectid;
14595d4f98a2SYan Zheng u64 start, end;
146033345d01SLi Zefan u64 ino;
14615d4f98a2SYan Zheng
14625d4f98a2SYan Zheng objectid = min_key->objectid;
14635d4f98a2SYan Zheng while (1) {
14649c5c9604SJosef Bacik struct extent_state *cached_state = NULL;
14659c5c9604SJosef Bacik
14665d4f98a2SYan Zheng cond_resched();
14675d4f98a2SYan Zheng iput(inode);
14685d4f98a2SYan Zheng
14695d4f98a2SYan Zheng if (objectid > max_key->objectid)
14705d4f98a2SYan Zheng break;
14715d4f98a2SYan Zheng
14725d4f98a2SYan Zheng inode = find_next_inode(root, objectid);
14735d4f98a2SYan Zheng if (!inode)
14745d4f98a2SYan Zheng break;
14754a0cc7caSNikolay Borisov ino = btrfs_ino(BTRFS_I(inode));
14765d4f98a2SYan Zheng
147733345d01SLi Zefan if (ino > max_key->objectid) {
14785d4f98a2SYan Zheng iput(inode);
14795d4f98a2SYan Zheng break;
14805d4f98a2SYan Zheng }
14815d4f98a2SYan Zheng
148233345d01SLi Zefan objectid = ino + 1;
14835d4f98a2SYan Zheng if (!S_ISREG(inode->i_mode))
14845d4f98a2SYan Zheng continue;
14855d4f98a2SYan Zheng
148633345d01SLi Zefan if (unlikely(min_key->objectid == ino)) {
14875d4f98a2SYan Zheng if (min_key->type > BTRFS_EXTENT_DATA_KEY)
14885d4f98a2SYan Zheng continue;
14895d4f98a2SYan Zheng if (min_key->type < BTRFS_EXTENT_DATA_KEY)
14905d4f98a2SYan Zheng start = 0;
14915d4f98a2SYan Zheng else {
14925d4f98a2SYan Zheng start = min_key->offset;
14930b246afaSJeff Mahoney WARN_ON(!IS_ALIGNED(start, fs_info->sectorsize));
14945d4f98a2SYan Zheng }
14955d4f98a2SYan Zheng } else {
14965d4f98a2SYan Zheng start = 0;
14975d4f98a2SYan Zheng }
14985d4f98a2SYan Zheng
149933345d01SLi Zefan if (unlikely(max_key->objectid == ino)) {
15005d4f98a2SYan Zheng if (max_key->type < BTRFS_EXTENT_DATA_KEY)
15015d4f98a2SYan Zheng continue;
15025d4f98a2SYan Zheng if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
15035d4f98a2SYan Zheng end = (u64)-1;
15045d4f98a2SYan Zheng } else {
15055d4f98a2SYan Zheng if (max_key->offset == 0)
15065d4f98a2SYan Zheng continue;
15075d4f98a2SYan Zheng end = max_key->offset;
15080b246afaSJeff Mahoney WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
15095d4f98a2SYan Zheng end--;
15105d4f98a2SYan Zheng }
15115d4f98a2SYan Zheng } else {
15125d4f98a2SYan Zheng end = (u64)-1;
15135d4f98a2SYan Zheng }
15145d4f98a2SYan Zheng
1515fb12489bSMatthew Wilcox (Oracle) /* the lock_extent waits for read_folio to complete */
15169c5c9604SJosef Bacik lock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
15174c0c8cfcSFilipe Manana btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, true);
15189c5c9604SJosef Bacik unlock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
15195d4f98a2SYan Zheng }
15205d4f98a2SYan Zheng return 0;
15215d4f98a2SYan Zheng }
15225d4f98a2SYan Zheng
find_next_key(struct btrfs_path * path,int level,struct btrfs_key * key)15235d4f98a2SYan Zheng static int find_next_key(struct btrfs_path *path, int level,
15245d4f98a2SYan Zheng struct btrfs_key *key)
15255d4f98a2SYan Zheng
15265d4f98a2SYan Zheng {
15275d4f98a2SYan Zheng while (level < BTRFS_MAX_LEVEL) {
15285d4f98a2SYan Zheng if (!path->nodes[level])
15295d4f98a2SYan Zheng break;
15305d4f98a2SYan Zheng if (path->slots[level] + 1 <
15315d4f98a2SYan Zheng btrfs_header_nritems(path->nodes[level])) {
15325d4f98a2SYan Zheng btrfs_node_key_to_cpu(path->nodes[level], key,
15335d4f98a2SYan Zheng path->slots[level] + 1);
15345d4f98a2SYan Zheng return 0;
15355d4f98a2SYan Zheng }
15365d4f98a2SYan Zheng level++;
15375d4f98a2SYan Zheng }
15385d4f98a2SYan Zheng return 1;
15395d4f98a2SYan Zheng }
15405d4f98a2SYan Zheng
15415d4f98a2SYan Zheng /*
1542d2311e69SQu Wenruo * Insert current subvolume into reloc_control::dirty_subvol_roots
1543d2311e69SQu Wenruo */
insert_dirty_subvol(struct btrfs_trans_handle * trans,struct reloc_control * rc,struct btrfs_root * root)1544ac54da6cSJosef Bacik static int insert_dirty_subvol(struct btrfs_trans_handle *trans,
1545d2311e69SQu Wenruo struct reloc_control *rc,
1546d2311e69SQu Wenruo struct btrfs_root *root)
1547d2311e69SQu Wenruo {
1548d2311e69SQu Wenruo struct btrfs_root *reloc_root = root->reloc_root;
1549d2311e69SQu Wenruo struct btrfs_root_item *reloc_root_item;
15507934133fSJosef Bacik int ret;
1551d2311e69SQu Wenruo
1552d2311e69SQu Wenruo /* @root must be a subvolume tree root with a valid reloc tree */
1553d2311e69SQu Wenruo ASSERT(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1554d2311e69SQu Wenruo ASSERT(reloc_root);
1555d2311e69SQu Wenruo
1556d2311e69SQu Wenruo reloc_root_item = &reloc_root->root_item;
1557d2311e69SQu Wenruo memset(&reloc_root_item->drop_progress, 0,
1558d2311e69SQu Wenruo sizeof(reloc_root_item->drop_progress));
1559c8422684SDavid Sterba btrfs_set_root_drop_level(reloc_root_item, 0);
1560d2311e69SQu Wenruo btrfs_set_root_refs(reloc_root_item, 0);
15617934133fSJosef Bacik ret = btrfs_update_reloc_root(trans, root);
15627934133fSJosef Bacik if (ret)
15637934133fSJosef Bacik return ret;
1564d2311e69SQu Wenruo
1565d2311e69SQu Wenruo if (list_empty(&root->reloc_dirty_list)) {
156600246528SJosef Bacik btrfs_grab_root(root);
1567d2311e69SQu Wenruo list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots);
1568d2311e69SQu Wenruo }
1569ac54da6cSJosef Bacik
1570ac54da6cSJosef Bacik return 0;
1571d2311e69SQu Wenruo }
1572d2311e69SQu Wenruo
clean_dirty_subvols(struct reloc_control * rc)1573d2311e69SQu Wenruo static int clean_dirty_subvols(struct reloc_control *rc)
1574d2311e69SQu Wenruo {
1575d2311e69SQu Wenruo struct btrfs_root *root;
1576d2311e69SQu Wenruo struct btrfs_root *next;
1577d2311e69SQu Wenruo int ret = 0;
157830d40577SQu Wenruo int ret2;
1579d2311e69SQu Wenruo
1580d2311e69SQu Wenruo list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
1581d2311e69SQu Wenruo reloc_dirty_list) {
158230d40577SQu Wenruo if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
158330d40577SQu Wenruo /* Merged subvolume, cleanup its reloc root */
1584d2311e69SQu Wenruo struct btrfs_root *reloc_root = root->reloc_root;
1585d2311e69SQu Wenruo
1586d2311e69SQu Wenruo list_del_init(&root->reloc_dirty_list);
1587d2311e69SQu Wenruo root->reloc_root = NULL;
15886282675eSQu Wenruo /*
15896282675eSQu Wenruo * Need barrier to ensure clear_bit() only happens after
15906282675eSQu Wenruo * root->reloc_root = NULL. Pairs with have_reloc_root.
15916282675eSQu Wenruo */
15926282675eSQu Wenruo smp_wmb();
15931fac4a54SQu Wenruo clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
1594f28de8d8SJosef Bacik if (reloc_root) {
1595f44deb74SJosef Bacik /*
1596f44deb74SJosef Bacik * btrfs_drop_snapshot drops our ref we hold for
1597f44deb74SJosef Bacik * ->reloc_root. If it fails however we must
1598f44deb74SJosef Bacik * drop the ref ourselves.
1599f44deb74SJosef Bacik */
1600f28de8d8SJosef Bacik ret2 = btrfs_drop_snapshot(reloc_root, 0, 1);
1601f44deb74SJosef Bacik if (ret2 < 0) {
1602f44deb74SJosef Bacik btrfs_put_root(reloc_root);
1603f44deb74SJosef Bacik if (!ret)
1604f28de8d8SJosef Bacik ret = ret2;
1605f28de8d8SJosef Bacik }
1606f44deb74SJosef Bacik }
160700246528SJosef Bacik btrfs_put_root(root);
160830d40577SQu Wenruo } else {
160930d40577SQu Wenruo /* Orphan reloc tree, just clean it up */
16100078a9f9SNikolay Borisov ret2 = btrfs_drop_snapshot(root, 0, 1);
1611f44deb74SJosef Bacik if (ret2 < 0) {
1612f44deb74SJosef Bacik btrfs_put_root(root);
1613f44deb74SJosef Bacik if (!ret)
161430d40577SQu Wenruo ret = ret2;
161530d40577SQu Wenruo }
1616d2311e69SQu Wenruo }
1617f44deb74SJosef Bacik }
1618d2311e69SQu Wenruo return ret;
1619d2311e69SQu Wenruo }
1620d2311e69SQu Wenruo
1621d2311e69SQu Wenruo /*
16225d4f98a2SYan Zheng * merge the relocated tree blocks in reloc tree with corresponding
16235d4f98a2SYan Zheng * fs tree.
16245d4f98a2SYan Zheng */
merge_reloc_root(struct reloc_control * rc,struct btrfs_root * root)16255d4f98a2SYan Zheng static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
16265d4f98a2SYan Zheng struct btrfs_root *root)
16275d4f98a2SYan Zheng {
16280b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
16295d4f98a2SYan Zheng struct btrfs_key key;
16305d4f98a2SYan Zheng struct btrfs_key next_key;
16319e6a0c52SJosef Bacik struct btrfs_trans_handle *trans = NULL;
16325d4f98a2SYan Zheng struct btrfs_root *reloc_root;
16335d4f98a2SYan Zheng struct btrfs_root_item *root_item;
16345d4f98a2SYan Zheng struct btrfs_path *path;
16353fd0a558SYan, Zheng struct extent_buffer *leaf;
1636fca3a45dSJosef Bacik int reserve_level;
16375d4f98a2SYan Zheng int level;
16385d4f98a2SYan Zheng int max_level;
16395d4f98a2SYan Zheng int replaced = 0;
1640c6a592f2SNikolay Borisov int ret = 0;
16413fd0a558SYan, Zheng u32 min_reserved;
16425d4f98a2SYan Zheng
16435d4f98a2SYan Zheng path = btrfs_alloc_path();
16445d4f98a2SYan Zheng if (!path)
16455d4f98a2SYan Zheng return -ENOMEM;
1646e4058b54SDavid Sterba path->reada = READA_FORWARD;
16475d4f98a2SYan Zheng
16485d4f98a2SYan Zheng reloc_root = root->reloc_root;
16495d4f98a2SYan Zheng root_item = &reloc_root->root_item;
16505d4f98a2SYan Zheng
16515d4f98a2SYan Zheng if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
16525d4f98a2SYan Zheng level = btrfs_root_level(root_item);
165367439dadSDavid Sterba atomic_inc(&reloc_root->node->refs);
16545d4f98a2SYan Zheng path->nodes[level] = reloc_root->node;
16555d4f98a2SYan Zheng path->slots[level] = 0;
16565d4f98a2SYan Zheng } else {
16575d4f98a2SYan Zheng btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
16585d4f98a2SYan Zheng
1659c8422684SDavid Sterba level = btrfs_root_drop_level(root_item);
16605d4f98a2SYan Zheng BUG_ON(level == 0);
16615d4f98a2SYan Zheng path->lowest_level = level;
16625d4f98a2SYan Zheng ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
166333c66f43SYan Zheng path->lowest_level = 0;
16645d4f98a2SYan Zheng if (ret < 0) {
16655d4f98a2SYan Zheng btrfs_free_path(path);
16665d4f98a2SYan Zheng return ret;
16675d4f98a2SYan Zheng }
16685d4f98a2SYan Zheng
16695d4f98a2SYan Zheng btrfs_node_key_to_cpu(path->nodes[level], &next_key,
16705d4f98a2SYan Zheng path->slots[level]);
16715d4f98a2SYan Zheng WARN_ON(memcmp(&key, &next_key, sizeof(key)));
16725d4f98a2SYan Zheng
16735d4f98a2SYan Zheng btrfs_unlock_up_safe(path, 0);
16745d4f98a2SYan Zheng }
16755d4f98a2SYan Zheng
167644d354abSQu Wenruo /*
167744d354abSQu Wenruo * In merge_reloc_root(), we modify the upper level pointer to swap the
167844d354abSQu Wenruo * tree blocks between reloc tree and subvolume tree. Thus for tree
167944d354abSQu Wenruo * block COW, we COW at most from level 1 to root level for each tree.
168044d354abSQu Wenruo *
168144d354abSQu Wenruo * Thus the needed metadata size is at most root_level * nodesize,
168244d354abSQu Wenruo * and * 2 since we have two trees to COW.
168344d354abSQu Wenruo */
1684fca3a45dSJosef Bacik reserve_level = max_t(int, 1, btrfs_root_level(root_item));
1685fca3a45dSJosef Bacik min_reserved = fs_info->nodesize * reserve_level * 2;
16865d4f98a2SYan Zheng memset(&next_key, 0, sizeof(next_key));
16875d4f98a2SYan Zheng
16885d4f98a2SYan Zheng while (1) {
16899270501cSJosef Bacik ret = btrfs_block_rsv_refill(fs_info, rc->block_rsv,
16909270501cSJosef Bacik min_reserved,
169144d354abSQu Wenruo BTRFS_RESERVE_FLUSH_LIMIT);
1692c6a592f2SNikolay Borisov if (ret)
16939e6a0c52SJosef Bacik goto out;
16949e6a0c52SJosef Bacik trans = btrfs_start_transaction(root, 0);
16959e6a0c52SJosef Bacik if (IS_ERR(trans)) {
1696c6a592f2SNikolay Borisov ret = PTR_ERR(trans);
16979e6a0c52SJosef Bacik trans = NULL;
16989e6a0c52SJosef Bacik goto out;
16999e6a0c52SJosef Bacik }
17002abc726aSJosef Bacik
17012abc726aSJosef Bacik /*
17022abc726aSJosef Bacik * At this point we no longer have a reloc_control, so we can't
17032abc726aSJosef Bacik * depend on btrfs_init_reloc_root to update our last_trans.
17042abc726aSJosef Bacik *
17052abc726aSJosef Bacik * But that's ok, we started the trans handle on our
17062abc726aSJosef Bacik * corresponding fs_root, which means it's been added to the
17072abc726aSJosef Bacik * dirty list. At commit time we'll still call
17082abc726aSJosef Bacik * btrfs_update_reloc_root() and update our root item
17092abc726aSJosef Bacik * appropriately.
17102abc726aSJosef Bacik */
17112abc726aSJosef Bacik reloc_root->last_trans = trans->transid;
17129e6a0c52SJosef Bacik trans->block_rsv = rc->block_rsv;
17133fd0a558SYan, Zheng
17143fd0a558SYan, Zheng replaced = 0;
17155d4f98a2SYan Zheng max_level = level;
17165d4f98a2SYan Zheng
17175d4f98a2SYan Zheng ret = walk_down_reloc_tree(reloc_root, path, &level);
1718c6a592f2SNikolay Borisov if (ret < 0)
17195d4f98a2SYan Zheng goto out;
17205d4f98a2SYan Zheng if (ret > 0)
17215d4f98a2SYan Zheng break;
17225d4f98a2SYan Zheng
17235d4f98a2SYan Zheng if (!find_next_key(path, level, &key) &&
17245d4f98a2SYan Zheng btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
17255d4f98a2SYan Zheng ret = 0;
17265d4f98a2SYan Zheng } else {
17273d0174f7SQu Wenruo ret = replace_path(trans, rc, root, reloc_root, path,
17283fd0a558SYan, Zheng &next_key, level, max_level);
17295d4f98a2SYan Zheng }
1730c6a592f2SNikolay Borisov if (ret < 0)
17315d4f98a2SYan Zheng goto out;
17325d4f98a2SYan Zheng if (ret > 0) {
17335d4f98a2SYan Zheng level = ret;
17345d4f98a2SYan Zheng btrfs_node_key_to_cpu(path->nodes[level], &key,
17355d4f98a2SYan Zheng path->slots[level]);
17365d4f98a2SYan Zheng replaced = 1;
17375d4f98a2SYan Zheng }
17385d4f98a2SYan Zheng
17395d4f98a2SYan Zheng ret = walk_up_reloc_tree(reloc_root, path, &level);
17405d4f98a2SYan Zheng if (ret > 0)
17415d4f98a2SYan Zheng break;
17425d4f98a2SYan Zheng
17435d4f98a2SYan Zheng BUG_ON(level == 0);
17445d4f98a2SYan Zheng /*
17455d4f98a2SYan Zheng * save the merging progress in the drop_progress.
17465d4f98a2SYan Zheng * this is OK since root refs == 1 in this case.
17475d4f98a2SYan Zheng */
17485d4f98a2SYan Zheng btrfs_node_key(path->nodes[level], &root_item->drop_progress,
17495d4f98a2SYan Zheng path->slots[level]);
1750c8422684SDavid Sterba btrfs_set_root_drop_level(root_item, level);
17515d4f98a2SYan Zheng
17523a45bb20SJeff Mahoney btrfs_end_transaction_throttle(trans);
17539e6a0c52SJosef Bacik trans = NULL;
17545d4f98a2SYan Zheng
17552ff7e61eSJeff Mahoney btrfs_btree_balance_dirty(fs_info);
17565d4f98a2SYan Zheng
17575d4f98a2SYan Zheng if (replaced && rc->stage == UPDATE_DATA_PTRS)
17585d4f98a2SYan Zheng invalidate_extent_cache(root, &key, &next_key);
17595d4f98a2SYan Zheng }
17605d4f98a2SYan Zheng
17615d4f98a2SYan Zheng /*
17625d4f98a2SYan Zheng * handle the case only one block in the fs tree need to be
17635d4f98a2SYan Zheng * relocated and the block is tree root.
17645d4f98a2SYan Zheng */
17655d4f98a2SYan Zheng leaf = btrfs_lock_root_node(root);
17669631e4ccSJosef Bacik ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf,
17679631e4ccSJosef Bacik BTRFS_NESTING_COW);
17685d4f98a2SYan Zheng btrfs_tree_unlock(leaf);
17695d4f98a2SYan Zheng free_extent_buffer(leaf);
17705d4f98a2SYan Zheng out:
17715d4f98a2SYan Zheng btrfs_free_path(path);
17725d4f98a2SYan Zheng
1773ac54da6cSJosef Bacik if (ret == 0) {
1774ac54da6cSJosef Bacik ret = insert_dirty_subvol(trans, rc, root);
1775ac54da6cSJosef Bacik if (ret)
1776ac54da6cSJosef Bacik btrfs_abort_transaction(trans, ret);
1777ac54da6cSJosef Bacik }
17785d4f98a2SYan Zheng
17799e6a0c52SJosef Bacik if (trans)
17803a45bb20SJeff Mahoney btrfs_end_transaction_throttle(trans);
17815d4f98a2SYan Zheng
17822ff7e61eSJeff Mahoney btrfs_btree_balance_dirty(fs_info);
17835d4f98a2SYan Zheng
17845d4f98a2SYan Zheng if (replaced && rc->stage == UPDATE_DATA_PTRS)
17855d4f98a2SYan Zheng invalidate_extent_cache(root, &key, &next_key);
17865d4f98a2SYan Zheng
1787c6a592f2SNikolay Borisov return ret;
17885d4f98a2SYan Zheng }
17895d4f98a2SYan Zheng
17903fd0a558SYan, Zheng static noinline_for_stack
prepare_to_merge(struct reloc_control * rc,int err)17913fd0a558SYan, Zheng int prepare_to_merge(struct reloc_control *rc, int err)
17925d4f98a2SYan Zheng {
17933fd0a558SYan, Zheng struct btrfs_root *root = rc->extent_root;
17940b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
17953fd0a558SYan, Zheng struct btrfs_root *reloc_root;
17965d4f98a2SYan Zheng struct btrfs_trans_handle *trans;
17973fd0a558SYan, Zheng LIST_HEAD(reloc_roots);
17983fd0a558SYan, Zheng u64 num_bytes = 0;
17993fd0a558SYan, Zheng int ret;
18003fd0a558SYan, Zheng
18010b246afaSJeff Mahoney mutex_lock(&fs_info->reloc_mutex);
18020b246afaSJeff Mahoney rc->merging_rsv_size += fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
18033fd0a558SYan, Zheng rc->merging_rsv_size += rc->nodes_relocated * 2;
18040b246afaSJeff Mahoney mutex_unlock(&fs_info->reloc_mutex);
18057585717fSChris Mason
18063fd0a558SYan, Zheng again:
18073fd0a558SYan, Zheng if (!err) {
18083fd0a558SYan, Zheng num_bytes = rc->merging_rsv_size;
18099270501cSJosef Bacik ret = btrfs_block_rsv_add(fs_info, rc->block_rsv, num_bytes,
181008e007d2SMiao Xie BTRFS_RESERVE_FLUSH_ALL);
18113fd0a558SYan, Zheng if (ret)
18123fd0a558SYan, Zheng err = ret;
18133fd0a558SYan, Zheng }
18143fd0a558SYan, Zheng
18157a7eaa40SJosef Bacik trans = btrfs_join_transaction(rc->extent_root);
18163612b495STsutomu Itoh if (IS_ERR(trans)) {
18173612b495STsutomu Itoh if (!err)
18182ff7e61eSJeff Mahoney btrfs_block_rsv_release(fs_info, rc->block_rsv,
181963f018beSNikolay Borisov num_bytes, NULL);
18203612b495STsutomu Itoh return PTR_ERR(trans);
18213612b495STsutomu Itoh }
18223fd0a558SYan, Zheng
18233fd0a558SYan, Zheng if (!err) {
18243fd0a558SYan, Zheng if (num_bytes != rc->merging_rsv_size) {
18253a45bb20SJeff Mahoney btrfs_end_transaction(trans);
18262ff7e61eSJeff Mahoney btrfs_block_rsv_release(fs_info, rc->block_rsv,
182763f018beSNikolay Borisov num_bytes, NULL);
18283fd0a558SYan, Zheng goto again;
18293fd0a558SYan, Zheng }
18303fd0a558SYan, Zheng }
18313fd0a558SYan, Zheng
18323fd0a558SYan, Zheng rc->merge_reloc_tree = 1;
18333fd0a558SYan, Zheng
18343fd0a558SYan, Zheng while (!list_empty(&rc->reloc_roots)) {
18353fd0a558SYan, Zheng reloc_root = list_entry(rc->reloc_roots.next,
18363fd0a558SYan, Zheng struct btrfs_root, root_list);
18373fd0a558SYan, Zheng list_del_init(&reloc_root->root_list);
18383fd0a558SYan, Zheng
1839a820feb5SDavid Sterba root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
1840a820feb5SDavid Sterba false);
1841e0b085b0SJosef Bacik if (IS_ERR(root)) {
1842e0b085b0SJosef Bacik /*
1843e0b085b0SJosef Bacik * Even if we have an error we need this reloc root
1844e0b085b0SJosef Bacik * back on our list so we can clean up properly.
1845e0b085b0SJosef Bacik */
1846e0b085b0SJosef Bacik list_add(&reloc_root->root_list, &reloc_roots);
1847e0b085b0SJosef Bacik btrfs_abort_transaction(trans, (int)PTR_ERR(root));
1848e0b085b0SJosef Bacik if (!err)
1849e0b085b0SJosef Bacik err = PTR_ERR(root);
1850e0b085b0SJosef Bacik break;
1851e0b085b0SJosef Bacik }
185205d7ce50SQu Wenruo
185305d7ce50SQu Wenruo if (unlikely(root->reloc_root != reloc_root)) {
185405d7ce50SQu Wenruo if (root->reloc_root) {
185505d7ce50SQu Wenruo btrfs_err(fs_info,
185605d7ce50SQu Wenruo "reloc tree mismatch, root %lld has reloc root key (%lld %u %llu) gen %llu, expect reloc root key (%lld %u %llu) gen %llu",
185705d7ce50SQu Wenruo root->root_key.objectid,
185805d7ce50SQu Wenruo root->reloc_root->root_key.objectid,
185905d7ce50SQu Wenruo root->reloc_root->root_key.type,
186005d7ce50SQu Wenruo root->reloc_root->root_key.offset,
186105d7ce50SQu Wenruo btrfs_root_generation(
186205d7ce50SQu Wenruo &root->reloc_root->root_item),
186305d7ce50SQu Wenruo reloc_root->root_key.objectid,
186405d7ce50SQu Wenruo reloc_root->root_key.type,
186505d7ce50SQu Wenruo reloc_root->root_key.offset,
186605d7ce50SQu Wenruo btrfs_root_generation(
186705d7ce50SQu Wenruo &reloc_root->root_item));
186805d7ce50SQu Wenruo } else {
186905d7ce50SQu Wenruo btrfs_err(fs_info,
187005d7ce50SQu Wenruo "reloc tree mismatch, root %lld has no reloc root, expect reloc root key (%lld %u %llu) gen %llu",
187105d7ce50SQu Wenruo root->root_key.objectid,
187205d7ce50SQu Wenruo reloc_root->root_key.objectid,
187305d7ce50SQu Wenruo reloc_root->root_key.type,
187405d7ce50SQu Wenruo reloc_root->root_key.offset,
187505d7ce50SQu Wenruo btrfs_root_generation(
187605d7ce50SQu Wenruo &reloc_root->root_item));
187705d7ce50SQu Wenruo }
187805d7ce50SQu Wenruo list_add(&reloc_root->root_list, &reloc_roots);
187905d7ce50SQu Wenruo btrfs_put_root(root);
188005d7ce50SQu Wenruo btrfs_abort_transaction(trans, -EUCLEAN);
188105d7ce50SQu Wenruo if (!err)
188205d7ce50SQu Wenruo err = -EUCLEAN;
188305d7ce50SQu Wenruo break;
188405d7ce50SQu Wenruo }
18853fd0a558SYan, Zheng
18863fd0a558SYan, Zheng /*
18873fd0a558SYan, Zheng * set reference count to 1, so btrfs_recover_relocation
18883fd0a558SYan, Zheng * knows it should resumes merging
18893fd0a558SYan, Zheng */
18903fd0a558SYan, Zheng if (!err)
18913fd0a558SYan, Zheng btrfs_set_root_refs(&reloc_root->root_item, 1);
1892bbae13f8SJosef Bacik ret = btrfs_update_reloc_root(trans, root);
18933fd0a558SYan, Zheng
1894bbae13f8SJosef Bacik /*
1895bbae13f8SJosef Bacik * Even if we have an error we need this reloc root back on our
1896bbae13f8SJosef Bacik * list so we can clean up properly.
1897bbae13f8SJosef Bacik */
18983fd0a558SYan, Zheng list_add(&reloc_root->root_list, &reloc_roots);
189900246528SJosef Bacik btrfs_put_root(root);
1900bbae13f8SJosef Bacik
1901bbae13f8SJosef Bacik if (ret) {
1902bbae13f8SJosef Bacik btrfs_abort_transaction(trans, ret);
1903bbae13f8SJosef Bacik if (!err)
1904bbae13f8SJosef Bacik err = ret;
1905bbae13f8SJosef Bacik break;
1906bbae13f8SJosef Bacik }
19073fd0a558SYan, Zheng }
19083fd0a558SYan, Zheng
19093fd0a558SYan, Zheng list_splice(&reloc_roots, &rc->reloc_roots);
19103fd0a558SYan, Zheng
19113fd0a558SYan, Zheng if (!err)
1912fb686c68SJosef Bacik err = btrfs_commit_transaction(trans);
19133fd0a558SYan, Zheng else
19143a45bb20SJeff Mahoney btrfs_end_transaction(trans);
19153fd0a558SYan, Zheng return err;
19163fd0a558SYan, Zheng }
19173fd0a558SYan, Zheng
19183fd0a558SYan, Zheng static noinline_for_stack
free_reloc_roots(struct list_head * list)1919aca1bba6SLiu Bo void free_reloc_roots(struct list_head *list)
1920aca1bba6SLiu Bo {
1921a7571232SNikolay Borisov struct btrfs_root *reloc_root, *tmp;
1922aca1bba6SLiu Bo
1923a7571232SNikolay Borisov list_for_each_entry_safe(reloc_root, tmp, list, root_list)
1924bb166d72SNaohiro Aota __del_reloc_root(reloc_root);
1925aca1bba6SLiu Bo }
1926aca1bba6SLiu Bo
1927aca1bba6SLiu Bo static noinline_for_stack
merge_reloc_roots(struct reloc_control * rc)192894404e82SDavid Sterba void merge_reloc_roots(struct reloc_control *rc)
19293fd0a558SYan, Zheng {
19300b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
19315d4f98a2SYan Zheng struct btrfs_root *root;
19325d4f98a2SYan Zheng struct btrfs_root *reloc_root;
19333fd0a558SYan, Zheng LIST_HEAD(reloc_roots);
19343fd0a558SYan, Zheng int found = 0;
1935aca1bba6SLiu Bo int ret = 0;
19363fd0a558SYan, Zheng again:
19373fd0a558SYan, Zheng root = rc->extent_root;
19387585717fSChris Mason
19397585717fSChris Mason /*
19407585717fSChris Mason * this serializes us with btrfs_record_root_in_transaction,
19417585717fSChris Mason * we have to make sure nobody is in the middle of
19427585717fSChris Mason * adding their roots to the list while we are
19437585717fSChris Mason * doing this splice
19447585717fSChris Mason */
19450b246afaSJeff Mahoney mutex_lock(&fs_info->reloc_mutex);
19463fd0a558SYan, Zheng list_splice_init(&rc->reloc_roots, &reloc_roots);
19470b246afaSJeff Mahoney mutex_unlock(&fs_info->reloc_mutex);
19485d4f98a2SYan Zheng
19493fd0a558SYan, Zheng while (!list_empty(&reloc_roots)) {
19503fd0a558SYan, Zheng found = 1;
19513fd0a558SYan, Zheng reloc_root = list_entry(reloc_roots.next,
19523fd0a558SYan, Zheng struct btrfs_root, root_list);
19535d4f98a2SYan Zheng
1954a820feb5SDavid Sterba root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
1955a820feb5SDavid Sterba false);
19565d4f98a2SYan Zheng if (btrfs_root_refs(&reloc_root->root_item) > 0) {
195705d7ce50SQu Wenruo if (WARN_ON(IS_ERR(root))) {
195824213fa4SJosef Bacik /*
195924213fa4SJosef Bacik * For recovery we read the fs roots on mount,
196024213fa4SJosef Bacik * and if we didn't find the root then we marked
196124213fa4SJosef Bacik * the reloc root as a garbage root. For normal
196224213fa4SJosef Bacik * relocation obviously the root should exist in
196324213fa4SJosef Bacik * memory. However there's no reason we can't
196424213fa4SJosef Bacik * handle the error properly here just in case.
196524213fa4SJosef Bacik */
196624213fa4SJosef Bacik ret = PTR_ERR(root);
196724213fa4SJosef Bacik goto out;
196824213fa4SJosef Bacik }
196905d7ce50SQu Wenruo if (WARN_ON(root->reloc_root != reloc_root)) {
197024213fa4SJosef Bacik /*
197105d7ce50SQu Wenruo * This can happen if on-disk metadata has some
197205d7ce50SQu Wenruo * corruption, e.g. bad reloc tree key offset.
197324213fa4SJosef Bacik */
197424213fa4SJosef Bacik ret = -EINVAL;
197524213fa4SJosef Bacik goto out;
197624213fa4SJosef Bacik }
19773fd0a558SYan, Zheng ret = merge_reloc_root(rc, root);
197800246528SJosef Bacik btrfs_put_root(root);
1979b37b39cdSJosef Bacik if (ret) {
198025e293c2SWang Shilong if (list_empty(&reloc_root->root_list))
198125e293c2SWang Shilong list_add_tail(&reloc_root->root_list,
198225e293c2SWang Shilong &reloc_roots);
1983aca1bba6SLiu Bo goto out;
1984b37b39cdSJosef Bacik }
19853fd0a558SYan, Zheng } else {
198651415b6cSQu Wenruo if (!IS_ERR(root)) {
198751415b6cSQu Wenruo if (root->reloc_root == reloc_root) {
198851415b6cSQu Wenruo root->reloc_root = NULL;
198951415b6cSQu Wenruo btrfs_put_root(reloc_root);
199051415b6cSQu Wenruo }
19911dae7e0eSQu Wenruo clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE,
19921dae7e0eSQu Wenruo &root->state);
199351415b6cSQu Wenruo btrfs_put_root(root);
199451415b6cSQu Wenruo }
199551415b6cSQu Wenruo
19963fd0a558SYan, Zheng list_del_init(&reloc_root->root_list);
199730d40577SQu Wenruo /* Don't forget to queue this reloc root for cleanup */
199830d40577SQu Wenruo list_add_tail(&reloc_root->reloc_dirty_list,
199930d40577SQu Wenruo &rc->dirty_subvol_roots);
20003fd0a558SYan, Zheng }
20015d4f98a2SYan Zheng }
20025d4f98a2SYan Zheng
20033fd0a558SYan, Zheng if (found) {
20043fd0a558SYan, Zheng found = 0;
20053fd0a558SYan, Zheng goto again;
20065d4f98a2SYan Zheng }
2007aca1bba6SLiu Bo out:
2008aca1bba6SLiu Bo if (ret) {
20090b246afaSJeff Mahoney btrfs_handle_fs_error(fs_info, ret, NULL);
2010aca1bba6SLiu Bo free_reloc_roots(&reloc_roots);
2011467bb1d2SWang Shilong
2012467bb1d2SWang Shilong /* new reloc root may be added */
20130b246afaSJeff Mahoney mutex_lock(&fs_info->reloc_mutex);
2014467bb1d2SWang Shilong list_splice_init(&rc->reloc_roots, &reloc_roots);
20150b246afaSJeff Mahoney mutex_unlock(&fs_info->reloc_mutex);
2016467bb1d2SWang Shilong free_reloc_roots(&reloc_roots);
2017aca1bba6SLiu Bo }
2018aca1bba6SLiu Bo
20197b7b7431SJosef Bacik /*
20207b7b7431SJosef Bacik * We used to have
20217b7b7431SJosef Bacik *
20227b7b7431SJosef Bacik * BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
20237b7b7431SJosef Bacik *
20247b7b7431SJosef Bacik * here, but it's wrong. If we fail to start the transaction in
20257b7b7431SJosef Bacik * prepare_to_merge() we will have only 0 ref reloc roots, none of which
20267b7b7431SJosef Bacik * have actually been removed from the reloc_root_tree rb tree. This is
20277b7b7431SJosef Bacik * fine because we're bailing here, and we hold a reference on the root
20287b7b7431SJosef Bacik * for the list that holds it, so these roots will be cleaned up when we
20297b7b7431SJosef Bacik * do the reloc_dirty_list afterwards. Meanwhile the root->reloc_root
20307b7b7431SJosef Bacik * will be cleaned up on unmount.
20317b7b7431SJosef Bacik *
20327b7b7431SJosef Bacik * The remaining nodes will be cleaned up by free_reloc_control.
20337b7b7431SJosef Bacik */
20345d4f98a2SYan Zheng }
20355d4f98a2SYan Zheng
free_block_list(struct rb_root * blocks)20365d4f98a2SYan Zheng static void free_block_list(struct rb_root *blocks)
20375d4f98a2SYan Zheng {
20385d4f98a2SYan Zheng struct tree_block *block;
20395d4f98a2SYan Zheng struct rb_node *rb_node;
20405d4f98a2SYan Zheng while ((rb_node = rb_first(blocks))) {
20415d4f98a2SYan Zheng block = rb_entry(rb_node, struct tree_block, rb_node);
20425d4f98a2SYan Zheng rb_erase(rb_node, blocks);
20435d4f98a2SYan Zheng kfree(block);
20445d4f98a2SYan Zheng }
20455d4f98a2SYan Zheng }
20465d4f98a2SYan Zheng
record_reloc_root_in_trans(struct btrfs_trans_handle * trans,struct btrfs_root * reloc_root)20475d4f98a2SYan Zheng static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
20485d4f98a2SYan Zheng struct btrfs_root *reloc_root)
20495d4f98a2SYan Zheng {
20500b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = reloc_root->fs_info;
20515d4f98a2SYan Zheng struct btrfs_root *root;
2052442b1ac5SJosef Bacik int ret;
20535d4f98a2SYan Zheng
20545d4f98a2SYan Zheng if (reloc_root->last_trans == trans->transid)
20555d4f98a2SYan Zheng return 0;
20565d4f98a2SYan Zheng
2057a820feb5SDavid Sterba root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, false);
2058404bccbcSJosef Bacik
2059404bccbcSJosef Bacik /*
2060404bccbcSJosef Bacik * This should succeed, since we can't have a reloc root without having
2061404bccbcSJosef Bacik * already looked up the actual root and created the reloc root for this
2062404bccbcSJosef Bacik * root.
2063404bccbcSJosef Bacik *
2064404bccbcSJosef Bacik * However if there's some sort of corruption where we have a ref to a
2065404bccbcSJosef Bacik * reloc root without a corresponding root this could return ENOENT.
2066404bccbcSJosef Bacik */
2067404bccbcSJosef Bacik if (IS_ERR(root)) {
2068404bccbcSJosef Bacik ASSERT(0);
2069404bccbcSJosef Bacik return PTR_ERR(root);
2070404bccbcSJosef Bacik }
2071404bccbcSJosef Bacik if (root->reloc_root != reloc_root) {
2072404bccbcSJosef Bacik ASSERT(0);
2073404bccbcSJosef Bacik btrfs_err(fs_info,
2074404bccbcSJosef Bacik "root %llu has two reloc roots associated with it",
2075404bccbcSJosef Bacik reloc_root->root_key.offset);
2076404bccbcSJosef Bacik btrfs_put_root(root);
2077404bccbcSJosef Bacik return -EUCLEAN;
2078404bccbcSJosef Bacik }
2079442b1ac5SJosef Bacik ret = btrfs_record_root_in_trans(trans, root);
208000246528SJosef Bacik btrfs_put_root(root);
20815d4f98a2SYan Zheng
2082442b1ac5SJosef Bacik return ret;
20835d4f98a2SYan Zheng }
20845d4f98a2SYan Zheng
20853fd0a558SYan, Zheng static noinline_for_stack
select_reloc_root(struct btrfs_trans_handle * trans,struct reloc_control * rc,struct btrfs_backref_node * node,struct btrfs_backref_edge * edges[])20863fd0a558SYan, Zheng struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
20873fd0a558SYan, Zheng struct reloc_control *rc,
2088a26195a5SQu Wenruo struct btrfs_backref_node *node,
2089a26195a5SQu Wenruo struct btrfs_backref_edge *edges[])
20905d4f98a2SYan Zheng {
2091a26195a5SQu Wenruo struct btrfs_backref_node *next;
20925d4f98a2SYan Zheng struct btrfs_root *root;
20933fd0a558SYan, Zheng int index = 0;
209492de551bSJosef Bacik int ret;
20953fd0a558SYan, Zheng
20965d4f98a2SYan Zheng next = node;
20975d4f98a2SYan Zheng while (1) {
20985d4f98a2SYan Zheng cond_resched();
20995d4f98a2SYan Zheng next = walk_up_backref(next, edges, &index);
21005d4f98a2SYan Zheng root = next->root;
21018ee66afeSJosef Bacik
21028ee66afeSJosef Bacik /*
21038ee66afeSJosef Bacik * If there is no root, then our references for this block are
21048ee66afeSJosef Bacik * incomplete, as we should be able to walk all the way up to a
21058ee66afeSJosef Bacik * block that is owned by a root.
21068ee66afeSJosef Bacik *
21078ee66afeSJosef Bacik * This path is only for SHAREABLE roots, so if we come upon a
21088ee66afeSJosef Bacik * non-SHAREABLE root then we have backrefs that resolve
21098ee66afeSJosef Bacik * improperly.
21108ee66afeSJosef Bacik *
21118ee66afeSJosef Bacik * Both of these cases indicate file system corruption, or a bug
21128ee66afeSJosef Bacik * in the backref walking code.
21138ee66afeSJosef Bacik */
21148ee66afeSJosef Bacik if (!root) {
21158ee66afeSJosef Bacik ASSERT(0);
21168ee66afeSJosef Bacik btrfs_err(trans->fs_info,
21178ee66afeSJosef Bacik "bytenr %llu doesn't have a backref path ending in a root",
21188ee66afeSJosef Bacik node->bytenr);
21198ee66afeSJosef Bacik return ERR_PTR(-EUCLEAN);
21208ee66afeSJosef Bacik }
21218ee66afeSJosef Bacik if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
21228ee66afeSJosef Bacik ASSERT(0);
21238ee66afeSJosef Bacik btrfs_err(trans->fs_info,
21248ee66afeSJosef Bacik "bytenr %llu has multiple refs with one ending in a non-shareable root",
21258ee66afeSJosef Bacik node->bytenr);
21268ee66afeSJosef Bacik return ERR_PTR(-EUCLEAN);
21278ee66afeSJosef Bacik }
21285d4f98a2SYan Zheng
21295d4f98a2SYan Zheng if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
213092de551bSJosef Bacik ret = record_reloc_root_in_trans(trans, root);
213192de551bSJosef Bacik if (ret)
213292de551bSJosef Bacik return ERR_PTR(ret);
21335d4f98a2SYan Zheng break;
21345d4f98a2SYan Zheng }
21355d4f98a2SYan Zheng
213692de551bSJosef Bacik ret = btrfs_record_root_in_trans(trans, root);
213792de551bSJosef Bacik if (ret)
213892de551bSJosef Bacik return ERR_PTR(ret);
21393fd0a558SYan, Zheng root = root->reloc_root;
21403fd0a558SYan, Zheng
214139200e59SJosef Bacik /*
214239200e59SJosef Bacik * We could have raced with another thread which failed, so
214339200e59SJosef Bacik * root->reloc_root may not be set, return ENOENT in this case.
214439200e59SJosef Bacik */
214539200e59SJosef Bacik if (!root)
214639200e59SJosef Bacik return ERR_PTR(-ENOENT);
214739200e59SJosef Bacik
21483fd0a558SYan, Zheng if (next->new_bytenr != root->node->start) {
21498ee66afeSJosef Bacik /*
21508ee66afeSJosef Bacik * We just created the reloc root, so we shouldn't have
21518ee66afeSJosef Bacik * ->new_bytenr set and this shouldn't be in the changed
21528ee66afeSJosef Bacik * list. If it is then we have multiple roots pointing
21538ee66afeSJosef Bacik * at the same bytenr which indicates corruption, or
21548ee66afeSJosef Bacik * we've made a mistake in the backref walking code.
21558ee66afeSJosef Bacik */
21568ee66afeSJosef Bacik ASSERT(next->new_bytenr == 0);
21578ee66afeSJosef Bacik ASSERT(list_empty(&next->list));
21588ee66afeSJosef Bacik if (next->new_bytenr || !list_empty(&next->list)) {
21598ee66afeSJosef Bacik btrfs_err(trans->fs_info,
21608ee66afeSJosef Bacik "bytenr %llu possibly has multiple roots pointing at the same bytenr %llu",
21618ee66afeSJosef Bacik node->bytenr, next->bytenr);
21628ee66afeSJosef Bacik return ERR_PTR(-EUCLEAN);
21638ee66afeSJosef Bacik }
21648ee66afeSJosef Bacik
21653fd0a558SYan, Zheng next->new_bytenr = root->node->start;
216600246528SJosef Bacik btrfs_put_root(next->root);
216700246528SJosef Bacik next->root = btrfs_grab_root(root);
21680b530bc5SJosef Bacik ASSERT(next->root);
21693fd0a558SYan, Zheng list_add_tail(&next->list,
21703fd0a558SYan, Zheng &rc->backref_cache.changed);
21719569cc20SQu Wenruo mark_block_processed(rc, next);
21725d4f98a2SYan Zheng break;
21735d4f98a2SYan Zheng }
21745d4f98a2SYan Zheng
21753fd0a558SYan, Zheng WARN_ON(1);
21765d4f98a2SYan Zheng root = NULL;
21775d4f98a2SYan Zheng next = walk_down_backref(edges, &index);
21785d4f98a2SYan Zheng if (!next || next->level <= node->level)
21795d4f98a2SYan Zheng break;
21805d4f98a2SYan Zheng }
2181cbdc2ebcSJosef Bacik if (!root) {
2182cbdc2ebcSJosef Bacik /*
2183cbdc2ebcSJosef Bacik * This can happen if there's fs corruption or if there's a bug
2184cbdc2ebcSJosef Bacik * in the backref lookup code.
2185cbdc2ebcSJosef Bacik */
2186cbdc2ebcSJosef Bacik ASSERT(0);
2187cbdc2ebcSJosef Bacik return ERR_PTR(-ENOENT);
2188cbdc2ebcSJosef Bacik }
21895d4f98a2SYan Zheng
21903fd0a558SYan, Zheng next = node;
21913fd0a558SYan, Zheng /* setup backref node path for btrfs_reloc_cow_block */
21923fd0a558SYan, Zheng while (1) {
21933fd0a558SYan, Zheng rc->backref_cache.path[next->level] = next;
21943fd0a558SYan, Zheng if (--index < 0)
21953fd0a558SYan, Zheng break;
21963fd0a558SYan, Zheng next = edges[index]->node[UPPER];
21973fd0a558SYan, Zheng }
21985d4f98a2SYan Zheng return root;
21995d4f98a2SYan Zheng }
22005d4f98a2SYan Zheng
22013fd0a558SYan, Zheng /*
220292a7cc42SQu Wenruo * Select a tree root for relocation.
220392a7cc42SQu Wenruo *
220492a7cc42SQu Wenruo * Return NULL if the block is not shareable. We should use do_relocation() in
220592a7cc42SQu Wenruo * this case.
220692a7cc42SQu Wenruo *
220792a7cc42SQu Wenruo * Return a tree root pointer if the block is shareable.
220892a7cc42SQu Wenruo * Return -ENOENT if the block is root of reloc tree.
22093fd0a558SYan, Zheng */
22105d4f98a2SYan Zheng static noinline_for_stack
select_one_root(struct btrfs_backref_node * node)2211a26195a5SQu Wenruo struct btrfs_root *select_one_root(struct btrfs_backref_node *node)
22125d4f98a2SYan Zheng {
2213a26195a5SQu Wenruo struct btrfs_backref_node *next;
22143fd0a558SYan, Zheng struct btrfs_root *root;
22153fd0a558SYan, Zheng struct btrfs_root *fs_root = NULL;
2216a26195a5SQu Wenruo struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
22173fd0a558SYan, Zheng int index = 0;
22183fd0a558SYan, Zheng
22193fd0a558SYan, Zheng next = node;
22203fd0a558SYan, Zheng while (1) {
22213fd0a558SYan, Zheng cond_resched();
22223fd0a558SYan, Zheng next = walk_up_backref(next, edges, &index);
22233fd0a558SYan, Zheng root = next->root;
22248717cf44SJosef Bacik
22258717cf44SJosef Bacik /*
22268717cf44SJosef Bacik * This can occur if we have incomplete extent refs leading all
22278717cf44SJosef Bacik * the way up a particular path, in this case return -EUCLEAN.
22288717cf44SJosef Bacik */
22298717cf44SJosef Bacik if (!root)
22308717cf44SJosef Bacik return ERR_PTR(-EUCLEAN);
22313fd0a558SYan, Zheng
223292a7cc42SQu Wenruo /* No other choice for non-shareable tree */
223392a7cc42SQu Wenruo if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
22343fd0a558SYan, Zheng return root;
22353fd0a558SYan, Zheng
22363fd0a558SYan, Zheng if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
22373fd0a558SYan, Zheng fs_root = root;
22383fd0a558SYan, Zheng
22393fd0a558SYan, Zheng if (next != node)
22403fd0a558SYan, Zheng return NULL;
22413fd0a558SYan, Zheng
22423fd0a558SYan, Zheng next = walk_down_backref(edges, &index);
22433fd0a558SYan, Zheng if (!next || next->level <= node->level)
22443fd0a558SYan, Zheng break;
22453fd0a558SYan, Zheng }
22463fd0a558SYan, Zheng
22473fd0a558SYan, Zheng if (!fs_root)
22483fd0a558SYan, Zheng return ERR_PTR(-ENOENT);
22493fd0a558SYan, Zheng return fs_root;
22505d4f98a2SYan Zheng }
22515d4f98a2SYan Zheng
22525d4f98a2SYan Zheng static noinline_for_stack
calcu_metadata_size(struct reloc_control * rc,struct btrfs_backref_node * node,int reserve)22533fd0a558SYan, Zheng u64 calcu_metadata_size(struct reloc_control *rc,
2254a26195a5SQu Wenruo struct btrfs_backref_node *node, int reserve)
22555d4f98a2SYan Zheng {
22560b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2257a26195a5SQu Wenruo struct btrfs_backref_node *next = node;
2258a26195a5SQu Wenruo struct btrfs_backref_edge *edge;
2259a26195a5SQu Wenruo struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
22603fd0a558SYan, Zheng u64 num_bytes = 0;
22613fd0a558SYan, Zheng int index = 0;
22625d4f98a2SYan Zheng
22633fd0a558SYan, Zheng BUG_ON(reserve && node->processed);
22643fd0a558SYan, Zheng
22653fd0a558SYan, Zheng while (next) {
22663fd0a558SYan, Zheng cond_resched();
22675d4f98a2SYan Zheng while (1) {
22683fd0a558SYan, Zheng if (next->processed && (reserve || next != node))
22695d4f98a2SYan Zheng break;
22705d4f98a2SYan Zheng
22710b246afaSJeff Mahoney num_bytes += fs_info->nodesize;
22723fd0a558SYan, Zheng
22733fd0a558SYan, Zheng if (list_empty(&next->upper))
22743fd0a558SYan, Zheng break;
22753fd0a558SYan, Zheng
22763fd0a558SYan, Zheng edge = list_entry(next->upper.next,
2277a26195a5SQu Wenruo struct btrfs_backref_edge, list[LOWER]);
22783fd0a558SYan, Zheng edges[index++] = edge;
22793fd0a558SYan, Zheng next = edge->node[UPPER];
22805d4f98a2SYan Zheng }
22813fd0a558SYan, Zheng next = walk_down_backref(edges, &index);
22823fd0a558SYan, Zheng }
22833fd0a558SYan, Zheng return num_bytes;
22843fd0a558SYan, Zheng }
22853fd0a558SYan, Zheng
reserve_metadata_space(struct btrfs_trans_handle * trans,struct reloc_control * rc,struct btrfs_backref_node * node)22863fd0a558SYan, Zheng static int reserve_metadata_space(struct btrfs_trans_handle *trans,
22873fd0a558SYan, Zheng struct reloc_control *rc,
2288a26195a5SQu Wenruo struct btrfs_backref_node *node)
22893fd0a558SYan, Zheng {
22903fd0a558SYan, Zheng struct btrfs_root *root = rc->extent_root;
2291da17066cSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
22923fd0a558SYan, Zheng u64 num_bytes;
22933fd0a558SYan, Zheng int ret;
22940647bf56SWang Shilong u64 tmp;
22953fd0a558SYan, Zheng
22963fd0a558SYan, Zheng num_bytes = calcu_metadata_size(rc, node, 1) * 2;
22973fd0a558SYan, Zheng
22983fd0a558SYan, Zheng trans->block_rsv = rc->block_rsv;
22990647bf56SWang Shilong rc->reserved_bytes += num_bytes;
23008ca17f0fSJosef Bacik
23018ca17f0fSJosef Bacik /*
23028ca17f0fSJosef Bacik * We are under a transaction here so we can only do limited flushing.
23038ca17f0fSJosef Bacik * If we get an enospc just kick back -EAGAIN so we know to drop the
23048ca17f0fSJosef Bacik * transaction and try to refill when we can flush all the things.
23058ca17f0fSJosef Bacik */
23069270501cSJosef Bacik ret = btrfs_block_rsv_refill(fs_info, rc->block_rsv, num_bytes,
23078ca17f0fSJosef Bacik BTRFS_RESERVE_FLUSH_LIMIT);
23083fd0a558SYan, Zheng if (ret) {
2309da17066cSJeff Mahoney tmp = fs_info->nodesize * RELOCATION_RESERVED_NODES;
23100647bf56SWang Shilong while (tmp <= rc->reserved_bytes)
23110647bf56SWang Shilong tmp <<= 1;
23120647bf56SWang Shilong /*
23130647bf56SWang Shilong * only one thread can access block_rsv at this point,
23140647bf56SWang Shilong * so we don't need hold lock to protect block_rsv.
23150647bf56SWang Shilong * we expand more reservation size here to allow enough
231652042d8eSAndrea Gelmini * space for relocation and we will return earlier in
23170647bf56SWang Shilong * enospc case.
23180647bf56SWang Shilong */
2319da17066cSJeff Mahoney rc->block_rsv->size = tmp + fs_info->nodesize *
23200647bf56SWang Shilong RELOCATION_RESERVED_NODES;
23218ca17f0fSJosef Bacik return -EAGAIN;
23223fd0a558SYan, Zheng }
23233fd0a558SYan, Zheng
23243fd0a558SYan, Zheng return 0;
23253fd0a558SYan, Zheng }
23263fd0a558SYan, Zheng
23275d4f98a2SYan Zheng /*
23285d4f98a2SYan Zheng * relocate a block tree, and then update pointers in upper level
23295d4f98a2SYan Zheng * blocks that reference the block to point to the new location.
23305d4f98a2SYan Zheng *
23315d4f98a2SYan Zheng * if called by link_to_upper, the block has already been relocated.
23325d4f98a2SYan Zheng * in that case this function just updates pointers.
23335d4f98a2SYan Zheng */
do_relocation(struct btrfs_trans_handle * trans,struct reloc_control * rc,struct btrfs_backref_node * node,struct btrfs_key * key,struct btrfs_path * path,int lowest)23345d4f98a2SYan Zheng static int do_relocation(struct btrfs_trans_handle *trans,
23353fd0a558SYan, Zheng struct reloc_control *rc,
2336a26195a5SQu Wenruo struct btrfs_backref_node *node,
23375d4f98a2SYan Zheng struct btrfs_key *key,
23385d4f98a2SYan Zheng struct btrfs_path *path, int lowest)
23395d4f98a2SYan Zheng {
2340a26195a5SQu Wenruo struct btrfs_backref_node *upper;
2341a26195a5SQu Wenruo struct btrfs_backref_edge *edge;
2342a26195a5SQu Wenruo struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
23435d4f98a2SYan Zheng struct btrfs_root *root;
23445d4f98a2SYan Zheng struct extent_buffer *eb;
23455d4f98a2SYan Zheng u32 blocksize;
23465d4f98a2SYan Zheng u64 bytenr;
23475d4f98a2SYan Zheng int slot;
23488df01fddSNikolay Borisov int ret = 0;
23495d4f98a2SYan Zheng
2350ffe30dd8SJosef Bacik /*
2351ffe30dd8SJosef Bacik * If we are lowest then this is the first time we're processing this
2352ffe30dd8SJosef Bacik * block, and thus shouldn't have an eb associated with it yet.
2353ffe30dd8SJosef Bacik */
2354ffe30dd8SJosef Bacik ASSERT(!lowest || !node->eb);
23555d4f98a2SYan Zheng
23565d4f98a2SYan Zheng path->lowest_level = node->level + 1;
23573fd0a558SYan, Zheng rc->backref_cache.path[node->level] = node;
23585d4f98a2SYan Zheng list_for_each_entry(edge, &node->upper, list[LOWER]) {
235982fa113fSQu Wenruo struct btrfs_ref ref = { 0 };
2360581c1760SQu Wenruo
23615d4f98a2SYan Zheng cond_resched();
23625d4f98a2SYan Zheng
23635d4f98a2SYan Zheng upper = edge->node[UPPER];
2364dc4103f9SWang Shilong root = select_reloc_root(trans, rc, upper, edges);
2365cbdc2ebcSJosef Bacik if (IS_ERR(root)) {
2366cbdc2ebcSJosef Bacik ret = PTR_ERR(root);
2367cbdc2ebcSJosef Bacik goto next;
2368cbdc2ebcSJosef Bacik }
23695d4f98a2SYan Zheng
23703fd0a558SYan, Zheng if (upper->eb && !upper->locked) {
23713fd0a558SYan, Zheng if (!lowest) {
2372fdf8d595SAnand Jain ret = btrfs_bin_search(upper->eb, 0, key, &slot);
23738df01fddSNikolay Borisov if (ret < 0)
2374cbca7d59SFilipe Manana goto next;
23753fd0a558SYan, Zheng BUG_ON(ret);
23763fd0a558SYan, Zheng bytenr = btrfs_node_blockptr(upper->eb, slot);
23773fd0a558SYan, Zheng if (node->eb->start == bytenr)
23783fd0a558SYan, Zheng goto next;
23793fd0a558SYan, Zheng }
2380b0fe7078SQu Wenruo btrfs_backref_drop_node_buffer(upper);
23813fd0a558SYan, Zheng }
23825d4f98a2SYan Zheng
23835d4f98a2SYan Zheng if (!upper->eb) {
23845d4f98a2SYan Zheng ret = btrfs_search_slot(trans, root, key, path, 0, 1);
23853561b9dbSLiu Bo if (ret) {
23868df01fddSNikolay Borisov if (ret > 0)
23878df01fddSNikolay Borisov ret = -ENOENT;
23883561b9dbSLiu Bo
23893561b9dbSLiu Bo btrfs_release_path(path);
23905d4f98a2SYan Zheng break;
23915d4f98a2SYan Zheng }
23925d4f98a2SYan Zheng
23933fd0a558SYan, Zheng if (!upper->eb) {
23943fd0a558SYan, Zheng upper->eb = path->nodes[upper->level];
23953fd0a558SYan, Zheng path->nodes[upper->level] = NULL;
23963fd0a558SYan, Zheng } else {
23973fd0a558SYan, Zheng BUG_ON(upper->eb != path->nodes[upper->level]);
23983fd0a558SYan, Zheng }
23993fd0a558SYan, Zheng
24003fd0a558SYan, Zheng upper->locked = 1;
24013fd0a558SYan, Zheng path->locks[upper->level] = 0;
24023fd0a558SYan, Zheng
24035d4f98a2SYan Zheng slot = path->slots[upper->level];
2404b3b4aa74SDavid Sterba btrfs_release_path(path);
24055d4f98a2SYan Zheng } else {
2406fdf8d595SAnand Jain ret = btrfs_bin_search(upper->eb, 0, key, &slot);
24078df01fddSNikolay Borisov if (ret < 0)
2408cbca7d59SFilipe Manana goto next;
24095d4f98a2SYan Zheng BUG_ON(ret);
24105d4f98a2SYan Zheng }
24115d4f98a2SYan Zheng
24125d4f98a2SYan Zheng bytenr = btrfs_node_blockptr(upper->eb, slot);
24133fd0a558SYan, Zheng if (lowest) {
24144547f4d8SLiu Bo if (bytenr != node->bytenr) {
24154547f4d8SLiu Bo btrfs_err(root->fs_info,
24164547f4d8SLiu Bo "lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
24174547f4d8SLiu Bo bytenr, node->bytenr, slot,
24184547f4d8SLiu Bo upper->eb->start);
24198df01fddSNikolay Borisov ret = -EIO;
24204547f4d8SLiu Bo goto next;
24214547f4d8SLiu Bo }
24225d4f98a2SYan Zheng } else {
24233fd0a558SYan, Zheng if (node->eb->start == bytenr)
24243fd0a558SYan, Zheng goto next;
24255d4f98a2SYan Zheng }
24265d4f98a2SYan Zheng
2427da17066cSJeff Mahoney blocksize = root->fs_info->nodesize;
2428c9752536SJosef Bacik eb = btrfs_read_node_slot(upper->eb, slot);
242964c043deSLiu Bo if (IS_ERR(eb)) {
24308df01fddSNikolay Borisov ret = PTR_ERR(eb);
243164c043deSLiu Bo goto next;
243297d9a8a4STsutomu Itoh }
24335d4f98a2SYan Zheng btrfs_tree_lock(eb);
24345d4f98a2SYan Zheng
24355d4f98a2SYan Zheng if (!node->eb) {
24365d4f98a2SYan Zheng ret = btrfs_cow_block(trans, root, eb, upper->eb,
24379631e4ccSJosef Bacik slot, &eb, BTRFS_NESTING_COW);
24383fd0a558SYan, Zheng btrfs_tree_unlock(eb);
24393fd0a558SYan, Zheng free_extent_buffer(eb);
24408df01fddSNikolay Borisov if (ret < 0)
24413fd0a558SYan, Zheng goto next;
2442ffe30dd8SJosef Bacik /*
2443ffe30dd8SJosef Bacik * We've just COWed this block, it should have updated
2444ffe30dd8SJosef Bacik * the correct backref node entry.
2445ffe30dd8SJosef Bacik */
2446ffe30dd8SJosef Bacik ASSERT(node->eb == eb);
24475d4f98a2SYan Zheng } else {
24485d4f98a2SYan Zheng btrfs_set_node_blockptr(upper->eb, slot,
24495d4f98a2SYan Zheng node->eb->start);
24505d4f98a2SYan Zheng btrfs_set_node_ptr_generation(upper->eb, slot,
24515d4f98a2SYan Zheng trans->transid);
2452d5e09e38SFilipe Manana btrfs_mark_buffer_dirty(trans, upper->eb);
24535d4f98a2SYan Zheng
245482fa113fSQu Wenruo btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
24555d4f98a2SYan Zheng node->eb->start, blocksize,
245682fa113fSQu Wenruo upper->eb->start);
245782fa113fSQu Wenruo btrfs_init_tree_ref(&ref, node->level,
2458f42c5da6SNikolay Borisov btrfs_header_owner(upper->eb),
2459f42c5da6SNikolay Borisov root->root_key.objectid, false);
246082fa113fSQu Wenruo ret = btrfs_inc_extent_ref(trans, &ref);
2461eb6b7fb4SJosef Bacik if (!ret)
2462eb6b7fb4SJosef Bacik ret = btrfs_drop_subtree(trans, root, eb,
2463eb6b7fb4SJosef Bacik upper->eb);
2464eb6b7fb4SJosef Bacik if (ret)
2465eb6b7fb4SJosef Bacik btrfs_abort_transaction(trans, ret);
24665d4f98a2SYan Zheng }
24673fd0a558SYan, Zheng next:
24683fd0a558SYan, Zheng if (!upper->pending)
2469b0fe7078SQu Wenruo btrfs_backref_drop_node_buffer(upper);
24703fd0a558SYan, Zheng else
2471b0fe7078SQu Wenruo btrfs_backref_unlock_node_buffer(upper);
24728df01fddSNikolay Borisov if (ret)
24733fd0a558SYan, Zheng break;
24745d4f98a2SYan Zheng }
24753fd0a558SYan, Zheng
24768df01fddSNikolay Borisov if (!ret && node->pending) {
2477b0fe7078SQu Wenruo btrfs_backref_drop_node_buffer(node);
24783fd0a558SYan, Zheng list_move_tail(&node->list, &rc->backref_cache.changed);
24793fd0a558SYan, Zheng node->pending = 0;
24805d4f98a2SYan Zheng }
24813fd0a558SYan, Zheng
24825d4f98a2SYan Zheng path->lowest_level = 0;
2483ffe30dd8SJosef Bacik
2484ffe30dd8SJosef Bacik /*
2485ffe30dd8SJosef Bacik * We should have allocated all of our space in the block rsv and thus
2486ffe30dd8SJosef Bacik * shouldn't ENOSPC.
2487ffe30dd8SJosef Bacik */
2488ffe30dd8SJosef Bacik ASSERT(ret != -ENOSPC);
24898df01fddSNikolay Borisov return ret;
24905d4f98a2SYan Zheng }
24915d4f98a2SYan Zheng
link_to_upper(struct btrfs_trans_handle * trans,struct reloc_control * rc,struct btrfs_backref_node * node,struct btrfs_path * path)24925d4f98a2SYan Zheng static int link_to_upper(struct btrfs_trans_handle *trans,
24933fd0a558SYan, Zheng struct reloc_control *rc,
2494a26195a5SQu Wenruo struct btrfs_backref_node *node,
24955d4f98a2SYan Zheng struct btrfs_path *path)
24965d4f98a2SYan Zheng {
24975d4f98a2SYan Zheng struct btrfs_key key;
24985d4f98a2SYan Zheng
24995d4f98a2SYan Zheng btrfs_node_key_to_cpu(node->eb, &key, 0);
25003fd0a558SYan, Zheng return do_relocation(trans, rc, node, &key, path, 0);
25015d4f98a2SYan Zheng }
25025d4f98a2SYan Zheng
finish_pending_nodes(struct btrfs_trans_handle * trans,struct reloc_control * rc,struct btrfs_path * path,int err)25035d4f98a2SYan Zheng static int finish_pending_nodes(struct btrfs_trans_handle *trans,
25043fd0a558SYan, Zheng struct reloc_control *rc,
25053fd0a558SYan, Zheng struct btrfs_path *path, int err)
25065d4f98a2SYan Zheng {
25073fd0a558SYan, Zheng LIST_HEAD(list);
2508a26195a5SQu Wenruo struct btrfs_backref_cache *cache = &rc->backref_cache;
2509a26195a5SQu Wenruo struct btrfs_backref_node *node;
25105d4f98a2SYan Zheng int level;
25115d4f98a2SYan Zheng int ret;
25125d4f98a2SYan Zheng
25135d4f98a2SYan Zheng for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
25145d4f98a2SYan Zheng while (!list_empty(&cache->pending[level])) {
25155d4f98a2SYan Zheng node = list_entry(cache->pending[level].next,
2516a26195a5SQu Wenruo struct btrfs_backref_node, list);
25173fd0a558SYan, Zheng list_move_tail(&node->list, &list);
25183fd0a558SYan, Zheng BUG_ON(!node->pending);
25195d4f98a2SYan Zheng
25203fd0a558SYan, Zheng if (!err) {
25213fd0a558SYan, Zheng ret = link_to_upper(trans, rc, node, path);
25225d4f98a2SYan Zheng if (ret < 0)
25235d4f98a2SYan Zheng err = ret;
25245d4f98a2SYan Zheng }
25255d4f98a2SYan Zheng }
25263fd0a558SYan, Zheng list_splice_init(&list, &cache->pending[level]);
25273fd0a558SYan, Zheng }
25285d4f98a2SYan Zheng return err;
25295d4f98a2SYan Zheng }
25305d4f98a2SYan Zheng
25315d4f98a2SYan Zheng /*
25325d4f98a2SYan Zheng * mark a block and all blocks directly/indirectly reference the block
25335d4f98a2SYan Zheng * as processed.
25345d4f98a2SYan Zheng */
update_processed_blocks(struct reloc_control * rc,struct btrfs_backref_node * node)25355d4f98a2SYan Zheng static void update_processed_blocks(struct reloc_control *rc,
2536a26195a5SQu Wenruo struct btrfs_backref_node *node)
25375d4f98a2SYan Zheng {
2538a26195a5SQu Wenruo struct btrfs_backref_node *next = node;
2539a26195a5SQu Wenruo struct btrfs_backref_edge *edge;
2540a26195a5SQu Wenruo struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
25415d4f98a2SYan Zheng int index = 0;
25425d4f98a2SYan Zheng
25435d4f98a2SYan Zheng while (next) {
25445d4f98a2SYan Zheng cond_resched();
25455d4f98a2SYan Zheng while (1) {
25465d4f98a2SYan Zheng if (next->processed)
25475d4f98a2SYan Zheng break;
25485d4f98a2SYan Zheng
25499569cc20SQu Wenruo mark_block_processed(rc, next);
25505d4f98a2SYan Zheng
25515d4f98a2SYan Zheng if (list_empty(&next->upper))
25525d4f98a2SYan Zheng break;
25535d4f98a2SYan Zheng
25545d4f98a2SYan Zheng edge = list_entry(next->upper.next,
2555a26195a5SQu Wenruo struct btrfs_backref_edge, list[LOWER]);
25565d4f98a2SYan Zheng edges[index++] = edge;
25575d4f98a2SYan Zheng next = edge->node[UPPER];
25585d4f98a2SYan Zheng }
25595d4f98a2SYan Zheng next = walk_down_backref(edges, &index);
25605d4f98a2SYan Zheng }
25615d4f98a2SYan Zheng }
25625d4f98a2SYan Zheng
/*
 * Check the range of one tree block starting at @bytenr against
 * rc->processed_blocks.
 *
 * The range is nodesize bytes long and is tested for EXTENT_DIRTY.
 *
 * Return 1 if test_range_bit() reports the bit for the range, 0 otherwise.
 */
static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
{
	u32 blocksize = rc->extent_root->fs_info->nodesize;
	const u64 last = bytenr + blocksize - 1;

	return test_range_bit(&rc->processed_blocks, bytenr, last,
			      EXTENT_DIRTY, 1, NULL) ? 1 : 0;
}
25725d4f98a2SYan Zheng
get_tree_block_key(struct btrfs_fs_info * fs_info,struct tree_block * block)25732ff7e61eSJeff Mahoney static int get_tree_block_key(struct btrfs_fs_info *fs_info,
25745d4f98a2SYan Zheng struct tree_block *block)
25755d4f98a2SYan Zheng {
2576789d6a3aSQu Wenruo struct btrfs_tree_parent_check check = {
2577789d6a3aSQu Wenruo .level = block->level,
2578789d6a3aSQu Wenruo .owner_root = block->owner,
2579789d6a3aSQu Wenruo .transid = block->key.offset
2580789d6a3aSQu Wenruo };
25815d4f98a2SYan Zheng struct extent_buffer *eb;
25825d4f98a2SYan Zheng
2583789d6a3aSQu Wenruo eb = read_tree_block(fs_info, block->bytenr, &check);
25844eb150d6SQu Wenruo if (IS_ERR(eb))
258564c043deSLiu Bo return PTR_ERR(eb);
25864eb150d6SQu Wenruo if (!extent_buffer_uptodate(eb)) {
2587416bc658SJosef Bacik free_extent_buffer(eb);
2588416bc658SJosef Bacik return -EIO;
2589416bc658SJosef Bacik }
25905d4f98a2SYan Zheng if (block->level == 0)
25915d4f98a2SYan Zheng btrfs_item_key_to_cpu(eb, &block->key, 0);
25925d4f98a2SYan Zheng else
25935d4f98a2SYan Zheng btrfs_node_key_to_cpu(eb, &block->key, 0);
25945d4f98a2SYan Zheng free_extent_buffer(eb);
25955d4f98a2SYan Zheng block->key_ready = 1;
25965d4f98a2SYan Zheng return 0;
25975d4f98a2SYan Zheng }
25985d4f98a2SYan Zheng
25995d4f98a2SYan Zheng /*
26005d4f98a2SYan Zheng * helper function to relocate a tree block
26015d4f98a2SYan Zheng */
relocate_tree_block(struct btrfs_trans_handle * trans,struct reloc_control * rc,struct btrfs_backref_node * node,struct btrfs_key * key,struct btrfs_path * path)26025d4f98a2SYan Zheng static int relocate_tree_block(struct btrfs_trans_handle *trans,
26035d4f98a2SYan Zheng struct reloc_control *rc,
2604a26195a5SQu Wenruo struct btrfs_backref_node *node,
26055d4f98a2SYan Zheng struct btrfs_key *key,
26065d4f98a2SYan Zheng struct btrfs_path *path)
26075d4f98a2SYan Zheng {
26085d4f98a2SYan Zheng struct btrfs_root *root;
26093fd0a558SYan, Zheng int ret = 0;
26105d4f98a2SYan Zheng
26113fd0a558SYan, Zheng if (!node)
26125d4f98a2SYan Zheng return 0;
26133fd0a558SYan, Zheng
26145f6b2e5cSJosef Bacik /*
26155f6b2e5cSJosef Bacik * If we fail here we want to drop our backref_node because we are going
26165f6b2e5cSJosef Bacik * to start over and regenerate the tree for it.
26175f6b2e5cSJosef Bacik */
26185f6b2e5cSJosef Bacik ret = reserve_metadata_space(trans, rc, node);
26195f6b2e5cSJosef Bacik if (ret)
26205f6b2e5cSJosef Bacik goto out;
26215f6b2e5cSJosef Bacik
26223fd0a558SYan, Zheng BUG_ON(node->processed);
2623147d256eSZhaolei root = select_one_root(node);
26248717cf44SJosef Bacik if (IS_ERR(root)) {
26258717cf44SJosef Bacik ret = PTR_ERR(root);
26268717cf44SJosef Bacik
26278717cf44SJosef Bacik /* See explanation in select_one_root for the -EUCLEAN case. */
26288717cf44SJosef Bacik ASSERT(ret == -ENOENT);
26298717cf44SJosef Bacik if (ret == -ENOENT) {
26308717cf44SJosef Bacik ret = 0;
26313fd0a558SYan, Zheng update_processed_blocks(rc, node);
26328717cf44SJosef Bacik }
26333fd0a558SYan, Zheng goto out;
26345d4f98a2SYan Zheng }
26355d4f98a2SYan Zheng
26363fd0a558SYan, Zheng if (root) {
263792a7cc42SQu Wenruo if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
26381c7bfa15SJosef Bacik /*
26391c7bfa15SJosef Bacik * This block was the root block of a root, and this is
26401c7bfa15SJosef Bacik * the first time we're processing the block and thus it
26411c7bfa15SJosef Bacik * should not have had the ->new_bytenr modified and
26421c7bfa15SJosef Bacik * should have not been included on the changed list.
26431c7bfa15SJosef Bacik *
26441c7bfa15SJosef Bacik * However in the case of corruption we could have
26451c7bfa15SJosef Bacik * multiple refs pointing to the same block improperly,
26461c7bfa15SJosef Bacik * and thus we would trip over these checks. ASSERT()
26471c7bfa15SJosef Bacik * for the developer case, because it could indicate a
26481c7bfa15SJosef Bacik * bug in the backref code, however error out for a
26491c7bfa15SJosef Bacik * normal user in the case of corruption.
26501c7bfa15SJosef Bacik */
26511c7bfa15SJosef Bacik ASSERT(node->new_bytenr == 0);
26521c7bfa15SJosef Bacik ASSERT(list_empty(&node->list));
26531c7bfa15SJosef Bacik if (node->new_bytenr || !list_empty(&node->list)) {
26541c7bfa15SJosef Bacik btrfs_err(root->fs_info,
26551c7bfa15SJosef Bacik "bytenr %llu has improper references to it",
26561c7bfa15SJosef Bacik node->bytenr);
26571c7bfa15SJosef Bacik ret = -EUCLEAN;
26581c7bfa15SJosef Bacik goto out;
26591c7bfa15SJosef Bacik }
2660d18c7bd9SJosef Bacik ret = btrfs_record_root_in_trans(trans, root);
2661d18c7bd9SJosef Bacik if (ret)
2662d18c7bd9SJosef Bacik goto out;
266339200e59SJosef Bacik /*
266439200e59SJosef Bacik * Another thread could have failed, need to check if we
266539200e59SJosef Bacik * have reloc_root actually set.
266639200e59SJosef Bacik */
266739200e59SJosef Bacik if (!root->reloc_root) {
266839200e59SJosef Bacik ret = -ENOENT;
266939200e59SJosef Bacik goto out;
267039200e59SJosef Bacik }
26713fd0a558SYan, Zheng root = root->reloc_root;
26723fd0a558SYan, Zheng node->new_bytenr = root->node->start;
267300246528SJosef Bacik btrfs_put_root(node->root);
267400246528SJosef Bacik node->root = btrfs_grab_root(root);
26750b530bc5SJosef Bacik ASSERT(node->root);
26763fd0a558SYan, Zheng list_add_tail(&node->list, &rc->backref_cache.changed);
26773fd0a558SYan, Zheng } else {
26785d4f98a2SYan Zheng path->lowest_level = node->level;
26792bb2e00eSFilipe Manana if (root == root->fs_info->chunk_root)
26802bb2e00eSFilipe Manana btrfs_reserve_chunk_metadata(trans, false);
26815d4f98a2SYan Zheng ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2682b3b4aa74SDavid Sterba btrfs_release_path(path);
26832bb2e00eSFilipe Manana if (root == root->fs_info->chunk_root)
26842bb2e00eSFilipe Manana btrfs_trans_release_chunk_metadata(trans);
26853fd0a558SYan, Zheng if (ret > 0)
26865d4f98a2SYan Zheng ret = 0;
26873fd0a558SYan, Zheng }
26883fd0a558SYan, Zheng if (!ret)
26893fd0a558SYan, Zheng update_processed_blocks(rc, node);
26903fd0a558SYan, Zheng } else {
26913fd0a558SYan, Zheng ret = do_relocation(trans, rc, node, key, path, 1);
26923fd0a558SYan, Zheng }
26935d4f98a2SYan Zheng out:
26940647bf56SWang Shilong if (ret || node->level == 0 || node->cowonly)
2695023acb07SQu Wenruo btrfs_backref_cleanup_node(&rc->backref_cache, node);
26965d4f98a2SYan Zheng return ret;
26975d4f98a2SYan Zheng }
26985d4f98a2SYan Zheng
26995d4f98a2SYan Zheng /*
27005d4f98a2SYan Zheng * relocate a list of blocks
27015d4f98a2SYan Zheng */
27025d4f98a2SYan Zheng static noinline_for_stack
relocate_tree_blocks(struct btrfs_trans_handle * trans,struct reloc_control * rc,struct rb_root * blocks)27035d4f98a2SYan Zheng int relocate_tree_blocks(struct btrfs_trans_handle *trans,
27045d4f98a2SYan Zheng struct reloc_control *rc, struct rb_root *blocks)
27055d4f98a2SYan Zheng {
27062ff7e61eSJeff Mahoney struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2707a26195a5SQu Wenruo struct btrfs_backref_node *node;
27085d4f98a2SYan Zheng struct btrfs_path *path;
27095d4f98a2SYan Zheng struct tree_block *block;
271098ff7b94SQu Wenruo struct tree_block *next;
27115d4f98a2SYan Zheng int ret;
27125d4f98a2SYan Zheng int err = 0;
27135d4f98a2SYan Zheng
27145d4f98a2SYan Zheng path = btrfs_alloc_path();
2715e1a12670SLiu Bo if (!path) {
2716e1a12670SLiu Bo err = -ENOMEM;
271734c2b290SDavid Sterba goto out_free_blocks;
2718e1a12670SLiu Bo }
27195d4f98a2SYan Zheng
272098ff7b94SQu Wenruo /* Kick in readahead for tree blocks with missing keys */
272198ff7b94SQu Wenruo rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
27225d4f98a2SYan Zheng if (!block->key_ready)
2723f7ba2d37SJosef Bacik btrfs_readahead_tree_block(fs_info, block->bytenr,
2724f7ba2d37SJosef Bacik block->owner, 0,
27253fbaf258SJosef Bacik block->level);
27265d4f98a2SYan Zheng }
27275d4f98a2SYan Zheng
272898ff7b94SQu Wenruo /* Get first keys */
272998ff7b94SQu Wenruo rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
273034c2b290SDavid Sterba if (!block->key_ready) {
27312ff7e61eSJeff Mahoney err = get_tree_block_key(fs_info, block);
273234c2b290SDavid Sterba if (err)
273334c2b290SDavid Sterba goto out_free_path;
273434c2b290SDavid Sterba }
27355d4f98a2SYan Zheng }
27365d4f98a2SYan Zheng
273798ff7b94SQu Wenruo /* Do tree relocation */
273898ff7b94SQu Wenruo rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
2739eb96e221SFilipe Manana node = build_backref_tree(trans, rc, &block->key,
27405d4f98a2SYan Zheng block->level, block->bytenr);
27415d4f98a2SYan Zheng if (IS_ERR(node)) {
27425d4f98a2SYan Zheng err = PTR_ERR(node);
27435d4f98a2SYan Zheng goto out;
27445d4f98a2SYan Zheng }
27455d4f98a2SYan Zheng
27465d4f98a2SYan Zheng ret = relocate_tree_block(trans, rc, node, &block->key,
27475d4f98a2SYan Zheng path);
27485d4f98a2SYan Zheng if (ret < 0) {
27495d4f98a2SYan Zheng err = ret;
275050dbbb71SJosef Bacik break;
27515d4f98a2SYan Zheng }
27525d4f98a2SYan Zheng }
27535d4f98a2SYan Zheng out:
27543fd0a558SYan, Zheng err = finish_pending_nodes(trans, rc, path, err);
27555d4f98a2SYan Zheng
275634c2b290SDavid Sterba out_free_path:
27575d4f98a2SYan Zheng btrfs_free_path(path);
275834c2b290SDavid Sterba out_free_blocks:
2759e1a12670SLiu Bo free_block_list(blocks);
27605d4f98a2SYan Zheng return err;
27615d4f98a2SYan Zheng }
27625d4f98a2SYan Zheng
prealloc_file_extent_cluster(struct btrfs_inode * inode,const struct file_extent_cluster * cluster)2763056d9becSNikolay Borisov static noinline_for_stack int prealloc_file_extent_cluster(
2764056d9becSNikolay Borisov struct btrfs_inode *inode,
27654dc6ea8bSDavid Sterba const struct file_extent_cluster *cluster)
2766efa56464SYan, Zheng {
2767efa56464SYan, Zheng u64 alloc_hint = 0;
2768efa56464SYan, Zheng u64 start;
2769efa56464SYan, Zheng u64 end;
2770056d9becSNikolay Borisov u64 offset = inode->index_cnt;
2771efa56464SYan, Zheng u64 num_bytes;
27724e9d0d01SNikolay Borisov int nr;
2773efa56464SYan, Zheng int ret = 0;
27749d9ea1e6SQu Wenruo u64 i_size = i_size_read(&inode->vfs_inode);
2775dcb40c19SWang Xiaoguang u64 prealloc_start = cluster->start - offset;
2776dcb40c19SWang Xiaoguang u64 prealloc_end = cluster->end - offset;
2777214e61d0SNikolay Borisov u64 cur_offset = prealloc_start;
2778efa56464SYan, Zheng
27799d9ea1e6SQu Wenruo /*
27809d9ea1e6SQu Wenruo * For subpage case, previous i_size may not be aligned to PAGE_SIZE.
27819d9ea1e6SQu Wenruo * This means the range [i_size, PAGE_END + 1) is filled with zeros by
27829d9ea1e6SQu Wenruo * btrfs_do_readpage() call of previously relocated file cluster.
27839d9ea1e6SQu Wenruo *
27849d9ea1e6SQu Wenruo * If the current cluster starts in the above range, btrfs_do_readpage()
27859d9ea1e6SQu Wenruo * will skip the read, and relocate_one_page() will later writeback
27869d9ea1e6SQu Wenruo * the padding zeros as new data, causing data corruption.
27879d9ea1e6SQu Wenruo *
27889d9ea1e6SQu Wenruo * Here we have to manually invalidate the range (i_size, PAGE_END + 1).
27899d9ea1e6SQu Wenruo */
2790ce394a7fSYushan Zhou if (!PAGE_ALIGNED(i_size)) {
27919d9ea1e6SQu Wenruo struct address_space *mapping = inode->vfs_inode.i_mapping;
27929d9ea1e6SQu Wenruo struct btrfs_fs_info *fs_info = inode->root->fs_info;
27939d9ea1e6SQu Wenruo const u32 sectorsize = fs_info->sectorsize;
27949d9ea1e6SQu Wenruo struct page *page;
27959d9ea1e6SQu Wenruo
27969d9ea1e6SQu Wenruo ASSERT(sectorsize < PAGE_SIZE);
27979d9ea1e6SQu Wenruo ASSERT(IS_ALIGNED(i_size, sectorsize));
27989d9ea1e6SQu Wenruo
27999d9ea1e6SQu Wenruo /*
28009d9ea1e6SQu Wenruo * Subpage can't handle page with DIRTY but without UPTODATE
28019d9ea1e6SQu Wenruo * bit as it can lead to the following deadlock:
28029d9ea1e6SQu Wenruo *
2803fb12489bSMatthew Wilcox (Oracle) * btrfs_read_folio()
28049d9ea1e6SQu Wenruo * | Page already *locked*
28059d9ea1e6SQu Wenruo * |- btrfs_lock_and_flush_ordered_range()
28069d9ea1e6SQu Wenruo * |- btrfs_start_ordered_extent()
28079d9ea1e6SQu Wenruo * |- extent_write_cache_pages()
28089d9ea1e6SQu Wenruo * |- lock_page()
28099d9ea1e6SQu Wenruo * We try to lock the page we already hold.
28109d9ea1e6SQu Wenruo *
28119d9ea1e6SQu Wenruo * Here we just writeback the whole data reloc inode, so that
28129d9ea1e6SQu Wenruo * we will be ensured to have no dirty range in the page, and
28139d9ea1e6SQu Wenruo * are safe to clear the uptodate bits.
28149d9ea1e6SQu Wenruo *
28159d9ea1e6SQu Wenruo * This shouldn't cause too much overhead, as we need to write
28169d9ea1e6SQu Wenruo * the data back anyway.
28179d9ea1e6SQu Wenruo */
28189d9ea1e6SQu Wenruo ret = filemap_write_and_wait(mapping);
28199d9ea1e6SQu Wenruo if (ret < 0)
28209d9ea1e6SQu Wenruo return ret;
28219d9ea1e6SQu Wenruo
28229d9ea1e6SQu Wenruo clear_extent_bits(&inode->io_tree, i_size,
28239d9ea1e6SQu Wenruo round_up(i_size, PAGE_SIZE) - 1,
28249d9ea1e6SQu Wenruo EXTENT_UPTODATE);
28259d9ea1e6SQu Wenruo page = find_lock_page(mapping, i_size >> PAGE_SHIFT);
28269d9ea1e6SQu Wenruo /*
28279d9ea1e6SQu Wenruo * If page is freed we don't need to do anything then, as we
28289d9ea1e6SQu Wenruo * will re-read the whole page anyway.
28299d9ea1e6SQu Wenruo */
28309d9ea1e6SQu Wenruo if (page) {
28319d9ea1e6SQu Wenruo btrfs_subpage_clear_uptodate(fs_info, page, i_size,
28329d9ea1e6SQu Wenruo round_up(i_size, PAGE_SIZE) - i_size);
28339d9ea1e6SQu Wenruo unlock_page(page);
28349d9ea1e6SQu Wenruo put_page(page);
28359d9ea1e6SQu Wenruo }
28369d9ea1e6SQu Wenruo }
28379d9ea1e6SQu Wenruo
2838efa56464SYan, Zheng BUG_ON(cluster->start != cluster->boundary[0]);
2839056d9becSNikolay Borisov ret = btrfs_alloc_data_chunk_ondemand(inode,
2840dcb40c19SWang Xiaoguang prealloc_end + 1 - prealloc_start);
2841efa56464SYan, Zheng if (ret)
2842214e61d0SNikolay Borisov return ret;
2843efa56464SYan, Zheng
284429b6352bSDavid Sterba btrfs_inode_lock(inode, 0);
28454e9d0d01SNikolay Borisov for (nr = 0; nr < cluster->nr; nr++) {
28469c5c9604SJosef Bacik struct extent_state *cached_state = NULL;
28479c5c9604SJosef Bacik
2848efa56464SYan, Zheng start = cluster->boundary[nr] - offset;
2849efa56464SYan, Zheng if (nr + 1 < cluster->nr)
2850efa56464SYan, Zheng end = cluster->boundary[nr + 1] - 1 - offset;
2851efa56464SYan, Zheng else
2852efa56464SYan, Zheng end = cluster->end - offset;
2853efa56464SYan, Zheng
28549c5c9604SJosef Bacik lock_extent(&inode->io_tree, start, end, &cached_state);
2855efa56464SYan, Zheng num_bytes = end + 1 - start;
2856056d9becSNikolay Borisov ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
2857efa56464SYan, Zheng num_bytes, num_bytes,
2858efa56464SYan, Zheng end + 1, &alloc_hint);
285918513091SWang Xiaoguang cur_offset = end + 1;
28609c5c9604SJosef Bacik unlock_extent(&inode->io_tree, start, end, &cached_state);
2861efa56464SYan, Zheng if (ret)
2862efa56464SYan, Zheng break;
2863efa56464SYan, Zheng }
2864e5d4d75bSDavid Sterba btrfs_inode_unlock(inode, 0);
2865214e61d0SNikolay Borisov
286618513091SWang Xiaoguang if (cur_offset < prealloc_end)
2867056d9becSNikolay Borisov btrfs_free_reserved_data_space_noquota(inode->root->fs_info,
2868a89ef455SFilipe Manana prealloc_end + 1 - cur_offset);
2869efa56464SYan, Zheng return ret;
2870efa56464SYan, Zheng }
2871efa56464SYan, Zheng
setup_relocation_extent_mapping(struct inode * inode,u64 start,u64 end,u64 block_start)28724b01c44fSJohannes Thumshirn static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inode,
28734b01c44fSJohannes Thumshirn u64 start, u64 end, u64 block_start)
28745d4f98a2SYan Zheng {
28755d4f98a2SYan Zheng struct extent_map *em;
28769c5c9604SJosef Bacik struct extent_state *cached_state = NULL;
28770257bb82SYan, Zheng int ret = 0;
28785d4f98a2SYan Zheng
2879172ddd60SDavid Sterba em = alloc_extent_map();
28800257bb82SYan, Zheng if (!em)
28810257bb82SYan, Zheng return -ENOMEM;
28820257bb82SYan, Zheng
28835d4f98a2SYan Zheng em->start = start;
28840257bb82SYan, Zheng em->len = end + 1 - start;
28850257bb82SYan, Zheng em->block_len = em->len;
28860257bb82SYan, Zheng em->block_start = block_start;
28875d4f98a2SYan Zheng set_bit(EXTENT_FLAG_PINNED, &em->flags);
28885d4f98a2SYan Zheng
28899c5c9604SJosef Bacik lock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
2890a1ba4c08SFilipe Manana ret = btrfs_replace_extent_map_range(BTRFS_I(inode), em, false);
28919c5c9604SJosef Bacik unlock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
2892a1ba4c08SFilipe Manana free_extent_map(em);
2893a1ba4c08SFilipe Manana
28940257bb82SYan, Zheng return ret;
28950257bb82SYan, Zheng }
28965d4f98a2SYan Zheng
2897726a3421SQu Wenruo /*
2898907d2710SDavid Sterba * Allow error injection to test balance/relocation cancellation
2899726a3421SQu Wenruo */
btrfs_should_cancel_balance(const struct btrfs_fs_info * fs_info)29004dc6ea8bSDavid Sterba noinline int btrfs_should_cancel_balance(const struct btrfs_fs_info *fs_info)
2901726a3421SQu Wenruo {
29025cb502f4SQu Wenruo return atomic_read(&fs_info->balance_cancel_req) ||
2903907d2710SDavid Sterba atomic_read(&fs_info->reloc_cancel_req) ||
29045cb502f4SQu Wenruo fatal_signal_pending(current);
2905726a3421SQu Wenruo }
2906726a3421SQu Wenruo ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
2907726a3421SQu Wenruo
get_cluster_boundary_end(const struct file_extent_cluster * cluster,int cluster_nr)29084dc6ea8bSDavid Sterba static u64 get_cluster_boundary_end(const struct file_extent_cluster *cluster,
2909c2832898SQu Wenruo int cluster_nr)
2910c2832898SQu Wenruo {
2911c2832898SQu Wenruo /* Last extent, use cluster end directly */
2912c2832898SQu Wenruo if (cluster_nr >= cluster->nr - 1)
2913c2832898SQu Wenruo return cluster->end;
2914c2832898SQu Wenruo
2915c2832898SQu Wenruo /* Use next boundary start*/
2916c2832898SQu Wenruo return cluster->boundary[cluster_nr + 1] - 1;
2917c2832898SQu Wenruo }
2918c2832898SQu Wenruo
relocate_one_page(struct inode * inode,struct file_ra_state * ra,const struct file_extent_cluster * cluster,int * cluster_nr,unsigned long page_index)2919f47960f4SQu Wenruo static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
29204dc6ea8bSDavid Sterba const struct file_extent_cluster *cluster,
2921f47960f4SQu Wenruo int *cluster_nr, unsigned long page_index)
2922f47960f4SQu Wenruo {
2923f47960f4SQu Wenruo struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2924f47960f4SQu Wenruo u64 offset = BTRFS_I(inode)->index_cnt;
2925f47960f4SQu Wenruo const unsigned long last_index = (cluster->end - offset) >> PAGE_SHIFT;
2926f47960f4SQu Wenruo gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
2927f47960f4SQu Wenruo struct page *page;
2928f47960f4SQu Wenruo u64 page_start;
2929f47960f4SQu Wenruo u64 page_end;
2930c2832898SQu Wenruo u64 cur;
2931f47960f4SQu Wenruo int ret;
2932f47960f4SQu Wenruo
2933f47960f4SQu Wenruo ASSERT(page_index <= last_index);
2934f47960f4SQu Wenruo page = find_lock_page(inode->i_mapping, page_index);
2935f47960f4SQu Wenruo if (!page) {
2936f47960f4SQu Wenruo page_cache_sync_readahead(inode->i_mapping, ra, NULL,
2937f47960f4SQu Wenruo page_index, last_index + 1 - page_index);
2938f47960f4SQu Wenruo page = find_or_create_page(inode->i_mapping, page_index, mask);
2939c2832898SQu Wenruo if (!page)
2940c2832898SQu Wenruo return -ENOMEM;
2941f47960f4SQu Wenruo }
2942f47960f4SQu Wenruo
2943f47960f4SQu Wenruo if (PageReadahead(page))
29442ebdd1dfSMatthew Wilcox (Oracle) page_cache_async_readahead(inode->i_mapping, ra, NULL,
29452ebdd1dfSMatthew Wilcox (Oracle) page_folio(page), page_index,
29462ebdd1dfSMatthew Wilcox (Oracle) last_index + 1 - page_index);
2947f47960f4SQu Wenruo
2948f47960f4SQu Wenruo if (!PageUptodate(page)) {
2949fb12489bSMatthew Wilcox (Oracle) btrfs_read_folio(NULL, page_folio(page));
2950f47960f4SQu Wenruo lock_page(page);
2951f47960f4SQu Wenruo if (!PageUptodate(page)) {
2952f47960f4SQu Wenruo ret = -EIO;
2953f47960f4SQu Wenruo goto release_page;
2954f47960f4SQu Wenruo }
2955f47960f4SQu Wenruo }
2956f47960f4SQu Wenruo
2957e7f1326cSJosef Bacik /*
2958e7f1326cSJosef Bacik * We could have lost page private when we dropped the lock to read the
2959e7f1326cSJosef Bacik * page above, make sure we set_page_extent_mapped here so we have any
2960e7f1326cSJosef Bacik * of the subpage blocksize stuff we need in place.
2961e7f1326cSJosef Bacik */
2962e7f1326cSJosef Bacik ret = set_page_extent_mapped(page);
2963e7f1326cSJosef Bacik if (ret < 0)
2964e7f1326cSJosef Bacik goto release_page;
2965e7f1326cSJosef Bacik
2966f47960f4SQu Wenruo page_start = page_offset(page);
2967f47960f4SQu Wenruo page_end = page_start + PAGE_SIZE - 1;
2968f47960f4SQu Wenruo
2969c2832898SQu Wenruo /*
2970c2832898SQu Wenruo * Start from the cluster, as for subpage case, the cluster can start
2971c2832898SQu Wenruo * inside the page.
2972c2832898SQu Wenruo */
2973c2832898SQu Wenruo cur = max(page_start, cluster->boundary[*cluster_nr] - offset);
2974c2832898SQu Wenruo while (cur <= page_end) {
29759c5c9604SJosef Bacik struct extent_state *cached_state = NULL;
2976c2832898SQu Wenruo u64 extent_start = cluster->boundary[*cluster_nr] - offset;
2977c2832898SQu Wenruo u64 extent_end = get_cluster_boundary_end(cluster,
2978c2832898SQu Wenruo *cluster_nr) - offset;
2979c2832898SQu Wenruo u64 clamped_start = max(page_start, extent_start);
2980c2832898SQu Wenruo u64 clamped_end = min(page_end, extent_end);
2981c2832898SQu Wenruo u32 clamped_len = clamped_end + 1 - clamped_start;
2982f47960f4SQu Wenruo
2983c2832898SQu Wenruo /* Reserve metadata for this range */
2984c2832898SQu Wenruo ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
2985d4135134SFilipe Manana clamped_len, clamped_len,
2986d4135134SFilipe Manana false);
2987c2832898SQu Wenruo if (ret)
2988f47960f4SQu Wenruo goto release_page;
2989f47960f4SQu Wenruo
2990c2832898SQu Wenruo /* Mark the range delalloc and dirty for later writeback */
29919c5c9604SJosef Bacik lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end,
29929c5c9604SJosef Bacik &cached_state);
2993c2832898SQu Wenruo ret = btrfs_set_extent_delalloc(BTRFS_I(inode), clamped_start,
29949c5c9604SJosef Bacik clamped_end, 0, &cached_state);
2995c2832898SQu Wenruo if (ret) {
29969c5c9604SJosef Bacik clear_extent_bit(&BTRFS_I(inode)->io_tree,
2997c2832898SQu Wenruo clamped_start, clamped_end,
29989c5c9604SJosef Bacik EXTENT_LOCKED | EXTENT_BOUNDARY,
29999c5c9604SJosef Bacik &cached_state);
3000c2832898SQu Wenruo btrfs_delalloc_release_metadata(BTRFS_I(inode),
3001c2832898SQu Wenruo clamped_len, true);
3002c2832898SQu Wenruo btrfs_delalloc_release_extents(BTRFS_I(inode),
3003c2832898SQu Wenruo clamped_len);
3004c2832898SQu Wenruo goto release_page;
3005f47960f4SQu Wenruo }
3006c2832898SQu Wenruo btrfs_page_set_dirty(fs_info, page, clamped_start, clamped_len);
3007f47960f4SQu Wenruo
3008c2832898SQu Wenruo /*
3009c2832898SQu Wenruo * Set the boundary if it's inside the page.
3010c2832898SQu Wenruo * Data relocation requires the destination extents to have the
3011c2832898SQu Wenruo * same size as the source.
3012c2832898SQu Wenruo * EXTENT_BOUNDARY bit prevents current extent from being merged
3013c2832898SQu Wenruo * with previous extent.
3014c2832898SQu Wenruo */
3015c2832898SQu Wenruo if (in_range(cluster->boundary[*cluster_nr] - offset,
3016c2832898SQu Wenruo page_start, PAGE_SIZE)) {
3017c2832898SQu Wenruo u64 boundary_start = cluster->boundary[*cluster_nr] -
3018c2832898SQu Wenruo offset;
3019c2832898SQu Wenruo u64 boundary_end = boundary_start +
3020c2832898SQu Wenruo fs_info->sectorsize - 1;
3021c2832898SQu Wenruo
30220acd32c2SDavid Sterba set_extent_bit(&BTRFS_I(inode)->io_tree,
3023c2832898SQu Wenruo boundary_start, boundary_end,
30241d126800SDavid Sterba EXTENT_BOUNDARY, NULL);
3025c2832898SQu Wenruo }
30269c5c9604SJosef Bacik unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end,
30279c5c9604SJosef Bacik &cached_state);
3028c2832898SQu Wenruo btrfs_delalloc_release_extents(BTRFS_I(inode), clamped_len);
3029c2832898SQu Wenruo cur += clamped_len;
3030c2832898SQu Wenruo
3031c2832898SQu Wenruo /* Crossed extent end, go to next extent */
3032c2832898SQu Wenruo if (cur >= extent_end) {
3033c2832898SQu Wenruo (*cluster_nr)++;
3034c2832898SQu Wenruo /* Just finished the last extent of the cluster, exit. */
3035c2832898SQu Wenruo if (*cluster_nr >= cluster->nr)
3036c2832898SQu Wenruo break;
3037c2832898SQu Wenruo }
3038c2832898SQu Wenruo }
3039f47960f4SQu Wenruo unlock_page(page);
3040f47960f4SQu Wenruo put_page(page);
3041f47960f4SQu Wenruo
3042f47960f4SQu Wenruo balance_dirty_pages_ratelimited(inode->i_mapping);
3043f47960f4SQu Wenruo btrfs_throttle(fs_info);
3044f47960f4SQu Wenruo if (btrfs_should_cancel_balance(fs_info))
3045f47960f4SQu Wenruo ret = -ECANCELED;
3046f47960f4SQu Wenruo return ret;
3047f47960f4SQu Wenruo
3048f47960f4SQu Wenruo release_page:
3049f47960f4SQu Wenruo unlock_page(page);
3050f47960f4SQu Wenruo put_page(page);
3051f47960f4SQu Wenruo return ret;
3052f47960f4SQu Wenruo }
3053f47960f4SQu Wenruo
relocate_file_extent_cluster(struct inode * inode,const struct file_extent_cluster * cluster)30540257bb82SYan, Zheng static int relocate_file_extent_cluster(struct inode *inode,
30554dc6ea8bSDavid Sterba const struct file_extent_cluster *cluster)
30560257bb82SYan, Zheng {
30570257bb82SYan, Zheng u64 offset = BTRFS_I(inode)->index_cnt;
30580257bb82SYan, Zheng unsigned long index;
30590257bb82SYan, Zheng unsigned long last_index;
30600257bb82SYan, Zheng struct file_ra_state *ra;
3061f47960f4SQu Wenruo int cluster_nr = 0;
30620257bb82SYan, Zheng int ret = 0;
30630257bb82SYan, Zheng
30640257bb82SYan, Zheng if (!cluster->nr)
30650257bb82SYan, Zheng return 0;
30660257bb82SYan, Zheng
30670257bb82SYan, Zheng ra = kzalloc(sizeof(*ra), GFP_NOFS);
30680257bb82SYan, Zheng if (!ra)
30690257bb82SYan, Zheng return -ENOMEM;
30700257bb82SYan, Zheng
3071056d9becSNikolay Borisov ret = prealloc_file_extent_cluster(BTRFS_I(inode), cluster);
30720257bb82SYan, Zheng if (ret)
3073efa56464SYan, Zheng goto out;
30740257bb82SYan, Zheng
30750257bb82SYan, Zheng file_ra_state_init(ra, inode->i_mapping);
30760257bb82SYan, Zheng
30774b01c44fSJohannes Thumshirn ret = setup_relocation_extent_mapping(inode, cluster->start - offset,
3078efa56464SYan, Zheng cluster->end - offset, cluster->start);
3079efa56464SYan, Zheng if (ret)
3080efa56464SYan, Zheng goto out;
3081efa56464SYan, Zheng
308209cbfeafSKirill A. Shutemov last_index = (cluster->end - offset) >> PAGE_SHIFT;
3083f47960f4SQu Wenruo for (index = (cluster->start - offset) >> PAGE_SHIFT;
3084f47960f4SQu Wenruo index <= last_index && !ret; index++)
3085f47960f4SQu Wenruo ret = relocate_one_page(inode, ra, cluster, &cluster_nr, index);
3086f47960f4SQu Wenruo if (ret == 0)
3087f47960f4SQu Wenruo WARN_ON(cluster_nr != cluster->nr);
3088efa56464SYan, Zheng out:
30890257bb82SYan, Zheng kfree(ra);
30900257bb82SYan, Zheng return ret;
30910257bb82SYan, Zheng }
30920257bb82SYan, Zheng
relocate_data_extent(struct inode * inode,const struct btrfs_key * extent_key,struct file_extent_cluster * cluster)30934dc6ea8bSDavid Sterba static noinline_for_stack int relocate_data_extent(struct inode *inode,
30944dc6ea8bSDavid Sterba const struct btrfs_key *extent_key,
30950257bb82SYan, Zheng struct file_extent_cluster *cluster)
30960257bb82SYan, Zheng {
30970257bb82SYan, Zheng int ret;
30980257bb82SYan, Zheng
30990257bb82SYan, Zheng if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
31000257bb82SYan, Zheng ret = relocate_file_extent_cluster(inode, cluster);
31010257bb82SYan, Zheng if (ret)
31020257bb82SYan, Zheng return ret;
31030257bb82SYan, Zheng cluster->nr = 0;
31040257bb82SYan, Zheng }
31050257bb82SYan, Zheng
31060257bb82SYan, Zheng if (!cluster->nr)
31070257bb82SYan, Zheng cluster->start = extent_key->objectid;
31080257bb82SYan, Zheng else
31090257bb82SYan, Zheng BUG_ON(cluster->nr >= MAX_EXTENTS);
31100257bb82SYan, Zheng cluster->end = extent_key->objectid + extent_key->offset - 1;
31110257bb82SYan, Zheng cluster->boundary[cluster->nr] = extent_key->objectid;
31120257bb82SYan, Zheng cluster->nr++;
31130257bb82SYan, Zheng
31140257bb82SYan, Zheng if (cluster->nr >= MAX_EXTENTS) {
31150257bb82SYan, Zheng ret = relocate_file_extent_cluster(inode, cluster);
31160257bb82SYan, Zheng if (ret)
31170257bb82SYan, Zheng return ret;
31180257bb82SYan, Zheng cluster->nr = 0;
31190257bb82SYan, Zheng }
31200257bb82SYan, Zheng return 0;
31215d4f98a2SYan Zheng }
31225d4f98a2SYan Zheng
31235d4f98a2SYan Zheng /*
31245d4f98a2SYan Zheng * helper to add a tree block to the list.
31255d4f98a2SYan Zheng * the major work is getting the generation and level of the block
31265d4f98a2SYan Zheng */
add_tree_block(struct reloc_control * rc,const struct btrfs_key * extent_key,struct btrfs_path * path,struct rb_root * blocks)31275d4f98a2SYan Zheng static int add_tree_block(struct reloc_control *rc,
31284dc6ea8bSDavid Sterba const struct btrfs_key *extent_key,
31295d4f98a2SYan Zheng struct btrfs_path *path,
31305d4f98a2SYan Zheng struct rb_root *blocks)
31315d4f98a2SYan Zheng {
31325d4f98a2SYan Zheng struct extent_buffer *eb;
31335d4f98a2SYan Zheng struct btrfs_extent_item *ei;
31345d4f98a2SYan Zheng struct btrfs_tree_block_info *bi;
31355d4f98a2SYan Zheng struct tree_block *block;
31365d4f98a2SYan Zheng struct rb_node *rb_node;
31375d4f98a2SYan Zheng u32 item_size;
31385d4f98a2SYan Zheng int level = -1;
31397fdf4b60SWang Shilong u64 generation;
3140f7ba2d37SJosef Bacik u64 owner = 0;
31415d4f98a2SYan Zheng
31425d4f98a2SYan Zheng eb = path->nodes[0];
31433212fa14SJosef Bacik item_size = btrfs_item_size(eb, path->slots[0]);
31445d4f98a2SYan Zheng
31453173a18fSJosef Bacik if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
31463173a18fSJosef Bacik item_size >= sizeof(*ei) + sizeof(*bi)) {
3147f7ba2d37SJosef Bacik unsigned long ptr = 0, end;
3148f7ba2d37SJosef Bacik
31495d4f98a2SYan Zheng ei = btrfs_item_ptr(eb, path->slots[0],
31505d4f98a2SYan Zheng struct btrfs_extent_item);
3151f7ba2d37SJosef Bacik end = (unsigned long)ei + item_size;
31523173a18fSJosef Bacik if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
31535d4f98a2SYan Zheng bi = (struct btrfs_tree_block_info *)(ei + 1);
31545d4f98a2SYan Zheng level = btrfs_tree_block_level(eb, bi);
3155f7ba2d37SJosef Bacik ptr = (unsigned long)(bi + 1);
31565d4f98a2SYan Zheng } else {
31573173a18fSJosef Bacik level = (int)extent_key->offset;
3158f7ba2d37SJosef Bacik ptr = (unsigned long)(ei + 1);
31593173a18fSJosef Bacik }
31603173a18fSJosef Bacik generation = btrfs_extent_generation(eb, ei);
3161f7ba2d37SJosef Bacik
3162f7ba2d37SJosef Bacik /*
3163f7ba2d37SJosef Bacik * We're reading random blocks without knowing their owner ahead
3164f7ba2d37SJosef Bacik * of time. This is ok most of the time, as all reloc roots and
3165f7ba2d37SJosef Bacik * fs roots have the same lock type. However normal trees do
3166f7ba2d37SJosef Bacik * not, and the only way to know ahead of time is to read the
3167f7ba2d37SJosef Bacik * inline ref offset. We know it's an fs root if
3168f7ba2d37SJosef Bacik *
3169f7ba2d37SJosef Bacik * 1. There's more than one ref.
3170f7ba2d37SJosef Bacik * 2. There's a SHARED_DATA_REF_KEY set.
3171f7ba2d37SJosef Bacik * 3. FULL_BACKREF is set on the flags.
3172f7ba2d37SJosef Bacik *
3173f7ba2d37SJosef Bacik * Otherwise it's safe to assume that the ref offset == the
3174f7ba2d37SJosef Bacik * owner of this block, so we can use that when calling
3175f7ba2d37SJosef Bacik * read_tree_block.
3176f7ba2d37SJosef Bacik */
3177f7ba2d37SJosef Bacik if (btrfs_extent_refs(eb, ei) == 1 &&
3178f7ba2d37SJosef Bacik !(btrfs_extent_flags(eb, ei) &
3179f7ba2d37SJosef Bacik BTRFS_BLOCK_FLAG_FULL_BACKREF) &&
3180f7ba2d37SJosef Bacik ptr < end) {
3181f7ba2d37SJosef Bacik struct btrfs_extent_inline_ref *iref;
3182f7ba2d37SJosef Bacik int type;
3183f7ba2d37SJosef Bacik
3184f7ba2d37SJosef Bacik iref = (struct btrfs_extent_inline_ref *)ptr;
3185f7ba2d37SJosef Bacik type = btrfs_get_extent_inline_ref_type(eb, iref,
3186f7ba2d37SJosef Bacik BTRFS_REF_TYPE_BLOCK);
3187f7ba2d37SJosef Bacik if (type == BTRFS_REF_TYPE_INVALID)
3188f7ba2d37SJosef Bacik return -EINVAL;
3189f7ba2d37SJosef Bacik if (type == BTRFS_TREE_BLOCK_REF_KEY)
3190f7ba2d37SJosef Bacik owner = btrfs_extent_inline_ref_offset(eb, iref);
3191f7ba2d37SJosef Bacik }
31923173a18fSJosef Bacik } else {
3193182741d2SQu Wenruo btrfs_print_leaf(eb);
3194182741d2SQu Wenruo btrfs_err(rc->block_group->fs_info,
3195182741d2SQu Wenruo "unrecognized tree backref at tree block %llu slot %u",
3196182741d2SQu Wenruo eb->start, path->slots[0]);
3197182741d2SQu Wenruo btrfs_release_path(path);
3198182741d2SQu Wenruo return -EUCLEAN;
31995d4f98a2SYan Zheng }
32005d4f98a2SYan Zheng
3201b3b4aa74SDavid Sterba btrfs_release_path(path);
32025d4f98a2SYan Zheng
32035d4f98a2SYan Zheng BUG_ON(level == -1);
32045d4f98a2SYan Zheng
32055d4f98a2SYan Zheng block = kmalloc(sizeof(*block), GFP_NOFS);
32065d4f98a2SYan Zheng if (!block)
32075d4f98a2SYan Zheng return -ENOMEM;
32085d4f98a2SYan Zheng
32095d4f98a2SYan Zheng block->bytenr = extent_key->objectid;
3210da17066cSJeff Mahoney block->key.objectid = rc->extent_root->fs_info->nodesize;
32115d4f98a2SYan Zheng block->key.offset = generation;
32125d4f98a2SYan Zheng block->level = level;
32135d4f98a2SYan Zheng block->key_ready = 0;
3214f7ba2d37SJosef Bacik block->owner = owner;
32155d4f98a2SYan Zheng
3216e9a28dc5SQu Wenruo rb_node = rb_simple_insert(blocks, block->bytenr, &block->rb_node);
321743c04fb1SJeff Mahoney if (rb_node)
3218982c92cbSQu Wenruo btrfs_backref_panic(rc->extent_root->fs_info, block->bytenr,
3219982c92cbSQu Wenruo -EEXIST);
32205d4f98a2SYan Zheng
32215d4f98a2SYan Zheng return 0;
32225d4f98a2SYan Zheng }
32235d4f98a2SYan Zheng
32245d4f98a2SYan Zheng /*
32255d4f98a2SYan Zheng * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
32265d4f98a2SYan Zheng */
__add_tree_block(struct reloc_control * rc,u64 bytenr,u32 blocksize,struct rb_root * blocks)32275d4f98a2SYan Zheng static int __add_tree_block(struct reloc_control *rc,
32285d4f98a2SYan Zheng u64 bytenr, u32 blocksize,
32295d4f98a2SYan Zheng struct rb_root *blocks)
32305d4f98a2SYan Zheng {
32310b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
32325d4f98a2SYan Zheng struct btrfs_path *path;
32335d4f98a2SYan Zheng struct btrfs_key key;
32345d4f98a2SYan Zheng int ret;
32350b246afaSJeff Mahoney bool skinny = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
32365d4f98a2SYan Zheng
32377476dfdaSDavid Sterba if (tree_block_processed(bytenr, rc))
32385d4f98a2SYan Zheng return 0;
32395d4f98a2SYan Zheng
3240e9a28dc5SQu Wenruo if (rb_simple_search(blocks, bytenr))
32415d4f98a2SYan Zheng return 0;
32425d4f98a2SYan Zheng
32435d4f98a2SYan Zheng path = btrfs_alloc_path();
32445d4f98a2SYan Zheng if (!path)
32455d4f98a2SYan Zheng return -ENOMEM;
3246aee68ee5SJosef Bacik again:
32475d4f98a2SYan Zheng key.objectid = bytenr;
3248aee68ee5SJosef Bacik if (skinny) {
3249aee68ee5SJosef Bacik key.type = BTRFS_METADATA_ITEM_KEY;
3250aee68ee5SJosef Bacik key.offset = (u64)-1;
3251aee68ee5SJosef Bacik } else {
32525d4f98a2SYan Zheng key.type = BTRFS_EXTENT_ITEM_KEY;
32535d4f98a2SYan Zheng key.offset = blocksize;
3254aee68ee5SJosef Bacik }
32555d4f98a2SYan Zheng
32565d4f98a2SYan Zheng path->search_commit_root = 1;
32575d4f98a2SYan Zheng path->skip_locking = 1;
32585d4f98a2SYan Zheng ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
32595d4f98a2SYan Zheng if (ret < 0)
32605d4f98a2SYan Zheng goto out;
32615d4f98a2SYan Zheng
3262aee68ee5SJosef Bacik if (ret > 0 && skinny) {
3263aee68ee5SJosef Bacik if (path->slots[0]) {
3264aee68ee5SJosef Bacik path->slots[0]--;
3265aee68ee5SJosef Bacik btrfs_item_key_to_cpu(path->nodes[0], &key,
3266aee68ee5SJosef Bacik path->slots[0]);
32673173a18fSJosef Bacik if (key.objectid == bytenr &&
3268aee68ee5SJosef Bacik (key.type == BTRFS_METADATA_ITEM_KEY ||
3269aee68ee5SJosef Bacik (key.type == BTRFS_EXTENT_ITEM_KEY &&
3270aee68ee5SJosef Bacik key.offset == blocksize)))
32713173a18fSJosef Bacik ret = 0;
32723173a18fSJosef Bacik }
3273aee68ee5SJosef Bacik
3274aee68ee5SJosef Bacik if (ret) {
3275aee68ee5SJosef Bacik skinny = false;
3276aee68ee5SJosef Bacik btrfs_release_path(path);
3277aee68ee5SJosef Bacik goto again;
3278aee68ee5SJosef Bacik }
3279aee68ee5SJosef Bacik }
3280cdccee99SLiu Bo if (ret) {
3281cdccee99SLiu Bo ASSERT(ret == 1);
3282cdccee99SLiu Bo btrfs_print_leaf(path->nodes[0]);
3283cdccee99SLiu Bo btrfs_err(fs_info,
3284cdccee99SLiu Bo "tree block extent item (%llu) is not found in extent tree",
3285cdccee99SLiu Bo bytenr);
3286cdccee99SLiu Bo WARN_ON(1);
3287cdccee99SLiu Bo ret = -EINVAL;
3288cdccee99SLiu Bo goto out;
3289cdccee99SLiu Bo }
32903173a18fSJosef Bacik
32915d4f98a2SYan Zheng ret = add_tree_block(rc, &key, path, blocks);
32925d4f98a2SYan Zheng out:
32935d4f98a2SYan Zheng btrfs_free_path(path);
32945d4f98a2SYan Zheng return ret;
32955d4f98a2SYan Zheng }
32965d4f98a2SYan Zheng
/*
 * Truncate the free space cache inode of @block_group.
 *
 * When @inode is NULL the inode numbered @ino is looked up in the tree root;
 * a failed lookup is reported as -ENOENT.  The inode reference (whether passed
 * in or looked up here) is always dropped with iput() before returning, except
 * on that lookup failure.
 *
 * Returns 0 on success or a negative errno.
 */
static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
				    struct btrfs_block_group *block_group,
				    struct inode *inode,
				    u64 ino)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_trans_handle *handle;
	int status;

	if (!inode) {
		inode = btrfs_iget(fs_info->sb, ino, tree_root);
		if (IS_ERR(inode))
			return -ENOENT;
	}

	status = btrfs_check_trunc_cache_free_space(fs_info,
						    &fs_info->global_block_rsv);
	if (status)
		goto drop_inode;

	handle = btrfs_join_transaction(tree_root);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		goto drop_inode;
	}

	status = btrfs_truncate_free_space_cache(handle, block_group, inode);

	btrfs_end_transaction(handle);
	btrfs_btree_balance_dirty(fs_info);
drop_inode:
	iput(inode);
	return status;
}
33330af3d00bSJosef Bacik
33345d4f98a2SYan Zheng /*
333519b546d7SQu Wenruo * Locate the free space cache EXTENT_DATA in root tree leaf and delete the
333619b546d7SQu Wenruo * cache inode, to avoid free space cache data extent blocking data relocation.
33375d4f98a2SYan Zheng */
delete_v1_space_cache(struct extent_buffer * leaf,struct btrfs_block_group * block_group,u64 data_bytenr)333819b546d7SQu Wenruo static int delete_v1_space_cache(struct extent_buffer *leaf,
333919b546d7SQu Wenruo struct btrfs_block_group *block_group,
334019b546d7SQu Wenruo u64 data_bytenr)
33415d4f98a2SYan Zheng {
334219b546d7SQu Wenruo u64 space_cache_ino;
334319b546d7SQu Wenruo struct btrfs_file_extent_item *ei;
33445d4f98a2SYan Zheng struct btrfs_key key;
334519b546d7SQu Wenruo bool found = false;
334619b546d7SQu Wenruo int i;
33475d4f98a2SYan Zheng int ret;
33485d4f98a2SYan Zheng
334919b546d7SQu Wenruo if (btrfs_header_owner(leaf) != BTRFS_ROOT_TREE_OBJECTID)
335019b546d7SQu Wenruo return 0;
33515d4f98a2SYan Zheng
335219b546d7SQu Wenruo for (i = 0; i < btrfs_header_nritems(leaf); i++) {
335350e31ef4SQu Wenruo u8 type;
335450e31ef4SQu Wenruo
335519b546d7SQu Wenruo btrfs_item_key_to_cpu(leaf, &key, i);
335619b546d7SQu Wenruo if (key.type != BTRFS_EXTENT_DATA_KEY)
335719b546d7SQu Wenruo continue;
335819b546d7SQu Wenruo ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
335950e31ef4SQu Wenruo type = btrfs_file_extent_type(leaf, ei);
336050e31ef4SQu Wenruo
336150e31ef4SQu Wenruo if ((type == BTRFS_FILE_EXTENT_REG ||
336250e31ef4SQu Wenruo type == BTRFS_FILE_EXTENT_PREALLOC) &&
336319b546d7SQu Wenruo btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) {
336419b546d7SQu Wenruo found = true;
336519b546d7SQu Wenruo space_cache_ino = key.objectid;
336619b546d7SQu Wenruo break;
336719b546d7SQu Wenruo }
336819b546d7SQu Wenruo }
336919b546d7SQu Wenruo if (!found)
337019b546d7SQu Wenruo return -ENOENT;
337119b546d7SQu Wenruo ret = delete_block_group_cache(leaf->fs_info, block_group, NULL,
337219b546d7SQu Wenruo space_cache_ino);
33730af3d00bSJosef Bacik return ret;
33745d4f98a2SYan Zheng }
33755d4f98a2SYan Zheng
33765d4f98a2SYan Zheng /*
33772c016dc2SLiu Bo * helper to find all tree blocks that reference a given data extent
33785d4f98a2SYan Zheng */
add_data_references(struct reloc_control * rc,const struct btrfs_key * extent_key,struct btrfs_path * path,struct rb_root * blocks)33794dc6ea8bSDavid Sterba static noinline_for_stack int add_data_references(struct reloc_control *rc,
33804dc6ea8bSDavid Sterba const struct btrfs_key *extent_key,
33815d4f98a2SYan Zheng struct btrfs_path *path,
33825d4f98a2SYan Zheng struct rb_root *blocks)
33835d4f98a2SYan Zheng {
3384a2c8d27eSFilipe Manana struct btrfs_backref_walk_ctx ctx = { 0 };
338519b546d7SQu Wenruo struct ulist_iterator leaf_uiter;
338619b546d7SQu Wenruo struct ulist_node *ref_node = NULL;
3387a2c8d27eSFilipe Manana const u32 blocksize = rc->extent_root->fs_info->nodesize;
3388647f63bdSFilipe David Borba Manana int ret = 0;
33895d4f98a2SYan Zheng
3390b3b4aa74SDavid Sterba btrfs_release_path(path);
3391a2c8d27eSFilipe Manana
3392a2c8d27eSFilipe Manana ctx.bytenr = extent_key->objectid;
33930cad8f14SFilipe Manana ctx.skip_inode_ref_list = true;
3394a2c8d27eSFilipe Manana ctx.fs_info = rc->extent_root->fs_info;
3395a2c8d27eSFilipe Manana
3396a2c8d27eSFilipe Manana ret = btrfs_find_all_leafs(&ctx);
339719b546d7SQu Wenruo if (ret < 0)
339819b546d7SQu Wenruo return ret;
339919b546d7SQu Wenruo
340019b546d7SQu Wenruo ULIST_ITER_INIT(&leaf_uiter);
3401a2c8d27eSFilipe Manana while ((ref_node = ulist_next(ctx.refs, &leaf_uiter))) {
3402789d6a3aSQu Wenruo struct btrfs_tree_parent_check check = { 0 };
340319b546d7SQu Wenruo struct extent_buffer *eb;
340419b546d7SQu Wenruo
3405789d6a3aSQu Wenruo eb = read_tree_block(ctx.fs_info, ref_node->val, &check);
340619b546d7SQu Wenruo if (IS_ERR(eb)) {
340719b546d7SQu Wenruo ret = PTR_ERR(eb);
340819b546d7SQu Wenruo break;
340919b546d7SQu Wenruo }
341019b546d7SQu Wenruo ret = delete_v1_space_cache(eb, rc->block_group,
341119b546d7SQu Wenruo extent_key->objectid);
341219b546d7SQu Wenruo free_extent_buffer(eb);
341319b546d7SQu Wenruo if (ret < 0)
341419b546d7SQu Wenruo break;
341519b546d7SQu Wenruo ret = __add_tree_block(rc, ref_node->val, blocksize, blocks);
341619b546d7SQu Wenruo if (ret < 0)
341719b546d7SQu Wenruo break;
341819b546d7SQu Wenruo }
341919b546d7SQu Wenruo if (ret < 0)
34205d4f98a2SYan Zheng free_block_list(blocks);
3421a2c8d27eSFilipe Manana ulist_free(ctx.refs);
342219b546d7SQu Wenruo return ret;
34235d4f98a2SYan Zheng }
34245d4f98a2SYan Zheng
34255d4f98a2SYan Zheng /*
34262c016dc2SLiu Bo * helper to find next unprocessed extent
34275d4f98a2SYan Zheng */
34285d4f98a2SYan Zheng static noinline_for_stack
find_next_extent(struct reloc_control * rc,struct btrfs_path * path,struct btrfs_key * extent_key)3429147d256eSZhaolei int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
34303fd0a558SYan, Zheng struct btrfs_key *extent_key)
34315d4f98a2SYan Zheng {
34320b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
34335d4f98a2SYan Zheng struct btrfs_key key;
34345d4f98a2SYan Zheng struct extent_buffer *leaf;
34355d4f98a2SYan Zheng u64 start, end, last;
34365d4f98a2SYan Zheng int ret;
34375d4f98a2SYan Zheng
3438b3470b5dSDavid Sterba last = rc->block_group->start + rc->block_group->length;
34395d4f98a2SYan Zheng while (1) {
3440e5860f82SFilipe Manana bool block_found;
3441e5860f82SFilipe Manana
34425d4f98a2SYan Zheng cond_resched();
34435d4f98a2SYan Zheng if (rc->search_start >= last) {
34445d4f98a2SYan Zheng ret = 1;
34455d4f98a2SYan Zheng break;
34465d4f98a2SYan Zheng }
34475d4f98a2SYan Zheng
34485d4f98a2SYan Zheng key.objectid = rc->search_start;
34495d4f98a2SYan Zheng key.type = BTRFS_EXTENT_ITEM_KEY;
34505d4f98a2SYan Zheng key.offset = 0;
34515d4f98a2SYan Zheng
34525d4f98a2SYan Zheng path->search_commit_root = 1;
34535d4f98a2SYan Zheng path->skip_locking = 1;
34545d4f98a2SYan Zheng ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
34555d4f98a2SYan Zheng 0, 0);
34565d4f98a2SYan Zheng if (ret < 0)
34575d4f98a2SYan Zheng break;
34585d4f98a2SYan Zheng next:
34595d4f98a2SYan Zheng leaf = path->nodes[0];
34605d4f98a2SYan Zheng if (path->slots[0] >= btrfs_header_nritems(leaf)) {
34615d4f98a2SYan Zheng ret = btrfs_next_leaf(rc->extent_root, path);
34625d4f98a2SYan Zheng if (ret != 0)
34635d4f98a2SYan Zheng break;
34645d4f98a2SYan Zheng leaf = path->nodes[0];
34655d4f98a2SYan Zheng }
34665d4f98a2SYan Zheng
34675d4f98a2SYan Zheng btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
34685d4f98a2SYan Zheng if (key.objectid >= last) {
34695d4f98a2SYan Zheng ret = 1;
34705d4f98a2SYan Zheng break;
34715d4f98a2SYan Zheng }
34725d4f98a2SYan Zheng
34733173a18fSJosef Bacik if (key.type != BTRFS_EXTENT_ITEM_KEY &&
34743173a18fSJosef Bacik key.type != BTRFS_METADATA_ITEM_KEY) {
34753173a18fSJosef Bacik path->slots[0]++;
34763173a18fSJosef Bacik goto next;
34773173a18fSJosef Bacik }
34783173a18fSJosef Bacik
34793173a18fSJosef Bacik if (key.type == BTRFS_EXTENT_ITEM_KEY &&
34805d4f98a2SYan Zheng key.objectid + key.offset <= rc->search_start) {
34815d4f98a2SYan Zheng path->slots[0]++;
34825d4f98a2SYan Zheng goto next;
34835d4f98a2SYan Zheng }
34845d4f98a2SYan Zheng
34853173a18fSJosef Bacik if (key.type == BTRFS_METADATA_ITEM_KEY &&
34860b246afaSJeff Mahoney key.objectid + fs_info->nodesize <=
34873173a18fSJosef Bacik rc->search_start) {
34883173a18fSJosef Bacik path->slots[0]++;
34893173a18fSJosef Bacik goto next;
34903173a18fSJosef Bacik }
34913173a18fSJosef Bacik
3492e5860f82SFilipe Manana block_found = find_first_extent_bit(&rc->processed_blocks,
34935d4f98a2SYan Zheng key.objectid, &start, &end,
3494e6138876SJosef Bacik EXTENT_DIRTY, NULL);
34955d4f98a2SYan Zheng
3496e5860f82SFilipe Manana if (block_found && start <= key.objectid) {
3497b3b4aa74SDavid Sterba btrfs_release_path(path);
34985d4f98a2SYan Zheng rc->search_start = end + 1;
34995d4f98a2SYan Zheng } else {
35003173a18fSJosef Bacik if (key.type == BTRFS_EXTENT_ITEM_KEY)
35015d4f98a2SYan Zheng rc->search_start = key.objectid + key.offset;
35023173a18fSJosef Bacik else
35033173a18fSJosef Bacik rc->search_start = key.objectid +
35040b246afaSJeff Mahoney fs_info->nodesize;
35053fd0a558SYan, Zheng memcpy(extent_key, &key, sizeof(key));
35065d4f98a2SYan Zheng return 0;
35075d4f98a2SYan Zheng }
35085d4f98a2SYan Zheng }
3509b3b4aa74SDavid Sterba btrfs_release_path(path);
35105d4f98a2SYan Zheng return ret;
35115d4f98a2SYan Zheng }
35125d4f98a2SYan Zheng
set_reloc_control(struct reloc_control * rc)35135d4f98a2SYan Zheng static void set_reloc_control(struct reloc_control *rc)
35145d4f98a2SYan Zheng {
35155d4f98a2SYan Zheng struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
35167585717fSChris Mason
35177585717fSChris Mason mutex_lock(&fs_info->reloc_mutex);
35185d4f98a2SYan Zheng fs_info->reloc_ctl = rc;
35197585717fSChris Mason mutex_unlock(&fs_info->reloc_mutex);
35205d4f98a2SYan Zheng }
35215d4f98a2SYan Zheng
unset_reloc_control(struct reloc_control * rc)35225d4f98a2SYan Zheng static void unset_reloc_control(struct reloc_control *rc)
35235d4f98a2SYan Zheng {
35245d4f98a2SYan Zheng struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
35257585717fSChris Mason
35267585717fSChris Mason mutex_lock(&fs_info->reloc_mutex);
35275d4f98a2SYan Zheng fs_info->reloc_ctl = NULL;
35287585717fSChris Mason mutex_unlock(&fs_info->reloc_mutex);
35295d4f98a2SYan Zheng }
35305d4f98a2SYan Zheng
35313fd0a558SYan, Zheng static noinline_for_stack
prepare_to_relocate(struct reloc_control * rc)35323fd0a558SYan, Zheng int prepare_to_relocate(struct reloc_control *rc)
35333fd0a558SYan, Zheng {
35343fd0a558SYan, Zheng struct btrfs_trans_handle *trans;
3535ac2fabacSJosef Bacik int ret;
35363fd0a558SYan, Zheng
35372ff7e61eSJeff Mahoney rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root->fs_info,
353866d8f3ddSMiao Xie BTRFS_BLOCK_RSV_TEMP);
35393fd0a558SYan, Zheng if (!rc->block_rsv)
35403fd0a558SYan, Zheng return -ENOMEM;
35413fd0a558SYan, Zheng
35423fd0a558SYan, Zheng memset(&rc->cluster, 0, sizeof(rc->cluster));
3543b3470b5dSDavid Sterba rc->search_start = rc->block_group->start;
35443fd0a558SYan, Zheng rc->extents_found = 0;
35453fd0a558SYan, Zheng rc->nodes_relocated = 0;
35463fd0a558SYan, Zheng rc->merging_rsv_size = 0;
35470647bf56SWang Shilong rc->reserved_bytes = 0;
3548da17066cSJeff Mahoney rc->block_rsv->size = rc->extent_root->fs_info->nodesize *
35490647bf56SWang Shilong RELOCATION_RESERVED_NODES;
35509270501cSJosef Bacik ret = btrfs_block_rsv_refill(rc->extent_root->fs_info,
3551ac2fabacSJosef Bacik rc->block_rsv, rc->block_rsv->size,
3552ac2fabacSJosef Bacik BTRFS_RESERVE_FLUSH_ALL);
3553ac2fabacSJosef Bacik if (ret)
3554ac2fabacSJosef Bacik return ret;
35553fd0a558SYan, Zheng
35563fd0a558SYan, Zheng rc->create_reloc_tree = 1;
35573fd0a558SYan, Zheng set_reloc_control(rc);
35583fd0a558SYan, Zheng
35597a7eaa40SJosef Bacik trans = btrfs_join_transaction(rc->extent_root);
356028818947SLiu Bo if (IS_ERR(trans)) {
356128818947SLiu Bo unset_reloc_control(rc);
356228818947SLiu Bo /*
356328818947SLiu Bo * extent tree is not a ref_cow tree and has no reloc_root to
356428818947SLiu Bo * cleanup. And callers are responsible to free the above
356528818947SLiu Bo * block rsv.
356628818947SLiu Bo */
356728818947SLiu Bo return PTR_ERR(trans);
356828818947SLiu Bo }
356985f02d6cSZixuan Fu
357085f02d6cSZixuan Fu ret = btrfs_commit_transaction(trans);
357185f02d6cSZixuan Fu if (ret)
357285f02d6cSZixuan Fu unset_reloc_control(rc);
357385f02d6cSZixuan Fu
357485f02d6cSZixuan Fu return ret;
35753fd0a558SYan, Zheng }
357676dda93cSYan, Zheng
relocate_block_group(struct reloc_control * rc)35775d4f98a2SYan Zheng static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
35785d4f98a2SYan Zheng {
35792ff7e61eSJeff Mahoney struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
35805d4f98a2SYan Zheng struct rb_root blocks = RB_ROOT;
35815d4f98a2SYan Zheng struct btrfs_key key;
35825d4f98a2SYan Zheng struct btrfs_trans_handle *trans = NULL;
35835d4f98a2SYan Zheng struct btrfs_path *path;
35845d4f98a2SYan Zheng struct btrfs_extent_item *ei;
35855d4f98a2SYan Zheng u64 flags;
35865d4f98a2SYan Zheng int ret;
35875d4f98a2SYan Zheng int err = 0;
3588c87f08caSChris Mason int progress = 0;
35895d4f98a2SYan Zheng
35905d4f98a2SYan Zheng path = btrfs_alloc_path();
35913fd0a558SYan, Zheng if (!path)
35925d4f98a2SYan Zheng return -ENOMEM;
3593e4058b54SDavid Sterba path->reada = READA_FORWARD;
35943fd0a558SYan, Zheng
35953fd0a558SYan, Zheng ret = prepare_to_relocate(rc);
35963fd0a558SYan, Zheng if (ret) {
35973fd0a558SYan, Zheng err = ret;
35983fd0a558SYan, Zheng goto out_free;
35992423fdfbSJiri Slaby }
36005d4f98a2SYan Zheng
36015d4f98a2SYan Zheng while (1) {
36020647bf56SWang Shilong rc->reserved_bytes = 0;
36039270501cSJosef Bacik ret = btrfs_block_rsv_refill(fs_info, rc->block_rsv,
36049270501cSJosef Bacik rc->block_rsv->size,
36050647bf56SWang Shilong BTRFS_RESERVE_FLUSH_ALL);
36060647bf56SWang Shilong if (ret) {
36070647bf56SWang Shilong err = ret;
36080647bf56SWang Shilong break;
36090647bf56SWang Shilong }
3610c87f08caSChris Mason progress++;
3611a22285a6SYan, Zheng trans = btrfs_start_transaction(rc->extent_root, 0);
36120f788c58SLiu Bo if (IS_ERR(trans)) {
36130f788c58SLiu Bo err = PTR_ERR(trans);
36140f788c58SLiu Bo trans = NULL;
36150f788c58SLiu Bo break;
36160f788c58SLiu Bo }
3617c87f08caSChris Mason restart:
3618907717eeSJosef Bacik if (rc->backref_cache.last_trans != trans->transid)
3619907717eeSJosef Bacik btrfs_backref_release_cache(&rc->backref_cache);
3620907717eeSJosef Bacik rc->backref_cache.last_trans = trans->transid;
36213fd0a558SYan, Zheng
3622147d256eSZhaolei ret = find_next_extent(rc, path, &key);
36235d4f98a2SYan Zheng if (ret < 0)
36245d4f98a2SYan Zheng err = ret;
36255d4f98a2SYan Zheng if (ret != 0)
36265d4f98a2SYan Zheng break;
36275d4f98a2SYan Zheng
36285d4f98a2SYan Zheng rc->extents_found++;
36295d4f98a2SYan Zheng
36305d4f98a2SYan Zheng ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
36315d4f98a2SYan Zheng struct btrfs_extent_item);
36325d4f98a2SYan Zheng flags = btrfs_extent_flags(path->nodes[0], ei);
36335d4f98a2SYan Zheng
36345d4f98a2SYan Zheng if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
36355d4f98a2SYan Zheng ret = add_tree_block(rc, &key, path, &blocks);
36365d4f98a2SYan Zheng } else if (rc->stage == UPDATE_DATA_PTRS &&
36375d4f98a2SYan Zheng (flags & BTRFS_EXTENT_FLAG_DATA)) {
36385d4f98a2SYan Zheng ret = add_data_references(rc, &key, path, &blocks);
36395d4f98a2SYan Zheng } else {
3640b3b4aa74SDavid Sterba btrfs_release_path(path);
36415d4f98a2SYan Zheng ret = 0;
36425d4f98a2SYan Zheng }
36435d4f98a2SYan Zheng if (ret < 0) {
36443fd0a558SYan, Zheng err = ret;
36455d4f98a2SYan Zheng break;
36465d4f98a2SYan Zheng }
36475d4f98a2SYan Zheng
36485d4f98a2SYan Zheng if (!RB_EMPTY_ROOT(&blocks)) {
36495d4f98a2SYan Zheng ret = relocate_tree_blocks(trans, rc, &blocks);
36505d4f98a2SYan Zheng if (ret < 0) {
36513fd0a558SYan, Zheng if (ret != -EAGAIN) {
36525d4f98a2SYan Zheng err = ret;
36535d4f98a2SYan Zheng break;
36545d4f98a2SYan Zheng }
36553fd0a558SYan, Zheng rc->extents_found--;
36563fd0a558SYan, Zheng rc->search_start = key.objectid;
36573fd0a558SYan, Zheng }
36585d4f98a2SYan Zheng }
36595d4f98a2SYan Zheng
36603a45bb20SJeff Mahoney btrfs_end_transaction_throttle(trans);
36612ff7e61eSJeff Mahoney btrfs_btree_balance_dirty(fs_info);
36623fd0a558SYan, Zheng trans = NULL;
36635d4f98a2SYan Zheng
36645d4f98a2SYan Zheng if (rc->stage == MOVE_DATA_EXTENTS &&
36655d4f98a2SYan Zheng (flags & BTRFS_EXTENT_FLAG_DATA)) {
36665d4f98a2SYan Zheng rc->found_file_extent = 1;
36670257bb82SYan, Zheng ret = relocate_data_extent(rc->data_inode,
36683fd0a558SYan, Zheng &key, &rc->cluster);
36695d4f98a2SYan Zheng if (ret < 0) {
36705d4f98a2SYan Zheng err = ret;
36715d4f98a2SYan Zheng break;
36725d4f98a2SYan Zheng }
36735d4f98a2SYan Zheng }
3674f31ea088SQu Wenruo if (btrfs_should_cancel_balance(fs_info)) {
3675f31ea088SQu Wenruo err = -ECANCELED;
3676f31ea088SQu Wenruo break;
3677f31ea088SQu Wenruo }
36785d4f98a2SYan Zheng }
3679c87f08caSChris Mason if (trans && progress && err == -ENOSPC) {
368043a7e99dSNikolay Borisov ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags);
36819689457bSShilong Wang if (ret == 1) {
3682c87f08caSChris Mason err = 0;
3683c87f08caSChris Mason progress = 0;
3684c87f08caSChris Mason goto restart;
3685c87f08caSChris Mason }
3686c87f08caSChris Mason }
36873fd0a558SYan, Zheng
3688b3b4aa74SDavid Sterba btrfs_release_path(path);
368991166212SDavid Sterba clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY);
36905d4f98a2SYan Zheng
36915d4f98a2SYan Zheng if (trans) {
36923a45bb20SJeff Mahoney btrfs_end_transaction_throttle(trans);
36932ff7e61eSJeff Mahoney btrfs_btree_balance_dirty(fs_info);
36945d4f98a2SYan Zheng }
36955d4f98a2SYan Zheng
36960257bb82SYan, Zheng if (!err) {
36973fd0a558SYan, Zheng ret = relocate_file_extent_cluster(rc->data_inode,
36983fd0a558SYan, Zheng &rc->cluster);
36990257bb82SYan, Zheng if (ret < 0)
37000257bb82SYan, Zheng err = ret;
37010257bb82SYan, Zheng }
37020257bb82SYan, Zheng
37033fd0a558SYan, Zheng rc->create_reloc_tree = 0;
37043fd0a558SYan, Zheng set_reloc_control(rc);
37050257bb82SYan, Zheng
370613fe1bdbSQu Wenruo btrfs_backref_release_cache(&rc->backref_cache);
370763f018beSNikolay Borisov btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
37085d4f98a2SYan Zheng
37097f913c7cSQu Wenruo /*
37107f913c7cSQu Wenruo * Even in the case when the relocation is cancelled, we should all go
37117f913c7cSQu Wenruo * through prepare_to_merge() and merge_reloc_roots().
37127f913c7cSQu Wenruo *
37137f913c7cSQu Wenruo * For error (including cancelled balance), prepare_to_merge() will
37147f913c7cSQu Wenruo * mark all reloc trees orphan, then queue them for cleanup in
37157f913c7cSQu Wenruo * merge_reloc_roots()
37167f913c7cSQu Wenruo */
37173fd0a558SYan, Zheng err = prepare_to_merge(rc, err);
37185d4f98a2SYan Zheng
37195d4f98a2SYan Zheng merge_reloc_roots(rc);
37205d4f98a2SYan Zheng
37213fd0a558SYan, Zheng rc->merge_reloc_tree = 0;
37225d4f98a2SYan Zheng unset_reloc_control(rc);
372363f018beSNikolay Borisov btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
37245d4f98a2SYan Zheng
37255d4f98a2SYan Zheng /* get rid of pinned extents */
37267a7eaa40SJosef Bacik trans = btrfs_join_transaction(rc->extent_root);
372762b99540SQu Wenruo if (IS_ERR(trans)) {
37283612b495STsutomu Itoh err = PTR_ERR(trans);
372962b99540SQu Wenruo goto out_free;
373062b99540SQu Wenruo }
3731fb686c68SJosef Bacik ret = btrfs_commit_transaction(trans);
3732fb686c68SJosef Bacik if (ret && !err)
3733fb686c68SJosef Bacik err = ret;
37346217b0faSJosef Bacik out_free:
3735d2311e69SQu Wenruo ret = clean_dirty_subvols(rc);
3736d2311e69SQu Wenruo if (ret < 0 && !err)
3737d2311e69SQu Wenruo err = ret;
37382ff7e61eSJeff Mahoney btrfs_free_block_rsv(fs_info, rc->block_rsv);
37393fd0a558SYan, Zheng btrfs_free_path(path);
37405d4f98a2SYan Zheng return err;
37415d4f98a2SYan Zheng }
37425d4f98a2SYan Zheng
/*
 * Insert an inode item numbered @objectid into @root.
 *
 * The item is zeroed and then given generation 1, size 0, mode
 * S_IFREG | 0600 and the NOCOMPRESS and PREALLOC inode flags.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the error
 * from btrfs_insert_empty_inode().
 */
static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root, u64 objectid)
{
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *eb;
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
	if (!ret) {
		eb = path->nodes[0];
		inode_item = btrfs_item_ptr(eb, path->slots[0],
					    struct btrfs_inode_item);
		memzero_extent_buffer(eb, (unsigned long)inode_item,
				      sizeof(*inode_item));
		btrfs_set_inode_generation(eb, inode_item, 1);
		btrfs_set_inode_size(eb, inode_item, 0);
		btrfs_set_inode_mode(eb, inode_item, S_IFREG | 0600);
		btrfs_set_inode_flags(eb, inode_item,
				      BTRFS_INODE_NOCOMPRESS |
				      BTRFS_INODE_PREALLOC);
		btrfs_mark_buffer_dirty(trans, eb);
	}

	btrfs_free_path(path);
	return ret;
}
37725d4f98a2SYan Zheng
/*
 * Remove the inode item numbered @objectid from @root.
 *
 * Nothing is returned; any failure (allocation, lookup, item not found, or
 * deletion) aborts the transaction @trans with the corresponding errno.
 */
static void delete_orphan_inode(struct btrfs_trans_handle *trans,
				struct btrfs_root *root, u64 objectid)
{
	struct btrfs_key key = {
		.objectid = objectid,
		.type = BTRFS_INODE_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (path) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;	/* no exact match for the key */
		else if (ret == 0)
			ret = btrfs_del_item(trans, root, path);
	} else {
		ret = -ENOMEM;
	}

	if (ret)
		btrfs_abort_transaction(trans, ret);
	btrfs_free_path(path);
}
3801790c1b8cSJosef Bacik
38025d4f98a2SYan Zheng /*
38035d4f98a2SYan Zheng * helper to create inode for data relocation.
38045d4f98a2SYan Zheng * the inode is in data relocation tree and its link count is 0
38055d4f98a2SYan Zheng */
create_reloc_inode(struct btrfs_fs_info * fs_info,const struct btrfs_block_group * group)38064dc6ea8bSDavid Sterba static noinline_for_stack struct inode *create_reloc_inode(
38074dc6ea8bSDavid Sterba struct btrfs_fs_info *fs_info,
38084dc6ea8bSDavid Sterba const struct btrfs_block_group *group)
38095d4f98a2SYan Zheng {
38105d4f98a2SYan Zheng struct inode *inode = NULL;
38115d4f98a2SYan Zheng struct btrfs_trans_handle *trans;
38125d4f98a2SYan Zheng struct btrfs_root *root;
38134624900dSZhaolei u64 objectid;
38145d4f98a2SYan Zheng int err = 0;
38155d4f98a2SYan Zheng
3816aeb935a4SQu Wenruo root = btrfs_grab_root(fs_info->data_reloc_root);
3817a22285a6SYan, Zheng trans = btrfs_start_transaction(root, 6);
381876deacf0SJosef Bacik if (IS_ERR(trans)) {
381900246528SJosef Bacik btrfs_put_root(root);
38203fd0a558SYan, Zheng return ERR_CAST(trans);
382176deacf0SJosef Bacik }
38225d4f98a2SYan Zheng
3823543068a2SNikolay Borisov err = btrfs_get_free_objectid(root, &objectid);
38245d4f98a2SYan Zheng if (err)
38255d4f98a2SYan Zheng goto out;
38265d4f98a2SYan Zheng
38270257bb82SYan, Zheng err = __insert_orphan_inode(trans, root, objectid);
3828790c1b8cSJosef Bacik if (err)
3829790c1b8cSJosef Bacik goto out;
38305d4f98a2SYan Zheng
38310202e83fSDavid Sterba inode = btrfs_iget(fs_info->sb, objectid, root);
3832790c1b8cSJosef Bacik if (IS_ERR(inode)) {
3833790c1b8cSJosef Bacik delete_orphan_inode(trans, root, objectid);
3834790c1b8cSJosef Bacik err = PTR_ERR(inode);
3835790c1b8cSJosef Bacik inode = NULL;
3836790c1b8cSJosef Bacik goto out;
3837790c1b8cSJosef Bacik }
3838b3470b5dSDavid Sterba BTRFS_I(inode)->index_cnt = group->start;
38395d4f98a2SYan Zheng
384073f2e545SNikolay Borisov err = btrfs_orphan_add(trans, BTRFS_I(inode));
38415d4f98a2SYan Zheng out:
384200246528SJosef Bacik btrfs_put_root(root);
38433a45bb20SJeff Mahoney btrfs_end_transaction(trans);
38442ff7e61eSJeff Mahoney btrfs_btree_balance_dirty(fs_info);
38455d4f98a2SYan Zheng if (err) {
38465d4f98a2SYan Zheng iput(inode);
38475d4f98a2SYan Zheng inode = ERR_PTR(err);
38485d4f98a2SYan Zheng }
38495d4f98a2SYan Zheng return inode;
38505d4f98a2SYan Zheng }
38515d4f98a2SYan Zheng
3852907d2710SDavid Sterba /*
3853907d2710SDavid Sterba * Mark start of chunk relocation that is cancellable. Check if the cancellation
3854907d2710SDavid Sterba * has been requested meanwhile and don't start in that case.
3855907d2710SDavid Sterba *
3856907d2710SDavid Sterba * Return:
3857907d2710SDavid Sterba * 0 success
3858907d2710SDavid Sterba * -EINPROGRESS operation is already in progress, that's probably a bug
3859907d2710SDavid Sterba * -ECANCELED cancellation request was set before the operation started
3860907d2710SDavid Sterba */
reloc_chunk_start(struct btrfs_fs_info * fs_info)3861907d2710SDavid Sterba static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
3862907d2710SDavid Sterba {
3863907d2710SDavid Sterba if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
3864907d2710SDavid Sterba /* This should not happen */
3865907d2710SDavid Sterba btrfs_err(fs_info, "reloc already running, cannot start");
3866907d2710SDavid Sterba return -EINPROGRESS;
3867907d2710SDavid Sterba }
3868907d2710SDavid Sterba
3869907d2710SDavid Sterba if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
3870907d2710SDavid Sterba btrfs_info(fs_info, "chunk relocation canceled on start");
3871907d2710SDavid Sterba /*
3872907d2710SDavid Sterba * On cancel, clear all requests but let the caller mark
3873907d2710SDavid Sterba * the end after cleanup operations.
3874907d2710SDavid Sterba */
3875907d2710SDavid Sterba atomic_set(&fs_info->reloc_cancel_req, 0);
3876907d2710SDavid Sterba return -ECANCELED;
3877907d2710SDavid Sterba }
3878907d2710SDavid Sterba return 0;
3879907d2710SDavid Sterba }
3880907d2710SDavid Sterba
3881907d2710SDavid Sterba /*
3882907d2710SDavid Sterba * Mark end of chunk relocation that is cancellable and wake any waiters.
3883907d2710SDavid Sterba */
reloc_chunk_end(struct btrfs_fs_info * fs_info)3884907d2710SDavid Sterba static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
3885907d2710SDavid Sterba {
3886907d2710SDavid Sterba /* Requested after start, clear bit first so any waiters can continue */
3887907d2710SDavid Sterba if (atomic_read(&fs_info->reloc_cancel_req) > 0)
3888907d2710SDavid Sterba btrfs_info(fs_info, "chunk relocation canceled during operation");
3889907d2710SDavid Sterba clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
3890907d2710SDavid Sterba atomic_set(&fs_info->reloc_cancel_req, 0);
3891907d2710SDavid Sterba }
3892907d2710SDavid Sterba
alloc_reloc_control(struct btrfs_fs_info * fs_info)3893c258d6e3SQu Wenruo static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
38943fd0a558SYan, Zheng {
38953fd0a558SYan, Zheng struct reloc_control *rc;
38963fd0a558SYan, Zheng
38973fd0a558SYan, Zheng rc = kzalloc(sizeof(*rc), GFP_NOFS);
38983fd0a558SYan, Zheng if (!rc)
38993fd0a558SYan, Zheng return NULL;
39003fd0a558SYan, Zheng
39013fd0a558SYan, Zheng INIT_LIST_HEAD(&rc->reloc_roots);
3902d2311e69SQu Wenruo INIT_LIST_HEAD(&rc->dirty_subvol_roots);
3903584fb121SQu Wenruo btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1);
39043fd0a558SYan, Zheng mapping_tree_init(&rc->reloc_root_tree);
390535da5a7eSDavid Sterba extent_io_tree_init(fs_info, &rc->processed_blocks, IO_TREE_RELOC_BLOCKS);
39063fd0a558SYan, Zheng return rc;
39073fd0a558SYan, Zheng }
39083fd0a558SYan, Zheng
free_reloc_control(struct reloc_control * rc)39091a0afa0eSJosef Bacik static void free_reloc_control(struct reloc_control *rc)
39101a0afa0eSJosef Bacik {
39111a0afa0eSJosef Bacik struct mapping_node *node, *tmp;
39121a0afa0eSJosef Bacik
39131a0afa0eSJosef Bacik free_reloc_roots(&rc->reloc_roots);
39141a0afa0eSJosef Bacik rbtree_postorder_for_each_entry_safe(node, tmp,
39151a0afa0eSJosef Bacik &rc->reloc_root_tree.rb_root, rb_node)
39161a0afa0eSJosef Bacik kfree(node);
39171a0afa0eSJosef Bacik
39181a0afa0eSJosef Bacik kfree(rc);
39191a0afa0eSJosef Bacik }
39201a0afa0eSJosef Bacik
39215d4f98a2SYan Zheng /*
3922ebce0e01SAdam Borowski * Print the block group being relocated
3923ebce0e01SAdam Borowski */
describe_relocation(struct btrfs_fs_info * fs_info,struct btrfs_block_group * block_group)3924ebce0e01SAdam Borowski static void describe_relocation(struct btrfs_fs_info *fs_info,
392532da5386SDavid Sterba struct btrfs_block_group *block_group)
3926ebce0e01SAdam Borowski {
3927f89e09cfSAnand Jain char buf[128] = {'\0'};
3928ebce0e01SAdam Borowski
3929f89e09cfSAnand Jain btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf));
3930ebce0e01SAdam Borowski
3931ebce0e01SAdam Borowski btrfs_info(fs_info,
3932ebce0e01SAdam Borowski "relocating block group %llu flags %s",
3933b3470b5dSDavid Sterba block_group->start, buf);
3934ebce0e01SAdam Borowski }
3935ebce0e01SAdam Borowski
stage_to_string(int stage)3936430640e3SQu Wenruo static const char *stage_to_string(int stage)
3937430640e3SQu Wenruo {
3938430640e3SQu Wenruo if (stage == MOVE_DATA_EXTENTS)
3939430640e3SQu Wenruo return "move data extents";
3940430640e3SQu Wenruo if (stage == UPDATE_DATA_PTRS)
3941430640e3SQu Wenruo return "update data pointers";
3942430640e3SQu Wenruo return "unknown";
3943430640e3SQu Wenruo }
3944430640e3SQu Wenruo
3945ebce0e01SAdam Borowski /*
39465d4f98a2SYan Zheng * function to relocate all extents in a block group.
39475d4f98a2SYan Zheng */
btrfs_relocate_block_group(struct btrfs_fs_info * fs_info,u64 group_start)39486bccf3abSJeff Mahoney int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
39495d4f98a2SYan Zheng {
395032da5386SDavid Sterba struct btrfs_block_group *bg;
395129cbcf40SJosef Bacik struct btrfs_root *extent_root = btrfs_extent_root(fs_info, group_start);
39525d4f98a2SYan Zheng struct reloc_control *rc;
39530af3d00bSJosef Bacik struct inode *inode;
39540af3d00bSJosef Bacik struct btrfs_path *path;
39555d4f98a2SYan Zheng int ret;
3956f0486c68SYan, Zheng int rw = 0;
39575d4f98a2SYan Zheng int err = 0;
39585d4f98a2SYan Zheng
3959b4be6aefSJosef Bacik /*
3960b4be6aefSJosef Bacik * This only gets set if we had a half-deleted snapshot on mount. We
3961b4be6aefSJosef Bacik * cannot allow relocation to start while we're still trying to clean up
3962b4be6aefSJosef Bacik * these pending deletions.
3963b4be6aefSJosef Bacik */
3964b4be6aefSJosef Bacik ret = wait_on_bit(&fs_info->flags, BTRFS_FS_UNFINISHED_DROPS, TASK_INTERRUPTIBLE);
3965b4be6aefSJosef Bacik if (ret)
3966b4be6aefSJosef Bacik return ret;
3967b4be6aefSJosef Bacik
3968b4be6aefSJosef Bacik /* We may have been woken up by close_ctree, so bail if we're closing. */
3969b4be6aefSJosef Bacik if (btrfs_fs_closing(fs_info))
3970b4be6aefSJosef Bacik return -EINTR;
3971b4be6aefSJosef Bacik
3972eede2bf3SOmar Sandoval bg = btrfs_lookup_block_group(fs_info, group_start);
3973eede2bf3SOmar Sandoval if (!bg)
3974eede2bf3SOmar Sandoval return -ENOENT;
3975eede2bf3SOmar Sandoval
39760320b353SNaohiro Aota /*
39770320b353SNaohiro Aota * Relocation of a data block group creates ordered extents. Without
39780320b353SNaohiro Aota * sb_start_write(), we can freeze the filesystem while unfinished
39790320b353SNaohiro Aota * ordered extents are left. Such ordered extents can cause a deadlock
39800320b353SNaohiro Aota * e.g. when syncfs() is waiting for their completion but they can't
39810320b353SNaohiro Aota * finish because they block when joining a transaction, due to the
39820320b353SNaohiro Aota * fact that the freeze locks are being held in write mode.
39830320b353SNaohiro Aota */
39840320b353SNaohiro Aota if (bg->flags & BTRFS_BLOCK_GROUP_DATA)
39850320b353SNaohiro Aota ASSERT(sb_write_started(fs_info->sb));
39860320b353SNaohiro Aota
3987eede2bf3SOmar Sandoval if (btrfs_pinned_by_swapfile(fs_info, bg)) {
3988eede2bf3SOmar Sandoval btrfs_put_block_group(bg);
3989eede2bf3SOmar Sandoval return -ETXTBSY;
3990eede2bf3SOmar Sandoval }
3991eede2bf3SOmar Sandoval
3992c258d6e3SQu Wenruo rc = alloc_reloc_control(fs_info);
3993eede2bf3SOmar Sandoval if (!rc) {
3994eede2bf3SOmar Sandoval btrfs_put_block_group(bg);
39955d4f98a2SYan Zheng return -ENOMEM;
3996eede2bf3SOmar Sandoval }
39975d4f98a2SYan Zheng
3998907d2710SDavid Sterba ret = reloc_chunk_start(fs_info);
3999907d2710SDavid Sterba if (ret < 0) {
4000907d2710SDavid Sterba err = ret;
4001907d2710SDavid Sterba goto out_put_bg;
4002907d2710SDavid Sterba }
4003907d2710SDavid Sterba
4004f0486c68SYan, Zheng rc->extent_root = extent_root;
4005eede2bf3SOmar Sandoval rc->block_group = bg;
40065d4f98a2SYan Zheng
4007b12de528SQu Wenruo ret = btrfs_inc_block_group_ro(rc->block_group, true);
4008f0486c68SYan, Zheng if (ret) {
4009f0486c68SYan, Zheng err = ret;
4010f0486c68SYan, Zheng goto out;
4011f0486c68SYan, Zheng }
4012f0486c68SYan, Zheng rw = 1;
4013f0486c68SYan, Zheng
40140af3d00bSJosef Bacik path = btrfs_alloc_path();
40150af3d00bSJosef Bacik if (!path) {
40160af3d00bSJosef Bacik err = -ENOMEM;
40170af3d00bSJosef Bacik goto out;
40180af3d00bSJosef Bacik }
40190af3d00bSJosef Bacik
40207949f339SDavid Sterba inode = lookup_free_space_inode(rc->block_group, path);
40210af3d00bSJosef Bacik btrfs_free_path(path);
40220af3d00bSJosef Bacik
40230af3d00bSJosef Bacik if (!IS_ERR(inode))
40241bbc621eSChris Mason ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
40250af3d00bSJosef Bacik else
40260af3d00bSJosef Bacik ret = PTR_ERR(inode);
40270af3d00bSJosef Bacik
40280af3d00bSJosef Bacik if (ret && ret != -ENOENT) {
40290af3d00bSJosef Bacik err = ret;
40300af3d00bSJosef Bacik goto out;
40310af3d00bSJosef Bacik }
40320af3d00bSJosef Bacik
40335d4f98a2SYan Zheng rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
40345d4f98a2SYan Zheng if (IS_ERR(rc->data_inode)) {
40355d4f98a2SYan Zheng err = PTR_ERR(rc->data_inode);
40365d4f98a2SYan Zheng rc->data_inode = NULL;
40375d4f98a2SYan Zheng goto out;
40385d4f98a2SYan Zheng }
40395d4f98a2SYan Zheng
40400b246afaSJeff Mahoney describe_relocation(fs_info, rc->block_group);
40415d4f98a2SYan Zheng
40429cfa3e34SFilipe Manana btrfs_wait_block_group_reservations(rc->block_group);
4043f78c436cSFilipe Manana btrfs_wait_nocow_writers(rc->block_group);
40446374e57aSChris Mason btrfs_wait_ordered_roots(fs_info, U64_MAX,
4045b3470b5dSDavid Sterba rc->block_group->start,
4046b3470b5dSDavid Sterba rc->block_group->length);
40475d4f98a2SYan Zheng
40487ae9bd18SNaohiro Aota ret = btrfs_zone_finish(rc->block_group);
40497ae9bd18SNaohiro Aota WARN_ON(ret && ret != -EAGAIN);
40507ae9bd18SNaohiro Aota
40515d4f98a2SYan Zheng while (1) {
4052430640e3SQu Wenruo int finishes_stage;
4053430640e3SQu Wenruo
405476dda93cSYan, Zheng mutex_lock(&fs_info->cleaner_mutex);
40555d4f98a2SYan Zheng ret = relocate_block_group(rc);
405676dda93cSYan, Zheng mutex_unlock(&fs_info->cleaner_mutex);
4057ff612ba7SJosef Bacik if (ret < 0)
40585d4f98a2SYan Zheng err = ret;
4059ff612ba7SJosef Bacik
4060430640e3SQu Wenruo finishes_stage = rc->stage;
4061ff612ba7SJosef Bacik /*
4062ff612ba7SJosef Bacik * We may have gotten ENOSPC after we already dirtied some
4063ff612ba7SJosef Bacik * extents. If writeout happens while we're relocating a
4064ff612ba7SJosef Bacik * different block group we could end up hitting the
4065ff612ba7SJosef Bacik * BUG_ON(rc->stage == UPDATE_DATA_PTRS) in
4066ff612ba7SJosef Bacik * btrfs_reloc_cow_block. Make sure we write everything out
4067ff612ba7SJosef Bacik * properly so we don't trip over this problem, and then break
4068ff612ba7SJosef Bacik * out of the loop if we hit an error.
4069ff612ba7SJosef Bacik */
4070ff612ba7SJosef Bacik if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
4071ff612ba7SJosef Bacik ret = btrfs_wait_ordered_range(rc->data_inode, 0,
4072ff612ba7SJosef Bacik (u64)-1);
4073ff612ba7SJosef Bacik if (ret)
4074ff612ba7SJosef Bacik err = ret;
4075ff612ba7SJosef Bacik invalidate_mapping_pages(rc->data_inode->i_mapping,
4076ff612ba7SJosef Bacik 0, -1);
4077ff612ba7SJosef Bacik rc->stage = UPDATE_DATA_PTRS;
40785d4f98a2SYan Zheng }
40795d4f98a2SYan Zheng
4080ff612ba7SJosef Bacik if (err < 0)
4081ff612ba7SJosef Bacik goto out;
4082ff612ba7SJosef Bacik
40835d4f98a2SYan Zheng if (rc->extents_found == 0)
40845d4f98a2SYan Zheng break;
40855d4f98a2SYan Zheng
4086430640e3SQu Wenruo btrfs_info(fs_info, "found %llu extents, stage: %s",
4087430640e3SQu Wenruo rc->extents_found, stage_to_string(finishes_stage));
40885d4f98a2SYan Zheng }
40895d4f98a2SYan Zheng
40905d4f98a2SYan Zheng WARN_ON(rc->block_group->pinned > 0);
40915d4f98a2SYan Zheng WARN_ON(rc->block_group->reserved > 0);
4092bf38be65SDavid Sterba WARN_ON(rc->block_group->used > 0);
40935d4f98a2SYan Zheng out:
4094f0486c68SYan, Zheng if (err && rw)
40952ff7e61eSJeff Mahoney btrfs_dec_block_group_ro(rc->block_group);
40965d4f98a2SYan Zheng iput(rc->data_inode);
4097907d2710SDavid Sterba out_put_bg:
4098907d2710SDavid Sterba btrfs_put_block_group(bg);
4099907d2710SDavid Sterba reloc_chunk_end(fs_info);
41001a0afa0eSJosef Bacik free_reloc_control(rc);
41015d4f98a2SYan Zheng return err;
41025d4f98a2SYan Zheng }
41035d4f98a2SYan Zheng
mark_garbage_root(struct btrfs_root * root)410476dda93cSYan, Zheng static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
410576dda93cSYan, Zheng {
41060b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
410776dda93cSYan, Zheng struct btrfs_trans_handle *trans;
410879787eaaSJeff Mahoney int ret, err;
410976dda93cSYan, Zheng
41100b246afaSJeff Mahoney trans = btrfs_start_transaction(fs_info->tree_root, 0);
411179787eaaSJeff Mahoney if (IS_ERR(trans))
411279787eaaSJeff Mahoney return PTR_ERR(trans);
411376dda93cSYan, Zheng
411476dda93cSYan, Zheng memset(&root->root_item.drop_progress, 0,
411576dda93cSYan, Zheng sizeof(root->root_item.drop_progress));
4116c8422684SDavid Sterba btrfs_set_root_drop_level(&root->root_item, 0);
411776dda93cSYan, Zheng btrfs_set_root_refs(&root->root_item, 0);
41180b246afaSJeff Mahoney ret = btrfs_update_root(trans, fs_info->tree_root,
411976dda93cSYan, Zheng &root->root_key, &root->root_item);
412076dda93cSYan, Zheng
41213a45bb20SJeff Mahoney err = btrfs_end_transaction(trans);
412279787eaaSJeff Mahoney if (err)
412379787eaaSJeff Mahoney return err;
412479787eaaSJeff Mahoney return ret;
412576dda93cSYan, Zheng }
412676dda93cSYan, Zheng
41275d4f98a2SYan Zheng /*
41285d4f98a2SYan Zheng * recover relocation interrupted by system crash.
41295d4f98a2SYan Zheng *
41305d4f98a2SYan Zheng * this function resumes merging reloc trees with corresponding fs trees.
41315d4f98a2SYan Zheng * this is important for keeping the sharing of tree blocks
41325d4f98a2SYan Zheng */
btrfs_recover_relocation(struct btrfs_fs_info * fs_info)41337eefae6bSJosef Bacik int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
41345d4f98a2SYan Zheng {
41355d4f98a2SYan Zheng LIST_HEAD(reloc_roots);
41365d4f98a2SYan Zheng struct btrfs_key key;
41375d4f98a2SYan Zheng struct btrfs_root *fs_root;
41385d4f98a2SYan Zheng struct btrfs_root *reloc_root;
41395d4f98a2SYan Zheng struct btrfs_path *path;
41405d4f98a2SYan Zheng struct extent_buffer *leaf;
41415d4f98a2SYan Zheng struct reloc_control *rc = NULL;
41425d4f98a2SYan Zheng struct btrfs_trans_handle *trans;
41435d4f98a2SYan Zheng int ret;
41445d4f98a2SYan Zheng int err = 0;
41455d4f98a2SYan Zheng
41465d4f98a2SYan Zheng path = btrfs_alloc_path();
41475d4f98a2SYan Zheng if (!path)
41485d4f98a2SYan Zheng return -ENOMEM;
4149e4058b54SDavid Sterba path->reada = READA_BACK;
41505d4f98a2SYan Zheng
41515d4f98a2SYan Zheng key.objectid = BTRFS_TREE_RELOC_OBJECTID;
41525d4f98a2SYan Zheng key.type = BTRFS_ROOT_ITEM_KEY;
41535d4f98a2SYan Zheng key.offset = (u64)-1;
41545d4f98a2SYan Zheng
41555d4f98a2SYan Zheng while (1) {
41560b246afaSJeff Mahoney ret = btrfs_search_slot(NULL, fs_info->tree_root, &key,
41575d4f98a2SYan Zheng path, 0, 0);
41585d4f98a2SYan Zheng if (ret < 0) {
41595d4f98a2SYan Zheng err = ret;
41605d4f98a2SYan Zheng goto out;
41615d4f98a2SYan Zheng }
41625d4f98a2SYan Zheng if (ret > 0) {
41635d4f98a2SYan Zheng if (path->slots[0] == 0)
41645d4f98a2SYan Zheng break;
41655d4f98a2SYan Zheng path->slots[0]--;
41665d4f98a2SYan Zheng }
41675d4f98a2SYan Zheng leaf = path->nodes[0];
41685d4f98a2SYan Zheng btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4169b3b4aa74SDavid Sterba btrfs_release_path(path);
41705d4f98a2SYan Zheng
41715d4f98a2SYan Zheng if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
41725d4f98a2SYan Zheng key.type != BTRFS_ROOT_ITEM_KEY)
41735d4f98a2SYan Zheng break;
41745d4f98a2SYan Zheng
41757eefae6bSJosef Bacik reloc_root = btrfs_read_tree_root(fs_info->tree_root, &key);
41765d4f98a2SYan Zheng if (IS_ERR(reloc_root)) {
41775d4f98a2SYan Zheng err = PTR_ERR(reloc_root);
41785d4f98a2SYan Zheng goto out;
41795d4f98a2SYan Zheng }
41805d4f98a2SYan Zheng
418192a7cc42SQu Wenruo set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
41825d4f98a2SYan Zheng list_add(&reloc_root->root_list, &reloc_roots);
41835d4f98a2SYan Zheng
41845d4f98a2SYan Zheng if (btrfs_root_refs(&reloc_root->root_item) > 0) {
4185a820feb5SDavid Sterba fs_root = btrfs_get_fs_root(fs_info,
4186a820feb5SDavid Sterba reloc_root->root_key.offset, false);
41875d4f98a2SYan Zheng if (IS_ERR(fs_root)) {
418876dda93cSYan, Zheng ret = PTR_ERR(fs_root);
418976dda93cSYan, Zheng if (ret != -ENOENT) {
419076dda93cSYan, Zheng err = ret;
41915d4f98a2SYan Zheng goto out;
41925d4f98a2SYan Zheng }
419379787eaaSJeff Mahoney ret = mark_garbage_root(reloc_root);
419479787eaaSJeff Mahoney if (ret < 0) {
419579787eaaSJeff Mahoney err = ret;
419679787eaaSJeff Mahoney goto out;
419779787eaaSJeff Mahoney }
4198932fd26dSJosef Bacik } else {
419900246528SJosef Bacik btrfs_put_root(fs_root);
420076dda93cSYan, Zheng }
42015d4f98a2SYan Zheng }
42025d4f98a2SYan Zheng
42035d4f98a2SYan Zheng if (key.offset == 0)
42045d4f98a2SYan Zheng break;
42055d4f98a2SYan Zheng
42065d4f98a2SYan Zheng key.offset--;
42075d4f98a2SYan Zheng }
4208b3b4aa74SDavid Sterba btrfs_release_path(path);
42095d4f98a2SYan Zheng
42105d4f98a2SYan Zheng if (list_empty(&reloc_roots))
42115d4f98a2SYan Zheng goto out;
42125d4f98a2SYan Zheng
4213c258d6e3SQu Wenruo rc = alloc_reloc_control(fs_info);
42145d4f98a2SYan Zheng if (!rc) {
42155d4f98a2SYan Zheng err = -ENOMEM;
42165d4f98a2SYan Zheng goto out;
42175d4f98a2SYan Zheng }
42185d4f98a2SYan Zheng
4219907d2710SDavid Sterba ret = reloc_chunk_start(fs_info);
4220907d2710SDavid Sterba if (ret < 0) {
4221907d2710SDavid Sterba err = ret;
4222907d2710SDavid Sterba goto out_end;
4223907d2710SDavid Sterba }
4224907d2710SDavid Sterba
422529cbcf40SJosef Bacik rc->extent_root = btrfs_extent_root(fs_info, 0);
42265d4f98a2SYan Zheng
42275d4f98a2SYan Zheng set_reloc_control(rc);
42285d4f98a2SYan Zheng
42297a7eaa40SJosef Bacik trans = btrfs_join_transaction(rc->extent_root);
42303612b495STsutomu Itoh if (IS_ERR(trans)) {
42313612b495STsutomu Itoh err = PTR_ERR(trans);
4232fb2d83eeSJosef Bacik goto out_unset;
42333612b495STsutomu Itoh }
42343fd0a558SYan, Zheng
42353fd0a558SYan, Zheng rc->merge_reloc_tree = 1;
42363fd0a558SYan, Zheng
42375d4f98a2SYan Zheng while (!list_empty(&reloc_roots)) {
42385d4f98a2SYan Zheng reloc_root = list_entry(reloc_roots.next,
42395d4f98a2SYan Zheng struct btrfs_root, root_list);
42405d4f98a2SYan Zheng list_del(&reloc_root->root_list);
42415d4f98a2SYan Zheng
42425d4f98a2SYan Zheng if (btrfs_root_refs(&reloc_root->root_item) == 0) {
42435d4f98a2SYan Zheng list_add_tail(&reloc_root->root_list,
42445d4f98a2SYan Zheng &rc->reloc_roots);
42455d4f98a2SYan Zheng continue;
42465d4f98a2SYan Zheng }
42475d4f98a2SYan Zheng
4248a820feb5SDavid Sterba fs_root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
4249a820feb5SDavid Sterba false);
425079787eaaSJeff Mahoney if (IS_ERR(fs_root)) {
425179787eaaSJeff Mahoney err = PTR_ERR(fs_root);
4252ca1aa281SJosef Bacik list_add_tail(&reloc_root->root_list, &reloc_roots);
42531402d17dSXiyu Yang btrfs_end_transaction(trans);
4254fb2d83eeSJosef Bacik goto out_unset;
425579787eaaSJeff Mahoney }
42565d4f98a2SYan Zheng
4257ffd7b339SJeff Mahoney err = __add_reloc_root(reloc_root);
425857a304cfSJosef Bacik ASSERT(err != -EEXIST);
42593c925863SJosef Bacik if (err) {
42603c925863SJosef Bacik list_add_tail(&reloc_root->root_list, &reloc_roots);
42613c925863SJosef Bacik btrfs_put_root(fs_root);
42623c925863SJosef Bacik btrfs_end_transaction(trans);
42633c925863SJosef Bacik goto out_unset;
42643c925863SJosef Bacik }
4265f44deb74SJosef Bacik fs_root->reloc_root = btrfs_grab_root(reloc_root);
426600246528SJosef Bacik btrfs_put_root(fs_root);
42675d4f98a2SYan Zheng }
42685d4f98a2SYan Zheng
42693a45bb20SJeff Mahoney err = btrfs_commit_transaction(trans);
427079787eaaSJeff Mahoney if (err)
4271fb2d83eeSJosef Bacik goto out_unset;
42725d4f98a2SYan Zheng
42735d4f98a2SYan Zheng merge_reloc_roots(rc);
42745d4f98a2SYan Zheng
42755d4f98a2SYan Zheng unset_reloc_control(rc);
42765d4f98a2SYan Zheng
42777a7eaa40SJosef Bacik trans = btrfs_join_transaction(rc->extent_root);
427862b99540SQu Wenruo if (IS_ERR(trans)) {
42793612b495STsutomu Itoh err = PTR_ERR(trans);
42806217b0faSJosef Bacik goto out_clean;
428162b99540SQu Wenruo }
42823a45bb20SJeff Mahoney err = btrfs_commit_transaction(trans);
42836217b0faSJosef Bacik out_clean:
4284d2311e69SQu Wenruo ret = clean_dirty_subvols(rc);
4285d2311e69SQu Wenruo if (ret < 0 && !err)
4286d2311e69SQu Wenruo err = ret;
4287fb2d83eeSJosef Bacik out_unset:
4288fb2d83eeSJosef Bacik unset_reloc_control(rc);
4289907d2710SDavid Sterba out_end:
4290907d2710SDavid Sterba reloc_chunk_end(fs_info);
42911a0afa0eSJosef Bacik free_reloc_control(rc);
42923612b495STsutomu Itoh out:
4293aca1bba6SLiu Bo free_reloc_roots(&reloc_roots);
4294aca1bba6SLiu Bo
42955d4f98a2SYan Zheng btrfs_free_path(path);
42965d4f98a2SYan Zheng
42975d4f98a2SYan Zheng if (err == 0) {
42985d4f98a2SYan Zheng /* cleanup orphan inode in data relocation tree */
4299aeb935a4SQu Wenruo fs_root = btrfs_grab_root(fs_info->data_reloc_root);
4300aeb935a4SQu Wenruo ASSERT(fs_root);
430166b4ffd1SJosef Bacik err = btrfs_orphan_cleanup(fs_root);
430200246528SJosef Bacik btrfs_put_root(fs_root);
4303932fd26dSJosef Bacik }
43045d4f98a2SYan Zheng return err;
43055d4f98a2SYan Zheng }
43065d4f98a2SYan Zheng
43075d4f98a2SYan Zheng /*
43085d4f98a2SYan Zheng * helper to add ordered checksum for data relocation.
43095d4f98a2SYan Zheng *
43105d4f98a2SYan Zheng * cloning checksum properly handles the nodatasum extents.
43115d4f98a2SYan Zheng * it also saves CPU time to re-calculate the checksum.
43125d4f98a2SYan Zheng */
btrfs_reloc_clone_csums(struct btrfs_ordered_extent * ordered)431334bfaf15SChristoph Hellwig int btrfs_reloc_clone_csums(struct btrfs_ordered_extent *ordered)
43145d4f98a2SYan Zheng {
431534bfaf15SChristoph Hellwig struct btrfs_inode *inode = BTRFS_I(ordered->inode);
43167bfa9535SNikolay Borisov struct btrfs_fs_info *fs_info = inode->root->fs_info;
431734bfaf15SChristoph Hellwig u64 disk_bytenr = ordered->file_offset + inode->index_cnt;
431834bfaf15SChristoph Hellwig struct btrfs_root *csum_root = btrfs_csum_root(fs_info, disk_bytenr);
43195d4f98a2SYan Zheng LIST_HEAD(list);
432034bfaf15SChristoph Hellwig int ret;
43215d4f98a2SYan Zheng
432297e38239SQu Wenruo ret = btrfs_lookup_csums_list(csum_root, disk_bytenr,
432334bfaf15SChristoph Hellwig disk_bytenr + ordered->num_bytes - 1,
432434bfaf15SChristoph Hellwig &list, 0, false);
432579787eaaSJeff Mahoney if (ret)
432634bfaf15SChristoph Hellwig return ret;
43275d4f98a2SYan Zheng
43285d4f98a2SYan Zheng while (!list_empty(&list)) {
432934bfaf15SChristoph Hellwig struct btrfs_ordered_sum *sums =
433034bfaf15SChristoph Hellwig list_entry(list.next, struct btrfs_ordered_sum, list);
433134bfaf15SChristoph Hellwig
43325d4f98a2SYan Zheng list_del_init(&sums->list);
43335d4f98a2SYan Zheng
43344577b014SJosef Bacik /*
43354577b014SJosef Bacik * We need to offset the new_bytenr based on where the csum is.
43364577b014SJosef Bacik * We need to do this because we will read in entire prealloc
43374577b014SJosef Bacik * extents but we may have written to say the middle of the
43384577b014SJosef Bacik * prealloc extent, so we need to make sure the csum goes with
43394577b014SJosef Bacik * the right disk offset.
43404577b014SJosef Bacik *
43414577b014SJosef Bacik * We can do this because the data reloc inode refers strictly
43424577b014SJosef Bacik * to the on disk bytes, so we don't have to worry about
43434577b014SJosef Bacik * disk_len vs real len like with real inodes since it's all
43444577b014SJosef Bacik * disk length.
43454577b014SJosef Bacik */
434634bfaf15SChristoph Hellwig sums->logical = ordered->disk_bytenr + sums->logical - disk_bytenr;
4347f9756261SNikolay Borisov btrfs_add_ordered_sum(ordered, sums);
43485d4f98a2SYan Zheng }
434934bfaf15SChristoph Hellwig
435034bfaf15SChristoph Hellwig return 0;
43515d4f98a2SYan Zheng }
43523fd0a558SYan, Zheng
btrfs_reloc_cow_block(struct btrfs_trans_handle * trans,struct btrfs_root * root,const struct extent_buffer * buf,struct extent_buffer * cow)435383d4cfd4SJosef Bacik int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
43544dc6ea8bSDavid Sterba struct btrfs_root *root,
43554dc6ea8bSDavid Sterba const struct extent_buffer *buf,
43563fd0a558SYan, Zheng struct extent_buffer *cow)
43573fd0a558SYan, Zheng {
43580b246afaSJeff Mahoney struct btrfs_fs_info *fs_info = root->fs_info;
43593fd0a558SYan, Zheng struct reloc_control *rc;
4360a26195a5SQu Wenruo struct btrfs_backref_node *node;
43613fd0a558SYan, Zheng int first_cow = 0;
43623fd0a558SYan, Zheng int level;
436383d4cfd4SJosef Bacik int ret = 0;
43643fd0a558SYan, Zheng
43650b246afaSJeff Mahoney rc = fs_info->reloc_ctl;
43663fd0a558SYan, Zheng if (!rc)
436783d4cfd4SJosef Bacik return 0;
43683fd0a558SYan, Zheng
436937f00a6dSJohannes Thumshirn BUG_ON(rc->stage == UPDATE_DATA_PTRS && btrfs_is_data_reloc_root(root));
43703fd0a558SYan, Zheng
43713fd0a558SYan, Zheng level = btrfs_header_level(buf);
43723fd0a558SYan, Zheng if (btrfs_header_generation(buf) <=
43733fd0a558SYan, Zheng btrfs_root_last_snapshot(&root->root_item))
43743fd0a558SYan, Zheng first_cow = 1;
43753fd0a558SYan, Zheng
43763fd0a558SYan, Zheng if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
43773fd0a558SYan, Zheng rc->create_reloc_tree) {
43783fd0a558SYan, Zheng WARN_ON(!first_cow && level == 0);
43793fd0a558SYan, Zheng
43803fd0a558SYan, Zheng node = rc->backref_cache.path[level];
4381*3c884b6bSJosef Bacik
4382*3c884b6bSJosef Bacik /*
4383*3c884b6bSJosef Bacik * If node->bytenr != buf->start and node->new_bytenr !=
4384*3c884b6bSJosef Bacik * buf->start then we've got the wrong backref node for what we
4385*3c884b6bSJosef Bacik * expected to see here and the cache is incorrect.
4386*3c884b6bSJosef Bacik */
4387*3c884b6bSJosef Bacik if (unlikely(node->bytenr != buf->start && node->new_bytenr != buf->start)) {
4388*3c884b6bSJosef Bacik btrfs_err(fs_info,
4389*3c884b6bSJosef Bacik "bytenr %llu was found but our backref cache was expecting %llu or %llu",
4390*3c884b6bSJosef Bacik buf->start, node->bytenr, node->new_bytenr);
4391*3c884b6bSJosef Bacik return -EUCLEAN;
4392*3c884b6bSJosef Bacik }
43933fd0a558SYan, Zheng
4394b0fe7078SQu Wenruo btrfs_backref_drop_node_buffer(node);
439567439dadSDavid Sterba atomic_inc(&cow->refs);
43963fd0a558SYan, Zheng node->eb = cow;
43973fd0a558SYan, Zheng node->new_bytenr = cow->start;
43983fd0a558SYan, Zheng
43993fd0a558SYan, Zheng if (!node->pending) {
44003fd0a558SYan, Zheng list_move_tail(&node->list,
44013fd0a558SYan, Zheng &rc->backref_cache.pending[level]);
44023fd0a558SYan, Zheng node->pending = 1;
44033fd0a558SYan, Zheng }
44043fd0a558SYan, Zheng
44053fd0a558SYan, Zheng if (first_cow)
44069569cc20SQu Wenruo mark_block_processed(rc, node);
44073fd0a558SYan, Zheng
44083fd0a558SYan, Zheng if (first_cow && level > 0)
44093fd0a558SYan, Zheng rc->nodes_relocated += buf->len;
44103fd0a558SYan, Zheng }
44113fd0a558SYan, Zheng
441283d4cfd4SJosef Bacik if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
44133fd0a558SYan, Zheng ret = replace_file_extents(trans, rc, root, cow);
441483d4cfd4SJosef Bacik return ret;
44153fd0a558SYan, Zheng }
44163fd0a558SYan, Zheng
44173fd0a558SYan, Zheng /*
44183fd0a558SYan, Zheng * called before creating snapshot. it calculates metadata reservation
441901327610SNicholas D Steeves * required for relocating tree blocks in the snapshot
44203fd0a558SYan, Zheng */
btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot * pending,u64 * bytes_to_reserve)4421147d256eSZhaolei void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
44223fd0a558SYan, Zheng u64 *bytes_to_reserve)
44233fd0a558SYan, Zheng {
442410995c04SQu Wenruo struct btrfs_root *root = pending->root;
442510995c04SQu Wenruo struct reloc_control *rc = root->fs_info->reloc_ctl;
44263fd0a558SYan, Zheng
44276282675eSQu Wenruo if (!rc || !have_reloc_root(root))
44283fd0a558SYan, Zheng return;
44293fd0a558SYan, Zheng
44303fd0a558SYan, Zheng if (!rc->merge_reloc_tree)
44313fd0a558SYan, Zheng return;
44323fd0a558SYan, Zheng
44333fd0a558SYan, Zheng root = root->reloc_root;
44343fd0a558SYan, Zheng BUG_ON(btrfs_root_refs(&root->root_item) == 0);
44353fd0a558SYan, Zheng /*
44363fd0a558SYan, Zheng * relocation is in the stage of merging trees. the space
44373fd0a558SYan, Zheng * used by merging a reloc tree is twice the size of
44383fd0a558SYan, Zheng * relocated tree nodes in the worst case. half for cowing
44393fd0a558SYan, Zheng * the reloc tree, half for cowing the fs tree. the space
44403fd0a558SYan, Zheng * used by cowing the reloc tree will be freed after the
44413fd0a558SYan, Zheng * tree is dropped. if we create snapshot, cowing the fs
44423fd0a558SYan, Zheng * tree may use more space than it frees. so we need
44433fd0a558SYan, Zheng * reserve extra space.
44443fd0a558SYan, Zheng */
44453fd0a558SYan, Zheng *bytes_to_reserve += rc->nodes_relocated;
44463fd0a558SYan, Zheng }
44473fd0a558SYan, Zheng
44483fd0a558SYan, Zheng /*
44493fd0a558SYan, Zheng * called after snapshot is created. migrate block reservation
44503fd0a558SYan, Zheng * and create reloc root for the newly created snapshot
4451f44deb74SJosef Bacik *
4452f44deb74SJosef Bacik * This is similar to btrfs_init_reloc_root(), we come out of here with two
4453f44deb74SJosef Bacik * references held on the reloc_root, one for root->reloc_root and one for
4454f44deb74SJosef Bacik * rc->reloc_roots.
44553fd0a558SYan, Zheng */
btrfs_reloc_post_snapshot(struct btrfs_trans_handle * trans,struct btrfs_pending_snapshot * pending)445649b25e05SJeff Mahoney int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
44573fd0a558SYan, Zheng struct btrfs_pending_snapshot *pending)
44583fd0a558SYan, Zheng {
44593fd0a558SYan, Zheng struct btrfs_root *root = pending->root;
44603fd0a558SYan, Zheng struct btrfs_root *reloc_root;
44613fd0a558SYan, Zheng struct btrfs_root *new_root;
446210995c04SQu Wenruo struct reloc_control *rc = root->fs_info->reloc_ctl;
44633fd0a558SYan, Zheng int ret;
44643fd0a558SYan, Zheng
44656282675eSQu Wenruo if (!rc || !have_reloc_root(root))
446649b25e05SJeff Mahoney return 0;
44673fd0a558SYan, Zheng
44683fd0a558SYan, Zheng rc = root->fs_info->reloc_ctl;
44693fd0a558SYan, Zheng rc->merging_rsv_size += rc->nodes_relocated;
44703fd0a558SYan, Zheng
44713fd0a558SYan, Zheng if (rc->merge_reloc_tree) {
44723fd0a558SYan, Zheng ret = btrfs_block_rsv_migrate(&pending->block_rsv,
44733fd0a558SYan, Zheng rc->block_rsv,
44743a584174SLu Fengqi rc->nodes_relocated, true);
447549b25e05SJeff Mahoney if (ret)
447649b25e05SJeff Mahoney return ret;
44773fd0a558SYan, Zheng }
44783fd0a558SYan, Zheng
44793fd0a558SYan, Zheng new_root = pending->snap;
44803fd0a558SYan, Zheng reloc_root = create_reloc_root(trans, root->reloc_root,
44813fd0a558SYan, Zheng new_root->root_key.objectid);
448249b25e05SJeff Mahoney if (IS_ERR(reloc_root))
448349b25e05SJeff Mahoney return PTR_ERR(reloc_root);
44843fd0a558SYan, Zheng
4485ffd7b339SJeff Mahoney ret = __add_reloc_root(reloc_root);
448657a304cfSJosef Bacik ASSERT(ret != -EEXIST);
44873c925863SJosef Bacik if (ret) {
44883c925863SJosef Bacik /* Pairs with create_reloc_root */
44893c925863SJosef Bacik btrfs_put_root(reloc_root);
44903c925863SJosef Bacik return ret;
44913c925863SJosef Bacik }
4492f44deb74SJosef Bacik new_root->reloc_root = btrfs_grab_root(reloc_root);
44933fd0a558SYan, Zheng
449449b25e05SJeff Mahoney if (rc->create_reloc_tree)
44953fd0a558SYan, Zheng ret = clone_backref_node(trans, rc, root, reloc_root);
449649b25e05SJeff Mahoney return ret;
44973fd0a558SYan, Zheng }
4498b9a9a850SQu Wenruo
4499b9a9a850SQu Wenruo /*
4500b9a9a850SQu Wenruo * Get the current bytenr for the block group which is being relocated.
4501b9a9a850SQu Wenruo *
4502b9a9a850SQu Wenruo * Return U64_MAX if no running relocation.
4503b9a9a850SQu Wenruo */
btrfs_get_reloc_bg_bytenr(const struct btrfs_fs_info * fs_info)45044dc6ea8bSDavid Sterba u64 btrfs_get_reloc_bg_bytenr(const struct btrfs_fs_info *fs_info)
4505b9a9a850SQu Wenruo {
4506b9a9a850SQu Wenruo u64 logical = U64_MAX;
4507b9a9a850SQu Wenruo
4508b9a9a850SQu Wenruo lockdep_assert_held(&fs_info->reloc_mutex);
4509b9a9a850SQu Wenruo
4510b9a9a850SQu Wenruo if (fs_info->reloc_ctl && fs_info->reloc_ctl->block_group)
4511b9a9a850SQu Wenruo logical = fs_info->reloc_ctl->block_group->start;
4512b9a9a850SQu Wenruo return logical;
4513b9a9a850SQu Wenruo }
4514