xref: /openbmc/linux/fs/btrfs/relocation.c (revision 1b60d2ec)
1c1d7c514SDavid Sterba // SPDX-License-Identifier: GPL-2.0
25d4f98a2SYan Zheng /*
35d4f98a2SYan Zheng  * Copyright (C) 2009 Oracle.  All rights reserved.
45d4f98a2SYan Zheng  */
55d4f98a2SYan Zheng 
65d4f98a2SYan Zheng #include <linux/sched.h>
75d4f98a2SYan Zheng #include <linux/pagemap.h>
85d4f98a2SYan Zheng #include <linux/writeback.h>
95d4f98a2SYan Zheng #include <linux/blkdev.h>
105d4f98a2SYan Zheng #include <linux/rbtree.h>
115a0e3ad6STejun Heo #include <linux/slab.h>
12726a3421SQu Wenruo #include <linux/error-injection.h>
135d4f98a2SYan Zheng #include "ctree.h"
145d4f98a2SYan Zheng #include "disk-io.h"
155d4f98a2SYan Zheng #include "transaction.h"
165d4f98a2SYan Zheng #include "volumes.h"
175d4f98a2SYan Zheng #include "locking.h"
185d4f98a2SYan Zheng #include "btrfs_inode.h"
195d4f98a2SYan Zheng #include "async-thread.h"
200af3d00bSJosef Bacik #include "free-space-cache.h"
21581bb050SLi Zefan #include "inode-map.h"
2262b99540SQu Wenruo #include "qgroup.h"
23cdccee99SLiu Bo #include "print-tree.h"
2486736342SJosef Bacik #include "delalloc-space.h"
25aac0023cSJosef Bacik #include "block-group.h"
2619b546d7SQu Wenruo #include "backref.h"
27e9a28dc5SQu Wenruo #include "misc.h"
285d4f98a2SYan Zheng 
/*
 * Relocation overview
 *
 * [What does relocation do]
 *
 * The objective of relocation is to relocate all extents of the target block
 * group to other block groups.
 * This is utilized by resize (shrink only), profile converting, compacting
 * space, or balance routine to spread chunks over devices.
 *
 * 		Before		|		After
 * ------------------------------------------------------------------
 *  BG A: 10 data extents	| BG A: deleted
 *  BG B:  2 data extents	| BG B: 10 data extents (2 old + 8 relocated)
 *  BG C:  1 data extent	| BG C:  3 data extents (1 old + 2 relocated)
 *
 * [How does relocation work]
 *
 * 1.   Mark the target block group read-only
 *      New extents won't be allocated from the target block group.
 *
 * 2.1  Record each extent in the target block group
 *      To build a proper map of extents to be relocated.
 *
 * 2.2  Build data reloc tree and reloc trees
 *      Data reloc tree will contain an inode, recording all newly relocated
 *      data extents.
 *      There will be only one data reloc tree for one data block group.
 *
 *      Reloc tree will be a special snapshot of its source tree, containing
 *      relocated tree blocks.
 *      Each tree referring to a tree block in target block group will get its
 *      reloc tree built.
 *
 * 2.3  Swap source tree with its corresponding reloc tree
 *      Each involved tree only refers to new extents after swap.
 *
 * 3.   Cleanup reloc trees and data reloc tree.
 *      As old extents in the target block group are still referenced by reloc
 *      trees, we need to clean them up before really freeing the target block
 *      group.
 *
 * The main complexity is in steps 2.2 and 2.3.
 *
 * The entry point of relocation is relocate_block_group() function.
 */
750c891389SQu Wenruo 
/*
 * NOTE(review): presumably a count of tree nodes used to size the metadata
 * reservation for relocation; the use site is not visible from here, confirm
 * there before relying on this.
 */
#define RELOCATION_RESERVED_NODES	256
/*
 * Map the address of a tree root to a tree.
 *
 * One entry of a mapping_tree, keyed by @bytenr.  @data is an opaque payload;
 * find_reloc_root() reads it back as a struct btrfs_root pointer.
 */
struct mapping_node {
	struct {
		struct rb_node rb_node;
		u64 bytenr;
	}; /* Use rb_simple_node for search/insert */
	void *data;
};
875d4f98a2SYan Zheng 
/*
 * Red-black tree of mapping_node entries plus the spinlock that
 * find_reloc_root() holds while searching it.
 */
struct mapping_tree {
	struct rb_root rb_root;
	spinlock_t lock;
};
925d4f98a2SYan Zheng 
/*
 * Represents a tree block to process, keyed by @bytenr in an rb tree.
 */
struct tree_block {
	struct {
		struct rb_node rb_node;
		u64 bytenr;
	}; /* Use rb_simple_node for search/insert */
	struct btrfs_key key;
	unsigned int level:8;
	/* NOTE(review): presumably set once @key has been filled in - confirm */
	unsigned int key_ready:1;
};
1055d4f98a2SYan Zheng 
/* Capacity of file_extent_cluster::boundary[] */
#define MAX_EXTENTS 128

/*
 * A [start, end] range together with up to MAX_EXTENTS boundary values.
 *
 * NOTE(review): presumably a batch of data extents relocated together, with
 * @nr counting the used boundary[] slots - the users are outside this view,
 * confirm there.
 */
struct file_extent_cluster {
	u64 start;
	u64 end;
	u64 boundary[MAX_EXTENTS];
	unsigned int nr;
};
1140257bb82SYan, Zheng 
/*
 * State of a block group relocation.  find_reloc_root() reaches it through
 * fs_info->reloc_ctl.
 */
struct reloc_control {
	/* block group to relocate */
	struct btrfs_block_group *block_group;
	/* extent tree */
	struct btrfs_root *extent_root;
	/* inode for moving data */
	struct inode *data_inode;

	struct btrfs_block_rsv *block_rsv;

	/* cache of backref nodes/edges built by build_backref_tree() */
	struct btrfs_backref_cache backref_cache;

	struct file_extent_cluster cluster;
	/* tree blocks have been processed */
	struct extent_io_tree processed_blocks;
	/* map start of tree root to corresponding reloc tree */
	struct mapping_tree reloc_root_tree;
	/* list of reloc trees */
	struct list_head reloc_roots;
	/* list of subvolume trees that get relocated */
	struct list_head dirty_subvol_roots;
	/* size of metadata reservation for merging reloc trees */
	u64 merging_rsv_size;
	/* size of relocated tree nodes */
	u64 nodes_relocated;
	/* reserved size for block group relocation */
	u64 reserved_bytes;

	u64 search_start;
	u64 extents_found;

	/*
	 * NOTE(review): presumably one of MOVE_DATA_EXTENTS / UPDATE_DATA_PTRS -
	 * confirm against the code that assigns it.
	 */
	unsigned int stage:8;
	unsigned int create_reloc_tree:1;
	unsigned int merge_reloc_tree:1;
	unsigned int found_file_extent:1;
};
1515d4f98a2SYan Zheng 
/*
 * Stages of data relocation.
 *
 * NOTE(review): presumably the values stored in reloc_control::stage -
 * confirm at the assignment sites, which are outside this view.
 */
#define MOVE_DATA_EXTENTS	0
#define UPDATE_DATA_PTRS	1
1555d4f98a2SYan Zheng 
1569569cc20SQu Wenruo static void mark_block_processed(struct reloc_control *rc,
157a26195a5SQu Wenruo 				 struct btrfs_backref_node *node)
1589569cc20SQu Wenruo {
1599569cc20SQu Wenruo 	u32 blocksize;
1609569cc20SQu Wenruo 
1619569cc20SQu Wenruo 	if (node->level == 0 ||
1629569cc20SQu Wenruo 	    in_range(node->bytenr, rc->block_group->start,
1639569cc20SQu Wenruo 		     rc->block_group->length)) {
1649569cc20SQu Wenruo 		blocksize = rc->extent_root->fs_info->nodesize;
1659569cc20SQu Wenruo 		set_extent_bits(&rc->processed_blocks, node->bytenr,
1669569cc20SQu Wenruo 				node->bytenr + blocksize - 1, EXTENT_DIRTY);
1679569cc20SQu Wenruo 	}
1689569cc20SQu Wenruo 	node->processed = 1;
1699569cc20SQu Wenruo }
1709569cc20SQu Wenruo 
1715d4f98a2SYan Zheng 
1725d4f98a2SYan Zheng static void mapping_tree_init(struct mapping_tree *tree)
1735d4f98a2SYan Zheng {
1746bef4d31SEric Paris 	tree->rb_root = RB_ROOT;
1755d4f98a2SYan Zheng 	spin_lock_init(&tree->lock);
1765d4f98a2SYan Zheng }
1775d4f98a2SYan Zheng 
1785d4f98a2SYan Zheng /*
1795d4f98a2SYan Zheng  * walk up backref nodes until reach node presents tree root
1805d4f98a2SYan Zheng  */
181a26195a5SQu Wenruo static struct btrfs_backref_node *walk_up_backref(
182a26195a5SQu Wenruo 		struct btrfs_backref_node *node,
183a26195a5SQu Wenruo 		struct btrfs_backref_edge *edges[], int *index)
1845d4f98a2SYan Zheng {
185a26195a5SQu Wenruo 	struct btrfs_backref_edge *edge;
1865d4f98a2SYan Zheng 	int idx = *index;
1875d4f98a2SYan Zheng 
1885d4f98a2SYan Zheng 	while (!list_empty(&node->upper)) {
1895d4f98a2SYan Zheng 		edge = list_entry(node->upper.next,
190a26195a5SQu Wenruo 				  struct btrfs_backref_edge, list[LOWER]);
1915d4f98a2SYan Zheng 		edges[idx++] = edge;
1925d4f98a2SYan Zheng 		node = edge->node[UPPER];
1935d4f98a2SYan Zheng 	}
1943fd0a558SYan, Zheng 	BUG_ON(node->detached);
1955d4f98a2SYan Zheng 	*index = idx;
1965d4f98a2SYan Zheng 	return node;
1975d4f98a2SYan Zheng }
1985d4f98a2SYan Zheng 
1995d4f98a2SYan Zheng /*
2005d4f98a2SYan Zheng  * walk down backref nodes to find start of next reference path
2015d4f98a2SYan Zheng  */
202a26195a5SQu Wenruo static struct btrfs_backref_node *walk_down_backref(
203a26195a5SQu Wenruo 		struct btrfs_backref_edge *edges[], int *index)
2045d4f98a2SYan Zheng {
205a26195a5SQu Wenruo 	struct btrfs_backref_edge *edge;
206a26195a5SQu Wenruo 	struct btrfs_backref_node *lower;
2075d4f98a2SYan Zheng 	int idx = *index;
2085d4f98a2SYan Zheng 
2095d4f98a2SYan Zheng 	while (idx > 0) {
2105d4f98a2SYan Zheng 		edge = edges[idx - 1];
2115d4f98a2SYan Zheng 		lower = edge->node[LOWER];
2125d4f98a2SYan Zheng 		if (list_is_last(&edge->list[LOWER], &lower->upper)) {
2135d4f98a2SYan Zheng 			idx--;
2145d4f98a2SYan Zheng 			continue;
2155d4f98a2SYan Zheng 		}
2165d4f98a2SYan Zheng 		edge = list_entry(edge->list[LOWER].next,
217a26195a5SQu Wenruo 				  struct btrfs_backref_edge, list[LOWER]);
2185d4f98a2SYan Zheng 		edges[idx - 1] = edge;
2195d4f98a2SYan Zheng 		*index = idx;
2205d4f98a2SYan Zheng 		return edge->node[UPPER];
2215d4f98a2SYan Zheng 	}
2225d4f98a2SYan Zheng 	*index = 0;
2235d4f98a2SYan Zheng 	return NULL;
2245d4f98a2SYan Zheng }
2255d4f98a2SYan Zheng 
226a26195a5SQu Wenruo static void update_backref_node(struct btrfs_backref_cache *cache,
227a26195a5SQu Wenruo 				struct btrfs_backref_node *node, u64 bytenr)
2283fd0a558SYan, Zheng {
2293fd0a558SYan, Zheng 	struct rb_node *rb_node;
2303fd0a558SYan, Zheng 	rb_erase(&node->rb_node, &cache->rb_root);
2313fd0a558SYan, Zheng 	node->bytenr = bytenr;
232e9a28dc5SQu Wenruo 	rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node);
23343c04fb1SJeff Mahoney 	if (rb_node)
234982c92cbSQu Wenruo 		btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST);
2353fd0a558SYan, Zheng }
2363fd0a558SYan, Zheng 
2373fd0a558SYan, Zheng /*
2383fd0a558SYan, Zheng  * update backref cache after a transaction commit
2393fd0a558SYan, Zheng  */
2403fd0a558SYan, Zheng static int update_backref_cache(struct btrfs_trans_handle *trans,
241a26195a5SQu Wenruo 				struct btrfs_backref_cache *cache)
2423fd0a558SYan, Zheng {
243a26195a5SQu Wenruo 	struct btrfs_backref_node *node;
2443fd0a558SYan, Zheng 	int level = 0;
2453fd0a558SYan, Zheng 
2463fd0a558SYan, Zheng 	if (cache->last_trans == 0) {
2473fd0a558SYan, Zheng 		cache->last_trans = trans->transid;
2483fd0a558SYan, Zheng 		return 0;
2493fd0a558SYan, Zheng 	}
2503fd0a558SYan, Zheng 
2513fd0a558SYan, Zheng 	if (cache->last_trans == trans->transid)
2523fd0a558SYan, Zheng 		return 0;
2533fd0a558SYan, Zheng 
2543fd0a558SYan, Zheng 	/*
2553fd0a558SYan, Zheng 	 * detached nodes are used to avoid unnecessary backref
2563fd0a558SYan, Zheng 	 * lookup. transaction commit changes the extent tree.
2573fd0a558SYan, Zheng 	 * so the detached nodes are no longer useful.
2583fd0a558SYan, Zheng 	 */
2593fd0a558SYan, Zheng 	while (!list_empty(&cache->detached)) {
2603fd0a558SYan, Zheng 		node = list_entry(cache->detached.next,
261a26195a5SQu Wenruo 				  struct btrfs_backref_node, list);
262023acb07SQu Wenruo 		btrfs_backref_cleanup_node(cache, node);
2633fd0a558SYan, Zheng 	}
2643fd0a558SYan, Zheng 
2653fd0a558SYan, Zheng 	while (!list_empty(&cache->changed)) {
2663fd0a558SYan, Zheng 		node = list_entry(cache->changed.next,
267a26195a5SQu Wenruo 				  struct btrfs_backref_node, list);
2683fd0a558SYan, Zheng 		list_del_init(&node->list);
2693fd0a558SYan, Zheng 		BUG_ON(node->pending);
2703fd0a558SYan, Zheng 		update_backref_node(cache, node, node->new_bytenr);
2713fd0a558SYan, Zheng 	}
2723fd0a558SYan, Zheng 
2733fd0a558SYan, Zheng 	/*
2743fd0a558SYan, Zheng 	 * some nodes can be left in the pending list if there were
2753fd0a558SYan, Zheng 	 * errors during processing the pending nodes.
2763fd0a558SYan, Zheng 	 */
2773fd0a558SYan, Zheng 	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2783fd0a558SYan, Zheng 		list_for_each_entry(node, &cache->pending[level], list) {
2793fd0a558SYan, Zheng 			BUG_ON(!node->pending);
2803fd0a558SYan, Zheng 			if (node->bytenr == node->new_bytenr)
2813fd0a558SYan, Zheng 				continue;
2823fd0a558SYan, Zheng 			update_backref_node(cache, node, node->new_bytenr);
2833fd0a558SYan, Zheng 		}
2843fd0a558SYan, Zheng 	}
2853fd0a558SYan, Zheng 
2863fd0a558SYan, Zheng 	cache->last_trans = 0;
2873fd0a558SYan, Zheng 	return 1;
2883fd0a558SYan, Zheng }
2893fd0a558SYan, Zheng 
2906282675eSQu Wenruo static bool reloc_root_is_dead(struct btrfs_root *root)
2916282675eSQu Wenruo {
2926282675eSQu Wenruo 	/*
2936282675eSQu Wenruo 	 * Pair with set_bit/clear_bit in clean_dirty_subvols and
2946282675eSQu Wenruo 	 * btrfs_update_reloc_root. We need to see the updated bit before
2956282675eSQu Wenruo 	 * trying to access reloc_root
2966282675eSQu Wenruo 	 */
2976282675eSQu Wenruo 	smp_rmb();
2986282675eSQu Wenruo 	if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
2996282675eSQu Wenruo 		return true;
3006282675eSQu Wenruo 	return false;
3016282675eSQu Wenruo }
3026282675eSQu Wenruo 
3036282675eSQu Wenruo /*
3046282675eSQu Wenruo  * Check if this subvolume tree has valid reloc tree.
3056282675eSQu Wenruo  *
3066282675eSQu Wenruo  * Reloc tree after swap is considered dead, thus not considered as valid.
3076282675eSQu Wenruo  * This is enough for most callers, as they don't distinguish dead reloc root
30855465730SQu Wenruo  * from no reloc root.  But btrfs_should_ignore_reloc_root() below is a
30955465730SQu Wenruo  * special case.
3106282675eSQu Wenruo  */
3116282675eSQu Wenruo static bool have_reloc_root(struct btrfs_root *root)
3126282675eSQu Wenruo {
3136282675eSQu Wenruo 	if (reloc_root_is_dead(root))
3146282675eSQu Wenruo 		return false;
3156282675eSQu Wenruo 	if (!root->reloc_root)
3166282675eSQu Wenruo 		return false;
3176282675eSQu Wenruo 	return true;
3186282675eSQu Wenruo }
319f2a97a9dSDavid Sterba 
32055465730SQu Wenruo int btrfs_should_ignore_reloc_root(struct btrfs_root *root)
3213fd0a558SYan, Zheng {
3223fd0a558SYan, Zheng 	struct btrfs_root *reloc_root;
3233fd0a558SYan, Zheng 
32427cdeb70SMiao Xie 	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
3253fd0a558SYan, Zheng 		return 0;
3263fd0a558SYan, Zheng 
3276282675eSQu Wenruo 	/* This root has been merged with its reloc tree, we can ignore it */
3286282675eSQu Wenruo 	if (reloc_root_is_dead(root))
3296282675eSQu Wenruo 		return 1;
3306282675eSQu Wenruo 
3313fd0a558SYan, Zheng 	reloc_root = root->reloc_root;
3323fd0a558SYan, Zheng 	if (!reloc_root)
3333fd0a558SYan, Zheng 		return 0;
3343fd0a558SYan, Zheng 
3354d4225fcSJosef Bacik 	if (btrfs_header_generation(reloc_root->commit_root) ==
3364d4225fcSJosef Bacik 	    root->fs_info->running_transaction->transid)
3373fd0a558SYan, Zheng 		return 0;
3383fd0a558SYan, Zheng 	/*
3393fd0a558SYan, Zheng 	 * if there is reloc tree and it was created in previous
3403fd0a558SYan, Zheng 	 * transaction backref lookup can find the reloc tree,
3413fd0a558SYan, Zheng 	 * so backref node for the fs tree root is useless for
3423fd0a558SYan, Zheng 	 * relocation.
3433fd0a558SYan, Zheng 	 */
3443fd0a558SYan, Zheng 	return 1;
3453fd0a558SYan, Zheng }
34655465730SQu Wenruo 
3475d4f98a2SYan Zheng /*
3485d4f98a2SYan Zheng  * find reloc tree by address of tree root
3495d4f98a2SYan Zheng  */
3502433bea5SQu Wenruo struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr)
3515d4f98a2SYan Zheng {
3522433bea5SQu Wenruo 	struct reloc_control *rc = fs_info->reloc_ctl;
3535d4f98a2SYan Zheng 	struct rb_node *rb_node;
3545d4f98a2SYan Zheng 	struct mapping_node *node;
3555d4f98a2SYan Zheng 	struct btrfs_root *root = NULL;
3565d4f98a2SYan Zheng 
3572433bea5SQu Wenruo 	ASSERT(rc);
3585d4f98a2SYan Zheng 	spin_lock(&rc->reloc_root_tree.lock);
359e9a28dc5SQu Wenruo 	rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root, bytenr);
3605d4f98a2SYan Zheng 	if (rb_node) {
3615d4f98a2SYan Zheng 		node = rb_entry(rb_node, struct mapping_node, rb_node);
3625d4f98a2SYan Zheng 		root = (struct btrfs_root *)node->data;
3635d4f98a2SYan Zheng 	}
3645d4f98a2SYan Zheng 	spin_unlock(&rc->reloc_root_tree.lock);
36500246528SJosef Bacik 	return btrfs_grab_root(root);
3665d4f98a2SYan Zheng }
3675d4f98a2SYan Zheng 
3685d4f98a2SYan Zheng static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
3695d4f98a2SYan Zheng 					u64 root_objectid)
3705d4f98a2SYan Zheng {
3715d4f98a2SYan Zheng 	struct btrfs_key key;
3725d4f98a2SYan Zheng 
3735d4f98a2SYan Zheng 	key.objectid = root_objectid;
3745d4f98a2SYan Zheng 	key.type = BTRFS_ROOT_ITEM_KEY;
3755d4f98a2SYan Zheng 	key.offset = (u64)-1;
3765d4f98a2SYan Zheng 
377bc44d7c4SJosef Bacik 	return btrfs_get_fs_root(fs_info, &key, false);
3785d4f98a2SYan Zheng }
3795d4f98a2SYan Zheng 
3805d4f98a2SYan Zheng /*
3811f872924SQu Wenruo  * In handle_one_tree_backref(), we have only linked the lower node to the edge,
3821f872924SQu Wenruo  * but the upper node hasn't been linked to the edge.
383a26195a5SQu Wenruo  * This means we can only iterate through btrfs_backref_node::upper to reach
384a26195a5SQu Wenruo  * parent edges, but not through btrfs_backref_node::lower to reach children
385a26195a5SQu Wenruo  * edges.
3861f872924SQu Wenruo  *
387a26195a5SQu Wenruo  * This function will finish the btrfs_backref_node::lower to related edges,
388a26195a5SQu Wenruo  * so that backref cache can be bi-directionally iterated.
3891f872924SQu Wenruo  *
3901f872924SQu Wenruo  * Also, this will add the nodes to backref cache for the next run.
3911f872924SQu Wenruo  */
392a26195a5SQu Wenruo static int finish_upper_links(struct btrfs_backref_cache *cache,
393a26195a5SQu Wenruo 			      struct btrfs_backref_node *start)
3941f872924SQu Wenruo {
3951f872924SQu Wenruo 	struct list_head *useless_node = &cache->useless_node;
396a26195a5SQu Wenruo 	struct btrfs_backref_edge *edge;
3971f872924SQu Wenruo 	struct rb_node *rb_node;
3981f872924SQu Wenruo 	LIST_HEAD(pending_edge);
3991f872924SQu Wenruo 
4001f872924SQu Wenruo 	ASSERT(start->checked);
4011f872924SQu Wenruo 
4021f872924SQu Wenruo 	/* Insert this node to cache if it's not COW-only */
4031f872924SQu Wenruo 	if (!start->cowonly) {
404e9a28dc5SQu Wenruo 		rb_node = rb_simple_insert(&cache->rb_root, start->bytenr,
4051f872924SQu Wenruo 					   &start->rb_node);
4061f872924SQu Wenruo 		if (rb_node)
407982c92cbSQu Wenruo 			btrfs_backref_panic(cache->fs_info, start->bytenr,
408982c92cbSQu Wenruo 					    -EEXIST);
4091f872924SQu Wenruo 		list_add_tail(&start->lower, &cache->leaves);
4101f872924SQu Wenruo 	}
4111f872924SQu Wenruo 
4121f872924SQu Wenruo 	/*
4131f872924SQu Wenruo 	 * Use breadth first search to iterate all related edges.
4141f872924SQu Wenruo 	 *
4151f872924SQu Wenruo 	 * The starting points are all the edges of this node
4161f872924SQu Wenruo 	 */
4171f872924SQu Wenruo 	list_for_each_entry(edge, &start->upper, list[LOWER])
4181f872924SQu Wenruo 		list_add_tail(&edge->list[UPPER], &pending_edge);
4191f872924SQu Wenruo 
4201f872924SQu Wenruo 	while (!list_empty(&pending_edge)) {
421a26195a5SQu Wenruo 		struct btrfs_backref_node *upper;
422a26195a5SQu Wenruo 		struct btrfs_backref_node *lower;
4231f872924SQu Wenruo 		struct rb_node *rb_node;
4241f872924SQu Wenruo 
425a26195a5SQu Wenruo 		edge = list_first_entry(&pending_edge,
426a26195a5SQu Wenruo 				struct btrfs_backref_edge, list[UPPER]);
4271f872924SQu Wenruo 		list_del_init(&edge->list[UPPER]);
4281f872924SQu Wenruo 		upper = edge->node[UPPER];
4291f872924SQu Wenruo 		lower = edge->node[LOWER];
4301f872924SQu Wenruo 
4311f872924SQu Wenruo 		/* Parent is detached, no need to keep any edges */
4321f872924SQu Wenruo 		if (upper->detached) {
4331f872924SQu Wenruo 			list_del(&edge->list[LOWER]);
434741188d3SQu Wenruo 			btrfs_backref_free_edge(cache, edge);
4351f872924SQu Wenruo 
4361f872924SQu Wenruo 			/* Lower node is orphan, queue for cleanup */
4371f872924SQu Wenruo 			if (list_empty(&lower->upper))
4381f872924SQu Wenruo 				list_add(&lower->list, useless_node);
4391f872924SQu Wenruo 			continue;
4401f872924SQu Wenruo 		}
4411f872924SQu Wenruo 
4421f872924SQu Wenruo 		/*
4431f872924SQu Wenruo 		 * All new nodes added in current build_backref_tree() haven't
4441f872924SQu Wenruo 		 * been linked to the cache rb tree.
4451f872924SQu Wenruo 		 * So if we have upper->rb_node populated, this means a cache
4461f872924SQu Wenruo 		 * hit. We only need to link the edge, as @upper and all its
4471f872924SQu Wenruo 		 * parent have already been linked.
4481f872924SQu Wenruo 		 */
4491f872924SQu Wenruo 		if (!RB_EMPTY_NODE(&upper->rb_node)) {
4501f872924SQu Wenruo 			if (upper->lowest) {
4511f872924SQu Wenruo 				list_del_init(&upper->lower);
4521f872924SQu Wenruo 				upper->lowest = 0;
4531f872924SQu Wenruo 			}
4541f872924SQu Wenruo 
4551f872924SQu Wenruo 			list_add_tail(&edge->list[UPPER], &upper->lower);
4561f872924SQu Wenruo 			continue;
4571f872924SQu Wenruo 		}
4581f872924SQu Wenruo 
4591f872924SQu Wenruo 		/* Sanity check, we shouldn't have any unchecked nodes */
4601f872924SQu Wenruo 		if (!upper->checked) {
4611f872924SQu Wenruo 			ASSERT(0);
4621f872924SQu Wenruo 			return -EUCLEAN;
4631f872924SQu Wenruo 		}
4641f872924SQu Wenruo 
4651f872924SQu Wenruo 		/* Sanity check, COW-only node has non-COW-only parent */
4661f872924SQu Wenruo 		if (start->cowonly != upper->cowonly) {
4671f872924SQu Wenruo 			ASSERT(0);
4681f872924SQu Wenruo 			return -EUCLEAN;
4691f872924SQu Wenruo 		}
4701f872924SQu Wenruo 
4711f872924SQu Wenruo 		/* Only cache non-COW-only (subvolume trees) tree blocks */
4721f872924SQu Wenruo 		if (!upper->cowonly) {
473e9a28dc5SQu Wenruo 			rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
4741f872924SQu Wenruo 						   &upper->rb_node);
4751f872924SQu Wenruo 			if (rb_node) {
476982c92cbSQu Wenruo 				btrfs_backref_panic(cache->fs_info,
477982c92cbSQu Wenruo 						upper->bytenr, -EEXIST);
4781f872924SQu Wenruo 				return -EUCLEAN;
4791f872924SQu Wenruo 			}
4801f872924SQu Wenruo 		}
4811f872924SQu Wenruo 
4821f872924SQu Wenruo 		list_add_tail(&edge->list[UPPER], &upper->lower);
4831f872924SQu Wenruo 
4841f872924SQu Wenruo 		/*
4851f872924SQu Wenruo 		 * Also queue all the parent edges of this uncached node to
4861f872924SQu Wenruo 		 * finish the upper linkage
4871f872924SQu Wenruo 		 */
4881f872924SQu Wenruo 		list_for_each_entry(edge, &upper->upper, list[LOWER])
4891f872924SQu Wenruo 			list_add_tail(&edge->list[UPPER], &pending_edge);
4901f872924SQu Wenruo 	}
4911f872924SQu Wenruo 	return 0;
4921f872924SQu Wenruo }
4931f872924SQu Wenruo 
4941f872924SQu Wenruo /*
49529db137bSQu Wenruo  * For useless nodes, do two major clean ups:
49629db137bSQu Wenruo  *
49729db137bSQu Wenruo  * - Cleanup the children edges and nodes
49829db137bSQu Wenruo  *   If child node is also orphan (no parent) during cleanup, then the child
49929db137bSQu Wenruo  *   node will also be cleaned up.
50029db137bSQu Wenruo  *
50129db137bSQu Wenruo  * - Freeing up leaves (level 0), keeps nodes detached
50229db137bSQu Wenruo  *   For nodes, the node is still cached as "detached"
50329db137bSQu Wenruo  *
50429db137bSQu Wenruo  * Return false if @node is not in the @useless_nodes list.
50529db137bSQu Wenruo  * Return true if @node is in the @useless_nodes list.
50629db137bSQu Wenruo  */
50729db137bSQu Wenruo static bool handle_useless_nodes(struct reloc_control *rc,
508a26195a5SQu Wenruo 				 struct btrfs_backref_node *node)
50929db137bSQu Wenruo {
510a26195a5SQu Wenruo 	struct btrfs_backref_cache *cache = &rc->backref_cache;
51129db137bSQu Wenruo 	struct list_head *useless_node = &cache->useless_node;
51229db137bSQu Wenruo 	bool ret = false;
51329db137bSQu Wenruo 
51429db137bSQu Wenruo 	while (!list_empty(useless_node)) {
515a26195a5SQu Wenruo 		struct btrfs_backref_node *cur;
51629db137bSQu Wenruo 
517a26195a5SQu Wenruo 		cur = list_first_entry(useless_node, struct btrfs_backref_node,
51829db137bSQu Wenruo 				 list);
51929db137bSQu Wenruo 		list_del_init(&cur->list);
52029db137bSQu Wenruo 
52129db137bSQu Wenruo 		/* Only tree root nodes can be added to @useless_nodes */
52229db137bSQu Wenruo 		ASSERT(list_empty(&cur->upper));
52329db137bSQu Wenruo 
52429db137bSQu Wenruo 		if (cur == node)
52529db137bSQu Wenruo 			ret = true;
52629db137bSQu Wenruo 
52729db137bSQu Wenruo 		/* The node is the lowest node */
52829db137bSQu Wenruo 		if (cur->lowest) {
52929db137bSQu Wenruo 			list_del_init(&cur->lower);
53029db137bSQu Wenruo 			cur->lowest = 0;
53129db137bSQu Wenruo 		}
53229db137bSQu Wenruo 
53329db137bSQu Wenruo 		/* Cleanup the lower edges */
53429db137bSQu Wenruo 		while (!list_empty(&cur->lower)) {
535a26195a5SQu Wenruo 			struct btrfs_backref_edge *edge;
536a26195a5SQu Wenruo 			struct btrfs_backref_node *lower;
53729db137bSQu Wenruo 
53829db137bSQu Wenruo 			edge = list_entry(cur->lower.next,
539a26195a5SQu Wenruo 					struct btrfs_backref_edge, list[UPPER]);
54029db137bSQu Wenruo 			list_del(&edge->list[UPPER]);
54129db137bSQu Wenruo 			list_del(&edge->list[LOWER]);
54229db137bSQu Wenruo 			lower = edge->node[LOWER];
543741188d3SQu Wenruo 			btrfs_backref_free_edge(cache, edge);
54429db137bSQu Wenruo 
54529db137bSQu Wenruo 			/* Child node is also orphan, queue for cleanup */
54629db137bSQu Wenruo 			if (list_empty(&lower->upper))
54729db137bSQu Wenruo 				list_add(&lower->list, useless_node);
54829db137bSQu Wenruo 		}
54929db137bSQu Wenruo 		/* Mark this block processed for relocation */
55029db137bSQu Wenruo 		mark_block_processed(rc, cur);
55129db137bSQu Wenruo 
55229db137bSQu Wenruo 		/*
55329db137bSQu Wenruo 		 * Backref nodes for tree leaves are deleted from the cache.
55429db137bSQu Wenruo 		 * Backref nodes for upper level tree blocks are left in the
55529db137bSQu Wenruo 		 * cache to avoid unnecessary backref lookup.
55629db137bSQu Wenruo 		 */
55729db137bSQu Wenruo 		if (cur->level > 0) {
55829db137bSQu Wenruo 			list_add(&cur->list, &cache->detached);
55929db137bSQu Wenruo 			cur->detached = 1;
56029db137bSQu Wenruo 		} else {
56129db137bSQu Wenruo 			rb_erase(&cur->rb_node, &cache->rb_root);
562741188d3SQu Wenruo 			btrfs_backref_free_node(cache, cur);
56329db137bSQu Wenruo 		}
56429db137bSQu Wenruo 	}
56529db137bSQu Wenruo 	return ret;
56629db137bSQu Wenruo }
56729db137bSQu Wenruo 
56829db137bSQu Wenruo /*
569e7d571c7SQu Wenruo  * Build backref tree for a given tree block. Root of the backref tree
570e7d571c7SQu Wenruo  * corresponds the tree block, leaves of the backref tree correspond roots of
571e7d571c7SQu Wenruo  * b-trees that reference the tree block.
572e7d571c7SQu Wenruo  *
573e7d571c7SQu Wenruo  * The basic idea of this function is check backrefs of a given block to find
574e7d571c7SQu Wenruo  * upper level blocks that reference the block, and then check backrefs of
575e7d571c7SQu Wenruo  * these upper level blocks recursively. The recursion stops when tree root is
576e7d571c7SQu Wenruo  * reached or backrefs for the block is cached.
577e7d571c7SQu Wenruo  *
578e7d571c7SQu Wenruo  * NOTE: if we find that backrefs for a block are cached, we know backrefs for
579e7d571c7SQu Wenruo  * all upper level blocks that directly/indirectly reference the block are also
580e7d571c7SQu Wenruo  * cached.
581e7d571c7SQu Wenruo  */
582a26195a5SQu Wenruo static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
583e7d571c7SQu Wenruo 			struct reloc_control *rc, struct btrfs_key *node_key,
584e7d571c7SQu Wenruo 			int level, u64 bytenr)
585e7d571c7SQu Wenruo {
586e7d571c7SQu Wenruo 	struct btrfs_backref_iter *iter;
587a26195a5SQu Wenruo 	struct btrfs_backref_cache *cache = &rc->backref_cache;
588e7d571c7SQu Wenruo 	/* For searching parent of TREE_BLOCK_REF */
589e7d571c7SQu Wenruo 	struct btrfs_path *path;
590a26195a5SQu Wenruo 	struct btrfs_backref_node *cur;
591a26195a5SQu Wenruo 	struct btrfs_backref_node *upper;
592a26195a5SQu Wenruo 	struct btrfs_backref_node *lower;
593a26195a5SQu Wenruo 	struct btrfs_backref_node *node = NULL;
594a26195a5SQu Wenruo 	struct btrfs_backref_edge *edge;
595e7d571c7SQu Wenruo 	int ret;
596e7d571c7SQu Wenruo 	int err = 0;
597e7d571c7SQu Wenruo 
598e7d571c7SQu Wenruo 	iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS);
599e7d571c7SQu Wenruo 	if (!iter)
600e7d571c7SQu Wenruo 		return ERR_PTR(-ENOMEM);
601e7d571c7SQu Wenruo 	path = btrfs_alloc_path();
602e7d571c7SQu Wenruo 	if (!path) {
603e7d571c7SQu Wenruo 		err = -ENOMEM;
604e7d571c7SQu Wenruo 		goto out;
605e7d571c7SQu Wenruo 	}
606e7d571c7SQu Wenruo 
607b1818dabSQu Wenruo 	node = btrfs_backref_alloc_node(cache, bytenr, level);
608e7d571c7SQu Wenruo 	if (!node) {
609e7d571c7SQu Wenruo 		err = -ENOMEM;
610e7d571c7SQu Wenruo 		goto out;
611e7d571c7SQu Wenruo 	}
612e7d571c7SQu Wenruo 
613e7d571c7SQu Wenruo 	node->lowest = 1;
614e7d571c7SQu Wenruo 	cur = node;
615e7d571c7SQu Wenruo 
616e7d571c7SQu Wenruo 	/* Breadth-first search to build backref cache */
617e7d571c7SQu Wenruo 	do {
618*1b60d2ecSQu Wenruo 		ret = btrfs_backref_add_tree_node(cache, path, iter, node_key,
619*1b60d2ecSQu Wenruo 						  cur);
620e7d571c7SQu Wenruo 		if (ret < 0) {
621e7d571c7SQu Wenruo 			err = ret;
622e7d571c7SQu Wenruo 			goto out;
623e7d571c7SQu Wenruo 		}
624e7d571c7SQu Wenruo 		edge = list_first_entry_or_null(&cache->pending_edge,
625a26195a5SQu Wenruo 				struct btrfs_backref_edge, list[UPPER]);
626e7d571c7SQu Wenruo 		/*
627e7d571c7SQu Wenruo 		 * The pending list isn't empty, take the first block to
628e7d571c7SQu Wenruo 		 * process
629e7d571c7SQu Wenruo 		 */
630e7d571c7SQu Wenruo 		if (edge) {
6315d4f98a2SYan Zheng 			list_del_init(&edge->list[UPPER]);
6325d4f98a2SYan Zheng 			cur = edge->node[UPPER];
6335d4f98a2SYan Zheng 		}
634e7d571c7SQu Wenruo 	} while (edge);
6355d4f98a2SYan Zheng 
6361f872924SQu Wenruo 	/* Finish the upper linkage of newly added edges/nodes */
6371f872924SQu Wenruo 	ret = finish_upper_links(cache, node);
6381f872924SQu Wenruo 	if (ret < 0) {
6391f872924SQu Wenruo 		err = ret;
64075bfb9afSJosef Bacik 		goto out;
64175bfb9afSJosef Bacik 	}
64275bfb9afSJosef Bacik 
64329db137bSQu Wenruo 	if (handle_useless_nodes(rc, node))
6443fd0a558SYan, Zheng 		node = NULL;
6455d4f98a2SYan Zheng out:
64671f572a9SQu Wenruo 	btrfs_backref_iter_free(iter);
64771f572a9SQu Wenruo 	btrfs_free_path(path);
6485d4f98a2SYan Zheng 	if (err) {
64984780289SQu Wenruo 		while (!list_empty(&cache->useless_node)) {
65084780289SQu Wenruo 			lower = list_first_entry(&cache->useless_node,
651a26195a5SQu Wenruo 					   struct btrfs_backref_node, list);
65275bfb9afSJosef Bacik 			list_del_init(&lower->list);
6533fd0a558SYan, Zheng 		}
65484780289SQu Wenruo 		while (!list_empty(&cache->pending_edge)) {
65584780289SQu Wenruo 			edge = list_first_entry(&cache->pending_edge,
656a26195a5SQu Wenruo 					struct btrfs_backref_edge, list[UPPER]);
65775bfb9afSJosef Bacik 			list_del(&edge->list[UPPER]);
6583fd0a558SYan, Zheng 			list_del(&edge->list[LOWER]);
65975bfb9afSJosef Bacik 			lower = edge->node[LOWER];
6605d4f98a2SYan Zheng 			upper = edge->node[UPPER];
661741188d3SQu Wenruo 			btrfs_backref_free_edge(cache, edge);
66275bfb9afSJosef Bacik 
66375bfb9afSJosef Bacik 			/*
66475bfb9afSJosef Bacik 			 * Lower is no longer linked to any upper backref nodes
66575bfb9afSJosef Bacik 			 * and isn't in the cache, we can free it ourselves.
66675bfb9afSJosef Bacik 			 */
66775bfb9afSJosef Bacik 			if (list_empty(&lower->upper) &&
66875bfb9afSJosef Bacik 			    RB_EMPTY_NODE(&lower->rb_node))
66984780289SQu Wenruo 				list_add(&lower->list, &cache->useless_node);
67075bfb9afSJosef Bacik 
67175bfb9afSJosef Bacik 			if (!RB_EMPTY_NODE(&upper->rb_node))
67275bfb9afSJosef Bacik 				continue;
67375bfb9afSJosef Bacik 
67401327610SNicholas D Steeves 			/* Add this guy's upper edges to the list to process */
67575bfb9afSJosef Bacik 			list_for_each_entry(edge, &upper->upper, list[LOWER])
67684780289SQu Wenruo 				list_add_tail(&edge->list[UPPER],
67784780289SQu Wenruo 					      &cache->pending_edge);
67875bfb9afSJosef Bacik 			if (list_empty(&upper->upper))
67984780289SQu Wenruo 				list_add(&upper->list, &cache->useless_node);
68075bfb9afSJosef Bacik 		}
68175bfb9afSJosef Bacik 
68284780289SQu Wenruo 		while (!list_empty(&cache->useless_node)) {
68384780289SQu Wenruo 			lower = list_first_entry(&cache->useless_node,
684a26195a5SQu Wenruo 					   struct btrfs_backref_node, list);
68575bfb9afSJosef Bacik 			list_del_init(&lower->list);
6860fd8c3daSLiu Bo 			if (lower == node)
6870fd8c3daSLiu Bo 				node = NULL;
688741188d3SQu Wenruo 			btrfs_backref_free_node(cache, lower);
6895d4f98a2SYan Zheng 		}
6900fd8c3daSLiu Bo 
691023acb07SQu Wenruo 		btrfs_backref_cleanup_node(cache, node);
69284780289SQu Wenruo 		ASSERT(list_empty(&cache->useless_node) &&
69384780289SQu Wenruo 		       list_empty(&cache->pending_edge));
6945d4f98a2SYan Zheng 		return ERR_PTR(err);
6955d4f98a2SYan Zheng 	}
69675bfb9afSJosef Bacik 	ASSERT(!node || !node->detached);
69784780289SQu Wenruo 	ASSERT(list_empty(&cache->useless_node) &&
69884780289SQu Wenruo 	       list_empty(&cache->pending_edge));
6995d4f98a2SYan Zheng 	return node;
7005d4f98a2SYan Zheng }
7015d4f98a2SYan Zheng 
7025d4f98a2SYan Zheng /*
7033fd0a558SYan, Zheng  * helper to add backref node for the newly created snapshot.
7043fd0a558SYan, Zheng  * the backref node is created by cloning backref node that
7053fd0a558SYan, Zheng  * corresponds to root of source tree
7063fd0a558SYan, Zheng  */
7073fd0a558SYan, Zheng static int clone_backref_node(struct btrfs_trans_handle *trans,
7083fd0a558SYan, Zheng 			      struct reloc_control *rc,
7093fd0a558SYan, Zheng 			      struct btrfs_root *src,
7103fd0a558SYan, Zheng 			      struct btrfs_root *dest)
7113fd0a558SYan, Zheng {
7123fd0a558SYan, Zheng 	struct btrfs_root *reloc_root = src->reloc_root;
713a26195a5SQu Wenruo 	struct btrfs_backref_cache *cache = &rc->backref_cache;
714a26195a5SQu Wenruo 	struct btrfs_backref_node *node = NULL;
715a26195a5SQu Wenruo 	struct btrfs_backref_node *new_node;
716a26195a5SQu Wenruo 	struct btrfs_backref_edge *edge;
717a26195a5SQu Wenruo 	struct btrfs_backref_edge *new_edge;
7183fd0a558SYan, Zheng 	struct rb_node *rb_node;
7193fd0a558SYan, Zheng 
7203fd0a558SYan, Zheng 	if (cache->last_trans > 0)
7213fd0a558SYan, Zheng 		update_backref_cache(trans, cache);
7223fd0a558SYan, Zheng 
723e9a28dc5SQu Wenruo 	rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
7243fd0a558SYan, Zheng 	if (rb_node) {
725a26195a5SQu Wenruo 		node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
7263fd0a558SYan, Zheng 		if (node->detached)
7273fd0a558SYan, Zheng 			node = NULL;
7283fd0a558SYan, Zheng 		else
7293fd0a558SYan, Zheng 			BUG_ON(node->new_bytenr != reloc_root->node->start);
7303fd0a558SYan, Zheng 	}
7313fd0a558SYan, Zheng 
7323fd0a558SYan, Zheng 	if (!node) {
733e9a28dc5SQu Wenruo 		rb_node = rb_simple_search(&cache->rb_root,
7343fd0a558SYan, Zheng 					   reloc_root->commit_root->start);
7353fd0a558SYan, Zheng 		if (rb_node) {
736a26195a5SQu Wenruo 			node = rb_entry(rb_node, struct btrfs_backref_node,
7373fd0a558SYan, Zheng 					rb_node);
7383fd0a558SYan, Zheng 			BUG_ON(node->detached);
7393fd0a558SYan, Zheng 		}
7403fd0a558SYan, Zheng 	}
7413fd0a558SYan, Zheng 
7423fd0a558SYan, Zheng 	if (!node)
7433fd0a558SYan, Zheng 		return 0;
7443fd0a558SYan, Zheng 
745b1818dabSQu Wenruo 	new_node = btrfs_backref_alloc_node(cache, dest->node->start,
746b1818dabSQu Wenruo 					    node->level);
7473fd0a558SYan, Zheng 	if (!new_node)
7483fd0a558SYan, Zheng 		return -ENOMEM;
7493fd0a558SYan, Zheng 
7503fd0a558SYan, Zheng 	new_node->lowest = node->lowest;
7516848ad64SYan, Zheng 	new_node->checked = 1;
75200246528SJosef Bacik 	new_node->root = btrfs_grab_root(dest);
7530b530bc5SJosef Bacik 	ASSERT(new_node->root);
7543fd0a558SYan, Zheng 
7553fd0a558SYan, Zheng 	if (!node->lowest) {
7563fd0a558SYan, Zheng 		list_for_each_entry(edge, &node->lower, list[UPPER]) {
75747254d07SQu Wenruo 			new_edge = btrfs_backref_alloc_edge(cache);
7583fd0a558SYan, Zheng 			if (!new_edge)
7593fd0a558SYan, Zheng 				goto fail;
7603fd0a558SYan, Zheng 
761f39911e5SQu Wenruo 			btrfs_backref_link_edge(new_edge, edge->node[LOWER],
762f39911e5SQu Wenruo 						new_node, LINK_UPPER);
7633fd0a558SYan, Zheng 		}
76476b9e23dSMiao Xie 	} else {
76576b9e23dSMiao Xie 		list_add_tail(&new_node->lower, &cache->leaves);
7663fd0a558SYan, Zheng 	}
7673fd0a558SYan, Zheng 
768e9a28dc5SQu Wenruo 	rb_node = rb_simple_insert(&cache->rb_root, new_node->bytenr,
7693fd0a558SYan, Zheng 				   &new_node->rb_node);
77043c04fb1SJeff Mahoney 	if (rb_node)
771982c92cbSQu Wenruo 		btrfs_backref_panic(trans->fs_info, new_node->bytenr, -EEXIST);
7723fd0a558SYan, Zheng 
7733fd0a558SYan, Zheng 	if (!new_node->lowest) {
7743fd0a558SYan, Zheng 		list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
7753fd0a558SYan, Zheng 			list_add_tail(&new_edge->list[LOWER],
7763fd0a558SYan, Zheng 				      &new_edge->node[LOWER]->upper);
7773fd0a558SYan, Zheng 		}
7783fd0a558SYan, Zheng 	}
7793fd0a558SYan, Zheng 	return 0;
7803fd0a558SYan, Zheng fail:
7813fd0a558SYan, Zheng 	while (!list_empty(&new_node->lower)) {
7823fd0a558SYan, Zheng 		new_edge = list_entry(new_node->lower.next,
783a26195a5SQu Wenruo 				      struct btrfs_backref_edge, list[UPPER]);
7843fd0a558SYan, Zheng 		list_del(&new_edge->list[UPPER]);
785741188d3SQu Wenruo 		btrfs_backref_free_edge(cache, new_edge);
7863fd0a558SYan, Zheng 	}
787741188d3SQu Wenruo 	btrfs_backref_free_node(cache, new_node);
7883fd0a558SYan, Zheng 	return -ENOMEM;
7893fd0a558SYan, Zheng }
7903fd0a558SYan, Zheng 
7913fd0a558SYan, Zheng /*
7925d4f98a2SYan Zheng  * helper to add 'address of tree root -> reloc tree' mapping
7935d4f98a2SYan Zheng  */
794ffd7b339SJeff Mahoney static int __must_check __add_reloc_root(struct btrfs_root *root)
7955d4f98a2SYan Zheng {
7960b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
7975d4f98a2SYan Zheng 	struct rb_node *rb_node;
7985d4f98a2SYan Zheng 	struct mapping_node *node;
7990b246afaSJeff Mahoney 	struct reloc_control *rc = fs_info->reloc_ctl;
8005d4f98a2SYan Zheng 
8015d4f98a2SYan Zheng 	node = kmalloc(sizeof(*node), GFP_NOFS);
802ffd7b339SJeff Mahoney 	if (!node)
803ffd7b339SJeff Mahoney 		return -ENOMEM;
8045d4f98a2SYan Zheng 
805ea287ab1SJosef Bacik 	node->bytenr = root->commit_root->start;
8065d4f98a2SYan Zheng 	node->data = root;
8075d4f98a2SYan Zheng 
8085d4f98a2SYan Zheng 	spin_lock(&rc->reloc_root_tree.lock);
809e9a28dc5SQu Wenruo 	rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
8105d4f98a2SYan Zheng 				   node->bytenr, &node->rb_node);
8115d4f98a2SYan Zheng 	spin_unlock(&rc->reloc_root_tree.lock);
812ffd7b339SJeff Mahoney 	if (rb_node) {
8130b246afaSJeff Mahoney 		btrfs_panic(fs_info, -EEXIST,
8145d163e0eSJeff Mahoney 			    "Duplicate root found for start=%llu while inserting into relocation tree",
8155d163e0eSJeff Mahoney 			    node->bytenr);
816ffd7b339SJeff Mahoney 	}
8175d4f98a2SYan Zheng 
8185d4f98a2SYan Zheng 	list_add_tail(&root->root_list, &rc->reloc_roots);
8195d4f98a2SYan Zheng 	return 0;
8205d4f98a2SYan Zheng }
8215d4f98a2SYan Zheng 
8225d4f98a2SYan Zheng /*
823c974c464SWang Shilong  * helper to delete the 'address of tree root -> reloc tree'
8245d4f98a2SYan Zheng  * mapping
8255d4f98a2SYan Zheng  */
826c974c464SWang Shilong static void __del_reloc_root(struct btrfs_root *root)
8275d4f98a2SYan Zheng {
8280b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
8295d4f98a2SYan Zheng 	struct rb_node *rb_node;
8305d4f98a2SYan Zheng 	struct mapping_node *node = NULL;
8310b246afaSJeff Mahoney 	struct reloc_control *rc = fs_info->reloc_ctl;
832f44deb74SJosef Bacik 	bool put_ref = false;
8335d4f98a2SYan Zheng 
83465c6e82bSQu Wenruo 	if (rc && root->node) {
8355d4f98a2SYan Zheng 		spin_lock(&rc->reloc_root_tree.lock);
836e9a28dc5SQu Wenruo 		rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
837ea287ab1SJosef Bacik 					   root->commit_root->start);
838c974c464SWang Shilong 		if (rb_node) {
839c974c464SWang Shilong 			node = rb_entry(rb_node, struct mapping_node, rb_node);
840c974c464SWang Shilong 			rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
841ea287ab1SJosef Bacik 			RB_CLEAR_NODE(&node->rb_node);
842c974c464SWang Shilong 		}
843c974c464SWang Shilong 		spin_unlock(&rc->reloc_root_tree.lock);
844c974c464SWang Shilong 		if (!node)
845c974c464SWang Shilong 			return;
846c974c464SWang Shilong 		BUG_ON((struct btrfs_root *)node->data != root);
847389305b2SQu Wenruo 	}
848c974c464SWang Shilong 
849f44deb74SJosef Bacik 	/*
850f44deb74SJosef Bacik 	 * We only put the reloc root here if it's on the list.  There's a lot
851f44deb74SJosef Bacik 	 * of places where the pattern is to splice the rc->reloc_roots, process
852f44deb74SJosef Bacik 	 * the reloc roots, and then add the reloc root back onto
853f44deb74SJosef Bacik 	 * rc->reloc_roots.  If we call __del_reloc_root while it's off of the
854f44deb74SJosef Bacik 	 * list we don't want the reference being dropped, because the guy
855f44deb74SJosef Bacik 	 * messing with the list is in charge of the reference.
856f44deb74SJosef Bacik 	 */
8570b246afaSJeff Mahoney 	spin_lock(&fs_info->trans_lock);
858f44deb74SJosef Bacik 	if (!list_empty(&root->root_list)) {
859f44deb74SJosef Bacik 		put_ref = true;
860c974c464SWang Shilong 		list_del_init(&root->root_list);
861f44deb74SJosef Bacik 	}
8620b246afaSJeff Mahoney 	spin_unlock(&fs_info->trans_lock);
863f44deb74SJosef Bacik 	if (put_ref)
864f44deb74SJosef Bacik 		btrfs_put_root(root);
865c974c464SWang Shilong 	kfree(node);
866c974c464SWang Shilong }
867c974c464SWang Shilong 
868c974c464SWang Shilong /*
869c974c464SWang Shilong  * helper to update the 'address of tree root -> reloc tree'
870c974c464SWang Shilong  * mapping
871c974c464SWang Shilong  */
872ea287ab1SJosef Bacik static int __update_reloc_root(struct btrfs_root *root)
873c974c464SWang Shilong {
8740b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
875c974c464SWang Shilong 	struct rb_node *rb_node;
876c974c464SWang Shilong 	struct mapping_node *node = NULL;
8770b246afaSJeff Mahoney 	struct reloc_control *rc = fs_info->reloc_ctl;
878c974c464SWang Shilong 
879c974c464SWang Shilong 	spin_lock(&rc->reloc_root_tree.lock);
880e9a28dc5SQu Wenruo 	rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
881ea287ab1SJosef Bacik 				   root->commit_root->start);
8825d4f98a2SYan Zheng 	if (rb_node) {
8835d4f98a2SYan Zheng 		node = rb_entry(rb_node, struct mapping_node, rb_node);
8845d4f98a2SYan Zheng 		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
8855d4f98a2SYan Zheng 	}
8865d4f98a2SYan Zheng 	spin_unlock(&rc->reloc_root_tree.lock);
8875d4f98a2SYan Zheng 
8888f71f3e0SLiu Bo 	if (!node)
8898f71f3e0SLiu Bo 		return 0;
8905d4f98a2SYan Zheng 	BUG_ON((struct btrfs_root *)node->data != root);
8915d4f98a2SYan Zheng 
8925d4f98a2SYan Zheng 	spin_lock(&rc->reloc_root_tree.lock);
893ea287ab1SJosef Bacik 	node->bytenr = root->node->start;
894e9a28dc5SQu Wenruo 	rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
8955d4f98a2SYan Zheng 				   node->bytenr, &node->rb_node);
8965d4f98a2SYan Zheng 	spin_unlock(&rc->reloc_root_tree.lock);
89743c04fb1SJeff Mahoney 	if (rb_node)
898982c92cbSQu Wenruo 		btrfs_backref_panic(fs_info, node->bytenr, -EEXIST);
8995d4f98a2SYan Zheng 	return 0;
9005d4f98a2SYan Zheng }
9015d4f98a2SYan Zheng 
9023fd0a558SYan, Zheng static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
9033fd0a558SYan, Zheng 					struct btrfs_root *root, u64 objectid)
9045d4f98a2SYan Zheng {
9050b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
9065d4f98a2SYan Zheng 	struct btrfs_root *reloc_root;
9075d4f98a2SYan Zheng 	struct extent_buffer *eb;
9085d4f98a2SYan Zheng 	struct btrfs_root_item *root_item;
9095d4f98a2SYan Zheng 	struct btrfs_key root_key;
9105d4f98a2SYan Zheng 	int ret;
9115d4f98a2SYan Zheng 
9125d4f98a2SYan Zheng 	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
9135d4f98a2SYan Zheng 	BUG_ON(!root_item);
9145d4f98a2SYan Zheng 
9155d4f98a2SYan Zheng 	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
9165d4f98a2SYan Zheng 	root_key.type = BTRFS_ROOT_ITEM_KEY;
9173fd0a558SYan, Zheng 	root_key.offset = objectid;
9185d4f98a2SYan Zheng 
9193fd0a558SYan, Zheng 	if (root->root_key.objectid == objectid) {
920054570a1SFilipe Manana 		u64 commit_root_gen;
921054570a1SFilipe Manana 
9223fd0a558SYan, Zheng 		/* called by btrfs_init_reloc_root */
9235d4f98a2SYan Zheng 		ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
9245d4f98a2SYan Zheng 				      BTRFS_TREE_RELOC_OBJECTID);
9255d4f98a2SYan Zheng 		BUG_ON(ret);
926054570a1SFilipe Manana 		/*
927054570a1SFilipe Manana 		 * Set the last_snapshot field to the generation of the commit
928054570a1SFilipe Manana 		 * root - like this ctree.c:btrfs_block_can_be_shared() behaves
929054570a1SFilipe Manana 		 * correctly (returns true) when the relocation root is created
930054570a1SFilipe Manana 		 * either inside the critical section of a transaction commit
931054570a1SFilipe Manana 		 * (through transaction.c:qgroup_account_snapshot()) and when
932054570a1SFilipe Manana 		 * it's created before the transaction commit is started.
933054570a1SFilipe Manana 		 */
934054570a1SFilipe Manana 		commit_root_gen = btrfs_header_generation(root->commit_root);
935054570a1SFilipe Manana 		btrfs_set_root_last_snapshot(&root->root_item, commit_root_gen);
9363fd0a558SYan, Zheng 	} else {
9373fd0a558SYan, Zheng 		/*
9383fd0a558SYan, Zheng 		 * called by btrfs_reloc_post_snapshot_hook.
9393fd0a558SYan, Zheng 		 * the source tree is a reloc tree, all tree blocks
9403fd0a558SYan, Zheng 		 * modified after it was created have RELOC flag
9413fd0a558SYan, Zheng 		 * set in their headers. so it's OK to not update
9423fd0a558SYan, Zheng 		 * the 'last_snapshot'.
9433fd0a558SYan, Zheng 		 */
9443fd0a558SYan, Zheng 		ret = btrfs_copy_root(trans, root, root->node, &eb,
9453fd0a558SYan, Zheng 				      BTRFS_TREE_RELOC_OBJECTID);
9463fd0a558SYan, Zheng 		BUG_ON(ret);
9473fd0a558SYan, Zheng 	}
9483fd0a558SYan, Zheng 
9495d4f98a2SYan Zheng 	memcpy(root_item, &root->root_item, sizeof(*root_item));
9505d4f98a2SYan Zheng 	btrfs_set_root_bytenr(root_item, eb->start);
9515d4f98a2SYan Zheng 	btrfs_set_root_level(root_item, btrfs_header_level(eb));
9525d4f98a2SYan Zheng 	btrfs_set_root_generation(root_item, trans->transid);
9533fd0a558SYan, Zheng 
9543fd0a558SYan, Zheng 	if (root->root_key.objectid == objectid) {
9553fd0a558SYan, Zheng 		btrfs_set_root_refs(root_item, 0);
9563fd0a558SYan, Zheng 		memset(&root_item->drop_progress, 0,
9573fd0a558SYan, Zheng 		       sizeof(struct btrfs_disk_key));
9585d4f98a2SYan Zheng 		root_item->drop_level = 0;
9593fd0a558SYan, Zheng 	}
9605d4f98a2SYan Zheng 
9615d4f98a2SYan Zheng 	btrfs_tree_unlock(eb);
9625d4f98a2SYan Zheng 	free_extent_buffer(eb);
9635d4f98a2SYan Zheng 
9640b246afaSJeff Mahoney 	ret = btrfs_insert_root(trans, fs_info->tree_root,
9655d4f98a2SYan Zheng 				&root_key, root_item);
9665d4f98a2SYan Zheng 	BUG_ON(ret);
9675d4f98a2SYan Zheng 	kfree(root_item);
9685d4f98a2SYan Zheng 
9693dbf1738SJosef Bacik 	reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key);
9705d4f98a2SYan Zheng 	BUG_ON(IS_ERR(reloc_root));
9713dbf1738SJosef Bacik 	set_bit(BTRFS_ROOT_REF_COWS, &reloc_root->state);
9725d4f98a2SYan Zheng 	reloc_root->last_trans = trans->transid;
9733fd0a558SYan, Zheng 	return reloc_root;
9743fd0a558SYan, Zheng }
9753fd0a558SYan, Zheng 
9763fd0a558SYan, Zheng /*
9773fd0a558SYan, Zheng  * create reloc tree for a given fs tree. reloc tree is just a
9783fd0a558SYan, Zheng  * snapshot of the fs tree with special root objectid.
979f44deb74SJosef Bacik  *
980f44deb74SJosef Bacik  * The reloc_root comes out of here with two references, one for
981f44deb74SJosef Bacik  * root->reloc_root, and another for being on the rc->reloc_roots list.
9823fd0a558SYan, Zheng  */
9833fd0a558SYan, Zheng int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
9843fd0a558SYan, Zheng 			  struct btrfs_root *root)
9853fd0a558SYan, Zheng {
9860b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
9873fd0a558SYan, Zheng 	struct btrfs_root *reloc_root;
9880b246afaSJeff Mahoney 	struct reloc_control *rc = fs_info->reloc_ctl;
98920dd2cbfSMiao Xie 	struct btrfs_block_rsv *rsv;
9903fd0a558SYan, Zheng 	int clear_rsv = 0;
991ffd7b339SJeff Mahoney 	int ret;
9923fd0a558SYan, Zheng 
993aec7db3bSJosef Bacik 	if (!rc)
9942abc726aSJosef Bacik 		return 0;
9952abc726aSJosef Bacik 
9961fac4a54SQu Wenruo 	/*
9971fac4a54SQu Wenruo 	 * The subvolume has reloc tree but the swap is finished, no need to
9981fac4a54SQu Wenruo 	 * create/update the dead reloc tree
9991fac4a54SQu Wenruo 	 */
10006282675eSQu Wenruo 	if (reloc_root_is_dead(root))
10011fac4a54SQu Wenruo 		return 0;
10021fac4a54SQu Wenruo 
1003aec7db3bSJosef Bacik 	/*
1004aec7db3bSJosef Bacik 	 * This is subtle but important.  We do not do
1005aec7db3bSJosef Bacik 	 * record_root_in_transaction for reloc roots, instead we record their
1006aec7db3bSJosef Bacik 	 * corresponding fs root, and then here we update the last trans for the
1007aec7db3bSJosef Bacik 	 * reloc root.  This means that we have to do this for the entire life
1008aec7db3bSJosef Bacik 	 * of the reloc root, regardless of which stage of the relocation we are
1009aec7db3bSJosef Bacik 	 * in.
1010aec7db3bSJosef Bacik 	 */
10113fd0a558SYan, Zheng 	if (root->reloc_root) {
10123fd0a558SYan, Zheng 		reloc_root = root->reloc_root;
10133fd0a558SYan, Zheng 		reloc_root->last_trans = trans->transid;
10143fd0a558SYan, Zheng 		return 0;
10153fd0a558SYan, Zheng 	}
10163fd0a558SYan, Zheng 
1017aec7db3bSJosef Bacik 	/*
1018aec7db3bSJosef Bacik 	 * We are merging reloc roots, we do not need new reloc trees.  Also
1019aec7db3bSJosef Bacik 	 * reloc trees never need their own reloc tree.
1020aec7db3bSJosef Bacik 	 */
1021aec7db3bSJosef Bacik 	if (!rc->create_reloc_tree ||
1022aec7db3bSJosef Bacik 	    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1023aec7db3bSJosef Bacik 		return 0;
1024aec7db3bSJosef Bacik 
102520dd2cbfSMiao Xie 	if (!trans->reloc_reserved) {
102620dd2cbfSMiao Xie 		rsv = trans->block_rsv;
10273fd0a558SYan, Zheng 		trans->block_rsv = rc->block_rsv;
10283fd0a558SYan, Zheng 		clear_rsv = 1;
10293fd0a558SYan, Zheng 	}
10303fd0a558SYan, Zheng 	reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
10313fd0a558SYan, Zheng 	if (clear_rsv)
103220dd2cbfSMiao Xie 		trans->block_rsv = rsv;
10335d4f98a2SYan Zheng 
1034ffd7b339SJeff Mahoney 	ret = __add_reloc_root(reloc_root);
1035ffd7b339SJeff Mahoney 	BUG_ON(ret < 0);
1036f44deb74SJosef Bacik 	root->reloc_root = btrfs_grab_root(reloc_root);
10375d4f98a2SYan Zheng 	return 0;
10385d4f98a2SYan Zheng }
10395d4f98a2SYan Zheng 
10405d4f98a2SYan Zheng /*
10415d4f98a2SYan Zheng  * update root item of reloc tree
10425d4f98a2SYan Zheng  */
10435d4f98a2SYan Zheng int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
10445d4f98a2SYan Zheng 			    struct btrfs_root *root)
10455d4f98a2SYan Zheng {
10460b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
10475d4f98a2SYan Zheng 	struct btrfs_root *reloc_root;
10485d4f98a2SYan Zheng 	struct btrfs_root_item *root_item;
10495d4f98a2SYan Zheng 	int ret;
10505d4f98a2SYan Zheng 
10516282675eSQu Wenruo 	if (!have_reloc_root(root))
10527585717fSChris Mason 		goto out;
10535d4f98a2SYan Zheng 
10545d4f98a2SYan Zheng 	reloc_root = root->reloc_root;
10555d4f98a2SYan Zheng 	root_item = &reloc_root->root_item;
10565d4f98a2SYan Zheng 
1057f44deb74SJosef Bacik 	/*
1058f44deb74SJosef Bacik 	 * We are probably ok here, but __del_reloc_root() will drop its ref of
1059f44deb74SJosef Bacik 	 * the root.  We have the ref for root->reloc_root, but just in case
1060f44deb74SJosef Bacik 	 * hold it while we update the reloc root.
1061f44deb74SJosef Bacik 	 */
1062f44deb74SJosef Bacik 	btrfs_grab_root(reloc_root);
1063f44deb74SJosef Bacik 
1064d2311e69SQu Wenruo 	/* root->reloc_root will stay until current relocation finished */
10650b246afaSJeff Mahoney 	if (fs_info->reloc_ctl->merge_reloc_tree &&
10663fd0a558SYan, Zheng 	    btrfs_root_refs(root_item) == 0) {
1067d2311e69SQu Wenruo 		set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
10686282675eSQu Wenruo 		/*
10696282675eSQu Wenruo 		 * Mark the tree as dead before we change reloc_root so
10706282675eSQu Wenruo 		 * have_reloc_root will not touch it from now on.
10716282675eSQu Wenruo 		 */
10726282675eSQu Wenruo 		smp_wmb();
1073c974c464SWang Shilong 		__del_reloc_root(reloc_root);
10745d4f98a2SYan Zheng 	}
10755d4f98a2SYan Zheng 
10765d4f98a2SYan Zheng 	if (reloc_root->commit_root != reloc_root->node) {
1077ea287ab1SJosef Bacik 		__update_reloc_root(reloc_root);
10785d4f98a2SYan Zheng 		btrfs_set_root_node(root_item, reloc_root->node);
10795d4f98a2SYan Zheng 		free_extent_buffer(reloc_root->commit_root);
10805d4f98a2SYan Zheng 		reloc_root->commit_root = btrfs_root_node(reloc_root);
10815d4f98a2SYan Zheng 	}
10825d4f98a2SYan Zheng 
10830b246afaSJeff Mahoney 	ret = btrfs_update_root(trans, fs_info->tree_root,
10845d4f98a2SYan Zheng 				&reloc_root->root_key, root_item);
10855d4f98a2SYan Zheng 	BUG_ON(ret);
1086f44deb74SJosef Bacik 	btrfs_put_root(reloc_root);
10877585717fSChris Mason out:
10885d4f98a2SYan Zheng 	return 0;
10895d4f98a2SYan Zheng }
10905d4f98a2SYan Zheng 
10915d4f98a2SYan Zheng /*
10925d4f98a2SYan Zheng  * helper to find first cached inode with inode number >= objectid
10935d4f98a2SYan Zheng  * in a subvolume
10945d4f98a2SYan Zheng  */
10955d4f98a2SYan Zheng static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
10965d4f98a2SYan Zheng {
10975d4f98a2SYan Zheng 	struct rb_node *node;
10985d4f98a2SYan Zheng 	struct rb_node *prev;
10995d4f98a2SYan Zheng 	struct btrfs_inode *entry;
11005d4f98a2SYan Zheng 	struct inode *inode;
11015d4f98a2SYan Zheng 
11025d4f98a2SYan Zheng 	spin_lock(&root->inode_lock);
11035d4f98a2SYan Zheng again:
11045d4f98a2SYan Zheng 	node = root->inode_tree.rb_node;
11055d4f98a2SYan Zheng 	prev = NULL;
11065d4f98a2SYan Zheng 	while (node) {
11075d4f98a2SYan Zheng 		prev = node;
11085d4f98a2SYan Zheng 		entry = rb_entry(node, struct btrfs_inode, rb_node);
11095d4f98a2SYan Zheng 
11104a0cc7caSNikolay Borisov 		if (objectid < btrfs_ino(entry))
11115d4f98a2SYan Zheng 			node = node->rb_left;
11124a0cc7caSNikolay Borisov 		else if (objectid > btrfs_ino(entry))
11135d4f98a2SYan Zheng 			node = node->rb_right;
11145d4f98a2SYan Zheng 		else
11155d4f98a2SYan Zheng 			break;
11165d4f98a2SYan Zheng 	}
11175d4f98a2SYan Zheng 	if (!node) {
11185d4f98a2SYan Zheng 		while (prev) {
11195d4f98a2SYan Zheng 			entry = rb_entry(prev, struct btrfs_inode, rb_node);
11204a0cc7caSNikolay Borisov 			if (objectid <= btrfs_ino(entry)) {
11215d4f98a2SYan Zheng 				node = prev;
11225d4f98a2SYan Zheng 				break;
11235d4f98a2SYan Zheng 			}
11245d4f98a2SYan Zheng 			prev = rb_next(prev);
11255d4f98a2SYan Zheng 		}
11265d4f98a2SYan Zheng 	}
11275d4f98a2SYan Zheng 	while (node) {
11285d4f98a2SYan Zheng 		entry = rb_entry(node, struct btrfs_inode, rb_node);
11295d4f98a2SYan Zheng 		inode = igrab(&entry->vfs_inode);
11305d4f98a2SYan Zheng 		if (inode) {
11315d4f98a2SYan Zheng 			spin_unlock(&root->inode_lock);
11325d4f98a2SYan Zheng 			return inode;
11335d4f98a2SYan Zheng 		}
11345d4f98a2SYan Zheng 
11354a0cc7caSNikolay Borisov 		objectid = btrfs_ino(entry) + 1;
11365d4f98a2SYan Zheng 		if (cond_resched_lock(&root->inode_lock))
11375d4f98a2SYan Zheng 			goto again;
11385d4f98a2SYan Zheng 
11395d4f98a2SYan Zheng 		node = rb_next(node);
11405d4f98a2SYan Zheng 	}
11415d4f98a2SYan Zheng 	spin_unlock(&root->inode_lock);
11425d4f98a2SYan Zheng 	return NULL;
11435d4f98a2SYan Zheng }
11445d4f98a2SYan Zheng 
11455d4f98a2SYan Zheng /*
11465d4f98a2SYan Zheng  * get new location of data
11475d4f98a2SYan Zheng  */
11485d4f98a2SYan Zheng static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
11495d4f98a2SYan Zheng 			    u64 bytenr, u64 num_bytes)
11505d4f98a2SYan Zheng {
11515d4f98a2SYan Zheng 	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
11525d4f98a2SYan Zheng 	struct btrfs_path *path;
11535d4f98a2SYan Zheng 	struct btrfs_file_extent_item *fi;
11545d4f98a2SYan Zheng 	struct extent_buffer *leaf;
11555d4f98a2SYan Zheng 	int ret;
11565d4f98a2SYan Zheng 
11575d4f98a2SYan Zheng 	path = btrfs_alloc_path();
11585d4f98a2SYan Zheng 	if (!path)
11595d4f98a2SYan Zheng 		return -ENOMEM;
11605d4f98a2SYan Zheng 
11615d4f98a2SYan Zheng 	bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1162f85b7379SDavid Sterba 	ret = btrfs_lookup_file_extent(NULL, root, path,
1163f85b7379SDavid Sterba 			btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
11645d4f98a2SYan Zheng 	if (ret < 0)
11655d4f98a2SYan Zheng 		goto out;
11665d4f98a2SYan Zheng 	if (ret > 0) {
11675d4f98a2SYan Zheng 		ret = -ENOENT;
11685d4f98a2SYan Zheng 		goto out;
11695d4f98a2SYan Zheng 	}
11705d4f98a2SYan Zheng 
11715d4f98a2SYan Zheng 	leaf = path->nodes[0];
11725d4f98a2SYan Zheng 	fi = btrfs_item_ptr(leaf, path->slots[0],
11735d4f98a2SYan Zheng 			    struct btrfs_file_extent_item);
11745d4f98a2SYan Zheng 
11755d4f98a2SYan Zheng 	BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
11765d4f98a2SYan Zheng 	       btrfs_file_extent_compression(leaf, fi) ||
11775d4f98a2SYan Zheng 	       btrfs_file_extent_encryption(leaf, fi) ||
11785d4f98a2SYan Zheng 	       btrfs_file_extent_other_encoding(leaf, fi));
11795d4f98a2SYan Zheng 
11805d4f98a2SYan Zheng 	if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
118183d4cfd4SJosef Bacik 		ret = -EINVAL;
11825d4f98a2SYan Zheng 		goto out;
11835d4f98a2SYan Zheng 	}
11845d4f98a2SYan Zheng 
11855d4f98a2SYan Zheng 	*new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
11865d4f98a2SYan Zheng 	ret = 0;
11875d4f98a2SYan Zheng out:
11885d4f98a2SYan Zheng 	btrfs_free_path(path);
11895d4f98a2SYan Zheng 	return ret;
11905d4f98a2SYan Zheng }
11915d4f98a2SYan Zheng 
11925d4f98a2SYan Zheng /*
11935d4f98a2SYan Zheng  * update file extent items in the tree leaf to point to
11945d4f98a2SYan Zheng  * the new locations.
11955d4f98a2SYan Zheng  */
11963fd0a558SYan, Zheng static noinline_for_stack
11973fd0a558SYan, Zheng int replace_file_extents(struct btrfs_trans_handle *trans,
11985d4f98a2SYan Zheng 			 struct reloc_control *rc,
11995d4f98a2SYan Zheng 			 struct btrfs_root *root,
12003fd0a558SYan, Zheng 			 struct extent_buffer *leaf)
12015d4f98a2SYan Zheng {
12020b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
12035d4f98a2SYan Zheng 	struct btrfs_key key;
12045d4f98a2SYan Zheng 	struct btrfs_file_extent_item *fi;
12055d4f98a2SYan Zheng 	struct inode *inode = NULL;
12065d4f98a2SYan Zheng 	u64 parent;
12075d4f98a2SYan Zheng 	u64 bytenr;
12083fd0a558SYan, Zheng 	u64 new_bytenr = 0;
12095d4f98a2SYan Zheng 	u64 num_bytes;
12105d4f98a2SYan Zheng 	u64 end;
12115d4f98a2SYan Zheng 	u32 nritems;
12125d4f98a2SYan Zheng 	u32 i;
121383d4cfd4SJosef Bacik 	int ret = 0;
12145d4f98a2SYan Zheng 	int first = 1;
12155d4f98a2SYan Zheng 	int dirty = 0;
12165d4f98a2SYan Zheng 
12175d4f98a2SYan Zheng 	if (rc->stage != UPDATE_DATA_PTRS)
12185d4f98a2SYan Zheng 		return 0;
12195d4f98a2SYan Zheng 
12205d4f98a2SYan Zheng 	/* reloc trees always use full backref */
12215d4f98a2SYan Zheng 	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
12225d4f98a2SYan Zheng 		parent = leaf->start;
12235d4f98a2SYan Zheng 	else
12245d4f98a2SYan Zheng 		parent = 0;
12255d4f98a2SYan Zheng 
12265d4f98a2SYan Zheng 	nritems = btrfs_header_nritems(leaf);
12275d4f98a2SYan Zheng 	for (i = 0; i < nritems; i++) {
122882fa113fSQu Wenruo 		struct btrfs_ref ref = { 0 };
122982fa113fSQu Wenruo 
12305d4f98a2SYan Zheng 		cond_resched();
12315d4f98a2SYan Zheng 		btrfs_item_key_to_cpu(leaf, &key, i);
12325d4f98a2SYan Zheng 		if (key.type != BTRFS_EXTENT_DATA_KEY)
12335d4f98a2SYan Zheng 			continue;
12345d4f98a2SYan Zheng 		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
12355d4f98a2SYan Zheng 		if (btrfs_file_extent_type(leaf, fi) ==
12365d4f98a2SYan Zheng 		    BTRFS_FILE_EXTENT_INLINE)
12375d4f98a2SYan Zheng 			continue;
12385d4f98a2SYan Zheng 		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
12395d4f98a2SYan Zheng 		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
12405d4f98a2SYan Zheng 		if (bytenr == 0)
12415d4f98a2SYan Zheng 			continue;
12429569cc20SQu Wenruo 		if (!in_range(bytenr, rc->block_group->start,
12439569cc20SQu Wenruo 			      rc->block_group->length))
12445d4f98a2SYan Zheng 			continue;
12455d4f98a2SYan Zheng 
12465d4f98a2SYan Zheng 		/*
12475d4f98a2SYan Zheng 		 * if we are modifying block in fs tree, wait for readpage
12485d4f98a2SYan Zheng 		 * to complete and drop the extent cache
12495d4f98a2SYan Zheng 		 */
12505d4f98a2SYan Zheng 		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
12515d4f98a2SYan Zheng 			if (first) {
12525d4f98a2SYan Zheng 				inode = find_next_inode(root, key.objectid);
12535d4f98a2SYan Zheng 				first = 0;
12544a0cc7caSNikolay Borisov 			} else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
12553fd0a558SYan, Zheng 				btrfs_add_delayed_iput(inode);
12565d4f98a2SYan Zheng 				inode = find_next_inode(root, key.objectid);
12575d4f98a2SYan Zheng 			}
12584a0cc7caSNikolay Borisov 			if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
12595d4f98a2SYan Zheng 				end = key.offset +
12605d4f98a2SYan Zheng 				      btrfs_file_extent_num_bytes(leaf, fi);
12615d4f98a2SYan Zheng 				WARN_ON(!IS_ALIGNED(key.offset,
12620b246afaSJeff Mahoney 						    fs_info->sectorsize));
12630b246afaSJeff Mahoney 				WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
12645d4f98a2SYan Zheng 				end--;
12655d4f98a2SYan Zheng 				ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1266d0082371SJeff Mahoney 						      key.offset, end);
12675d4f98a2SYan Zheng 				if (!ret)
12685d4f98a2SYan Zheng 					continue;
12695d4f98a2SYan Zheng 
1270dcdbc059SNikolay Borisov 				btrfs_drop_extent_cache(BTRFS_I(inode),
1271dcdbc059SNikolay Borisov 						key.offset,	end, 1);
12725d4f98a2SYan Zheng 				unlock_extent(&BTRFS_I(inode)->io_tree,
1273d0082371SJeff Mahoney 					      key.offset, end);
12745d4f98a2SYan Zheng 			}
12755d4f98a2SYan Zheng 		}
12765d4f98a2SYan Zheng 
12775d4f98a2SYan Zheng 		ret = get_new_location(rc->data_inode, &new_bytenr,
12785d4f98a2SYan Zheng 				       bytenr, num_bytes);
127983d4cfd4SJosef Bacik 		if (ret) {
128083d4cfd4SJosef Bacik 			/*
128183d4cfd4SJosef Bacik 			 * Don't have to abort since we've not changed anything
128283d4cfd4SJosef Bacik 			 * in the file extent yet.
128383d4cfd4SJosef Bacik 			 */
128483d4cfd4SJosef Bacik 			break;
12853fd0a558SYan, Zheng 		}
12865d4f98a2SYan Zheng 
12875d4f98a2SYan Zheng 		btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
12885d4f98a2SYan Zheng 		dirty = 1;
12895d4f98a2SYan Zheng 
12905d4f98a2SYan Zheng 		key.offset -= btrfs_file_extent_offset(leaf, fi);
129182fa113fSQu Wenruo 		btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
129282fa113fSQu Wenruo 				       num_bytes, parent);
129382fa113fSQu Wenruo 		ref.real_root = root->root_key.objectid;
129482fa113fSQu Wenruo 		btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1295b06c4bf5SFilipe Manana 				    key.objectid, key.offset);
129682fa113fSQu Wenruo 		ret = btrfs_inc_extent_ref(trans, &ref);
129783d4cfd4SJosef Bacik 		if (ret) {
129866642832SJeff Mahoney 			btrfs_abort_transaction(trans, ret);
129983d4cfd4SJosef Bacik 			break;
130083d4cfd4SJosef Bacik 		}
13015d4f98a2SYan Zheng 
1302ffd4bb2aSQu Wenruo 		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
1303ffd4bb2aSQu Wenruo 				       num_bytes, parent);
1304ffd4bb2aSQu Wenruo 		ref.real_root = root->root_key.objectid;
1305ffd4bb2aSQu Wenruo 		btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
1306b06c4bf5SFilipe Manana 				    key.objectid, key.offset);
1307ffd4bb2aSQu Wenruo 		ret = btrfs_free_extent(trans, &ref);
130883d4cfd4SJosef Bacik 		if (ret) {
130966642832SJeff Mahoney 			btrfs_abort_transaction(trans, ret);
131083d4cfd4SJosef Bacik 			break;
131183d4cfd4SJosef Bacik 		}
13125d4f98a2SYan Zheng 	}
13135d4f98a2SYan Zheng 	if (dirty)
13145d4f98a2SYan Zheng 		btrfs_mark_buffer_dirty(leaf);
13153fd0a558SYan, Zheng 	if (inode)
13163fd0a558SYan, Zheng 		btrfs_add_delayed_iput(inode);
131783d4cfd4SJosef Bacik 	return ret;
13185d4f98a2SYan Zheng }
13195d4f98a2SYan Zheng 
13205d4f98a2SYan Zheng static noinline_for_stack
13215d4f98a2SYan Zheng int memcmp_node_keys(struct extent_buffer *eb, int slot,
13225d4f98a2SYan Zheng 		     struct btrfs_path *path, int level)
13235d4f98a2SYan Zheng {
13245d4f98a2SYan Zheng 	struct btrfs_disk_key key1;
13255d4f98a2SYan Zheng 	struct btrfs_disk_key key2;
13265d4f98a2SYan Zheng 	btrfs_node_key(eb, &key1, slot);
13275d4f98a2SYan Zheng 	btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
13285d4f98a2SYan Zheng 	return memcmp(&key1, &key2, sizeof(key1));
13295d4f98a2SYan Zheng }
13305d4f98a2SYan Zheng 
13315d4f98a2SYan Zheng /*
13325d4f98a2SYan Zheng  * try to replace tree blocks in fs tree with the new blocks
13335d4f98a2SYan Zheng  * in reloc tree. tree blocks haven't been modified since the
13345d4f98a2SYan Zheng  * reloc tree was create can be replaced.
13355d4f98a2SYan Zheng  *
13365d4f98a2SYan Zheng  * if a block was replaced, level of the block + 1 is returned.
13375d4f98a2SYan Zheng  * if no block got replaced, 0 is returned. if there are other
13385d4f98a2SYan Zheng  * errors, a negative error number is returned.
13395d4f98a2SYan Zheng  */
13403fd0a558SYan, Zheng static noinline_for_stack
13413d0174f7SQu Wenruo int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
13425d4f98a2SYan Zheng 		 struct btrfs_root *dest, struct btrfs_root *src,
13435d4f98a2SYan Zheng 		 struct btrfs_path *path, struct btrfs_key *next_key,
13445d4f98a2SYan Zheng 		 int lowest_level, int max_level)
13455d4f98a2SYan Zheng {
13460b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = dest->fs_info;
13475d4f98a2SYan Zheng 	struct extent_buffer *eb;
13485d4f98a2SYan Zheng 	struct extent_buffer *parent;
134982fa113fSQu Wenruo 	struct btrfs_ref ref = { 0 };
13505d4f98a2SYan Zheng 	struct btrfs_key key;
13515d4f98a2SYan Zheng 	u64 old_bytenr;
13525d4f98a2SYan Zheng 	u64 new_bytenr;
13535d4f98a2SYan Zheng 	u64 old_ptr_gen;
13545d4f98a2SYan Zheng 	u64 new_ptr_gen;
13555d4f98a2SYan Zheng 	u64 last_snapshot;
13565d4f98a2SYan Zheng 	u32 blocksize;
13573fd0a558SYan, Zheng 	int cow = 0;
13585d4f98a2SYan Zheng 	int level;
13595d4f98a2SYan Zheng 	int ret;
13605d4f98a2SYan Zheng 	int slot;
13615d4f98a2SYan Zheng 
13625d4f98a2SYan Zheng 	BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
13635d4f98a2SYan Zheng 	BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
13645d4f98a2SYan Zheng 
13655d4f98a2SYan Zheng 	last_snapshot = btrfs_root_last_snapshot(&src->root_item);
13663fd0a558SYan, Zheng again:
13675d4f98a2SYan Zheng 	slot = path->slots[lowest_level];
13685d4f98a2SYan Zheng 	btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
13695d4f98a2SYan Zheng 
13705d4f98a2SYan Zheng 	eb = btrfs_lock_root_node(dest);
13718bead258SDavid Sterba 	btrfs_set_lock_blocking_write(eb);
13725d4f98a2SYan Zheng 	level = btrfs_header_level(eb);
13735d4f98a2SYan Zheng 
13745d4f98a2SYan Zheng 	if (level < lowest_level) {
13755d4f98a2SYan Zheng 		btrfs_tree_unlock(eb);
13765d4f98a2SYan Zheng 		free_extent_buffer(eb);
13775d4f98a2SYan Zheng 		return 0;
13785d4f98a2SYan Zheng 	}
13795d4f98a2SYan Zheng 
13803fd0a558SYan, Zheng 	if (cow) {
13815d4f98a2SYan Zheng 		ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
13825d4f98a2SYan Zheng 		BUG_ON(ret);
13833fd0a558SYan, Zheng 	}
13848bead258SDavid Sterba 	btrfs_set_lock_blocking_write(eb);
13855d4f98a2SYan Zheng 
13865d4f98a2SYan Zheng 	if (next_key) {
13875d4f98a2SYan Zheng 		next_key->objectid = (u64)-1;
13885d4f98a2SYan Zheng 		next_key->type = (u8)-1;
13895d4f98a2SYan Zheng 		next_key->offset = (u64)-1;
13905d4f98a2SYan Zheng 	}
13915d4f98a2SYan Zheng 
13925d4f98a2SYan Zheng 	parent = eb;
13935d4f98a2SYan Zheng 	while (1) {
1394581c1760SQu Wenruo 		struct btrfs_key first_key;
1395581c1760SQu Wenruo 
13965d4f98a2SYan Zheng 		level = btrfs_header_level(parent);
13975d4f98a2SYan Zheng 		BUG_ON(level < lowest_level);
13985d4f98a2SYan Zheng 
13995d4f98a2SYan Zheng 		ret = btrfs_bin_search(parent, &key, level, &slot);
1400cbca7d59SFilipe Manana 		if (ret < 0)
1401cbca7d59SFilipe Manana 			break;
14025d4f98a2SYan Zheng 		if (ret && slot > 0)
14035d4f98a2SYan Zheng 			slot--;
14045d4f98a2SYan Zheng 
14055d4f98a2SYan Zheng 		if (next_key && slot + 1 < btrfs_header_nritems(parent))
14065d4f98a2SYan Zheng 			btrfs_node_key_to_cpu(parent, next_key, slot + 1);
14075d4f98a2SYan Zheng 
14085d4f98a2SYan Zheng 		old_bytenr = btrfs_node_blockptr(parent, slot);
14090b246afaSJeff Mahoney 		blocksize = fs_info->nodesize;
14105d4f98a2SYan Zheng 		old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
141117515f1bSQu Wenruo 		btrfs_node_key_to_cpu(parent, &first_key, slot);
14125d4f98a2SYan Zheng 
14135d4f98a2SYan Zheng 		if (level <= max_level) {
14145d4f98a2SYan Zheng 			eb = path->nodes[level];
14155d4f98a2SYan Zheng 			new_bytenr = btrfs_node_blockptr(eb,
14165d4f98a2SYan Zheng 							path->slots[level]);
14175d4f98a2SYan Zheng 			new_ptr_gen = btrfs_node_ptr_generation(eb,
14185d4f98a2SYan Zheng 							path->slots[level]);
14195d4f98a2SYan Zheng 		} else {
14205d4f98a2SYan Zheng 			new_bytenr = 0;
14215d4f98a2SYan Zheng 			new_ptr_gen = 0;
14225d4f98a2SYan Zheng 		}
14235d4f98a2SYan Zheng 
1424fae7f21cSDulshani Gunawardhana 		if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
14255d4f98a2SYan Zheng 			ret = level;
14265d4f98a2SYan Zheng 			break;
14275d4f98a2SYan Zheng 		}
14285d4f98a2SYan Zheng 
14295d4f98a2SYan Zheng 		if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
14305d4f98a2SYan Zheng 		    memcmp_node_keys(parent, slot, path, level)) {
14313fd0a558SYan, Zheng 			if (level <= lowest_level) {
14325d4f98a2SYan Zheng 				ret = 0;
14335d4f98a2SYan Zheng 				break;
14345d4f98a2SYan Zheng 			}
14355d4f98a2SYan Zheng 
1436581c1760SQu Wenruo 			eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen,
1437581c1760SQu Wenruo 					     level - 1, &first_key);
143864c043deSLiu Bo 			if (IS_ERR(eb)) {
143964c043deSLiu Bo 				ret = PTR_ERR(eb);
1440264813acSLiu Bo 				break;
144164c043deSLiu Bo 			} else if (!extent_buffer_uptodate(eb)) {
144264c043deSLiu Bo 				ret = -EIO;
1443416bc658SJosef Bacik 				free_extent_buffer(eb);
1444379cde74SStefan Behrens 				break;
1445416bc658SJosef Bacik 			}
14465d4f98a2SYan Zheng 			btrfs_tree_lock(eb);
14473fd0a558SYan, Zheng 			if (cow) {
14485d4f98a2SYan Zheng 				ret = btrfs_cow_block(trans, dest, eb, parent,
14495d4f98a2SYan Zheng 						      slot, &eb);
14505d4f98a2SYan Zheng 				BUG_ON(ret);
14515d4f98a2SYan Zheng 			}
14528bead258SDavid Sterba 			btrfs_set_lock_blocking_write(eb);
14535d4f98a2SYan Zheng 
14545d4f98a2SYan Zheng 			btrfs_tree_unlock(parent);
14555d4f98a2SYan Zheng 			free_extent_buffer(parent);
14565d4f98a2SYan Zheng 
14575d4f98a2SYan Zheng 			parent = eb;
14585d4f98a2SYan Zheng 			continue;
14595d4f98a2SYan Zheng 		}
14605d4f98a2SYan Zheng 
14613fd0a558SYan, Zheng 		if (!cow) {
14623fd0a558SYan, Zheng 			btrfs_tree_unlock(parent);
14633fd0a558SYan, Zheng 			free_extent_buffer(parent);
14643fd0a558SYan, Zheng 			cow = 1;
14653fd0a558SYan, Zheng 			goto again;
14663fd0a558SYan, Zheng 		}
14673fd0a558SYan, Zheng 
14685d4f98a2SYan Zheng 		btrfs_node_key_to_cpu(path->nodes[level], &key,
14695d4f98a2SYan Zheng 				      path->slots[level]);
1470b3b4aa74SDavid Sterba 		btrfs_release_path(path);
14715d4f98a2SYan Zheng 
14725d4f98a2SYan Zheng 		path->lowest_level = level;
14735d4f98a2SYan Zheng 		ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
14745d4f98a2SYan Zheng 		path->lowest_level = 0;
14755d4f98a2SYan Zheng 		BUG_ON(ret);
14765d4f98a2SYan Zheng 
14775d4f98a2SYan Zheng 		/*
1478824d8dffSQu Wenruo 		 * Info qgroup to trace both subtrees.
1479824d8dffSQu Wenruo 		 *
1480824d8dffSQu Wenruo 		 * We must trace both trees.
1481824d8dffSQu Wenruo 		 * 1) Tree reloc subtree
1482824d8dffSQu Wenruo 		 *    If not traced, we will leak data numbers
1483824d8dffSQu Wenruo 		 * 2) Fs subtree
1484824d8dffSQu Wenruo 		 *    If not traced, we will double count old data
1485f616f5cdSQu Wenruo 		 *
1486f616f5cdSQu Wenruo 		 * We don't scan the subtree right now, but only record
1487f616f5cdSQu Wenruo 		 * the swapped tree blocks.
1488f616f5cdSQu Wenruo 		 * The real subtree rescan is delayed until we have new
1489f616f5cdSQu Wenruo 		 * CoW on the subtree root node before transaction commit.
1490824d8dffSQu Wenruo 		 */
1491370a11b8SQu Wenruo 		ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
1492370a11b8SQu Wenruo 				rc->block_group, parent, slot,
1493370a11b8SQu Wenruo 				path->nodes[level], path->slots[level],
1494370a11b8SQu Wenruo 				last_snapshot);
1495370a11b8SQu Wenruo 		if (ret < 0)
1496370a11b8SQu Wenruo 			break;
1497824d8dffSQu Wenruo 		/*
14985d4f98a2SYan Zheng 		 * swap blocks in fs tree and reloc tree.
14995d4f98a2SYan Zheng 		 */
15005d4f98a2SYan Zheng 		btrfs_set_node_blockptr(parent, slot, new_bytenr);
15015d4f98a2SYan Zheng 		btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
15025d4f98a2SYan Zheng 		btrfs_mark_buffer_dirty(parent);
15035d4f98a2SYan Zheng 
15045d4f98a2SYan Zheng 		btrfs_set_node_blockptr(path->nodes[level],
15055d4f98a2SYan Zheng 					path->slots[level], old_bytenr);
15065d4f98a2SYan Zheng 		btrfs_set_node_ptr_generation(path->nodes[level],
15075d4f98a2SYan Zheng 					      path->slots[level], old_ptr_gen);
15085d4f98a2SYan Zheng 		btrfs_mark_buffer_dirty(path->nodes[level]);
15095d4f98a2SYan Zheng 
151082fa113fSQu Wenruo 		btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
151182fa113fSQu Wenruo 				       blocksize, path->nodes[level]->start);
151282fa113fSQu Wenruo 		ref.skip_qgroup = true;
151382fa113fSQu Wenruo 		btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
151482fa113fSQu Wenruo 		ret = btrfs_inc_extent_ref(trans, &ref);
15155d4f98a2SYan Zheng 		BUG_ON(ret);
151682fa113fSQu Wenruo 		btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
151782fa113fSQu Wenruo 				       blocksize, 0);
151882fa113fSQu Wenruo 		ref.skip_qgroup = true;
151982fa113fSQu Wenruo 		btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
152082fa113fSQu Wenruo 		ret = btrfs_inc_extent_ref(trans, &ref);
15215d4f98a2SYan Zheng 		BUG_ON(ret);
15225d4f98a2SYan Zheng 
1523ffd4bb2aSQu Wenruo 		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
1524ffd4bb2aSQu Wenruo 				       blocksize, path->nodes[level]->start);
1525ffd4bb2aSQu Wenruo 		btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
1526ffd4bb2aSQu Wenruo 		ref.skip_qgroup = true;
1527ffd4bb2aSQu Wenruo 		ret = btrfs_free_extent(trans, &ref);
15285d4f98a2SYan Zheng 		BUG_ON(ret);
15295d4f98a2SYan Zheng 
1530ffd4bb2aSQu Wenruo 		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
1531ffd4bb2aSQu Wenruo 				       blocksize, 0);
1532ffd4bb2aSQu Wenruo 		btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
1533ffd4bb2aSQu Wenruo 		ref.skip_qgroup = true;
1534ffd4bb2aSQu Wenruo 		ret = btrfs_free_extent(trans, &ref);
15355d4f98a2SYan Zheng 		BUG_ON(ret);
15365d4f98a2SYan Zheng 
15375d4f98a2SYan Zheng 		btrfs_unlock_up_safe(path, 0);
15385d4f98a2SYan Zheng 
15395d4f98a2SYan Zheng 		ret = level;
15405d4f98a2SYan Zheng 		break;
15415d4f98a2SYan Zheng 	}
15425d4f98a2SYan Zheng 	btrfs_tree_unlock(parent);
15435d4f98a2SYan Zheng 	free_extent_buffer(parent);
15445d4f98a2SYan Zheng 	return ret;
15455d4f98a2SYan Zheng }
15465d4f98a2SYan Zheng 
15475d4f98a2SYan Zheng /*
15485d4f98a2SYan Zheng  * helper to find next relocated block in reloc tree
15495d4f98a2SYan Zheng  */
15505d4f98a2SYan Zheng static noinline_for_stack
15515d4f98a2SYan Zheng int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
15525d4f98a2SYan Zheng 		       int *level)
15535d4f98a2SYan Zheng {
15545d4f98a2SYan Zheng 	struct extent_buffer *eb;
15555d4f98a2SYan Zheng 	int i;
15565d4f98a2SYan Zheng 	u64 last_snapshot;
15575d4f98a2SYan Zheng 	u32 nritems;
15585d4f98a2SYan Zheng 
15595d4f98a2SYan Zheng 	last_snapshot = btrfs_root_last_snapshot(&root->root_item);
15605d4f98a2SYan Zheng 
15615d4f98a2SYan Zheng 	for (i = 0; i < *level; i++) {
15625d4f98a2SYan Zheng 		free_extent_buffer(path->nodes[i]);
15635d4f98a2SYan Zheng 		path->nodes[i] = NULL;
15645d4f98a2SYan Zheng 	}
15655d4f98a2SYan Zheng 
15665d4f98a2SYan Zheng 	for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
15675d4f98a2SYan Zheng 		eb = path->nodes[i];
15685d4f98a2SYan Zheng 		nritems = btrfs_header_nritems(eb);
15695d4f98a2SYan Zheng 		while (path->slots[i] + 1 < nritems) {
15705d4f98a2SYan Zheng 			path->slots[i]++;
15715d4f98a2SYan Zheng 			if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
15725d4f98a2SYan Zheng 			    last_snapshot)
15735d4f98a2SYan Zheng 				continue;
15745d4f98a2SYan Zheng 
15755d4f98a2SYan Zheng 			*level = i;
15765d4f98a2SYan Zheng 			return 0;
15775d4f98a2SYan Zheng 		}
15785d4f98a2SYan Zheng 		free_extent_buffer(path->nodes[i]);
15795d4f98a2SYan Zheng 		path->nodes[i] = NULL;
15805d4f98a2SYan Zheng 	}
15815d4f98a2SYan Zheng 	return 1;
15825d4f98a2SYan Zheng }
15835d4f98a2SYan Zheng 
15845d4f98a2SYan Zheng /*
15855d4f98a2SYan Zheng  * walk down reloc tree to find relocated block of lowest level
15865d4f98a2SYan Zheng  */
15875d4f98a2SYan Zheng static noinline_for_stack
15885d4f98a2SYan Zheng int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
15895d4f98a2SYan Zheng 			 int *level)
15905d4f98a2SYan Zheng {
15912ff7e61eSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
15925d4f98a2SYan Zheng 	struct extent_buffer *eb = NULL;
15935d4f98a2SYan Zheng 	int i;
15945d4f98a2SYan Zheng 	u64 bytenr;
15955d4f98a2SYan Zheng 	u64 ptr_gen = 0;
15965d4f98a2SYan Zheng 	u64 last_snapshot;
15975d4f98a2SYan Zheng 	u32 nritems;
15985d4f98a2SYan Zheng 
15995d4f98a2SYan Zheng 	last_snapshot = btrfs_root_last_snapshot(&root->root_item);
16005d4f98a2SYan Zheng 
16015d4f98a2SYan Zheng 	for (i = *level; i > 0; i--) {
1602581c1760SQu Wenruo 		struct btrfs_key first_key;
1603581c1760SQu Wenruo 
16045d4f98a2SYan Zheng 		eb = path->nodes[i];
16055d4f98a2SYan Zheng 		nritems = btrfs_header_nritems(eb);
16065d4f98a2SYan Zheng 		while (path->slots[i] < nritems) {
16075d4f98a2SYan Zheng 			ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
16085d4f98a2SYan Zheng 			if (ptr_gen > last_snapshot)
16095d4f98a2SYan Zheng 				break;
16105d4f98a2SYan Zheng 			path->slots[i]++;
16115d4f98a2SYan Zheng 		}
16125d4f98a2SYan Zheng 		if (path->slots[i] >= nritems) {
16135d4f98a2SYan Zheng 			if (i == *level)
16145d4f98a2SYan Zheng 				break;
16155d4f98a2SYan Zheng 			*level = i + 1;
16165d4f98a2SYan Zheng 			return 0;
16175d4f98a2SYan Zheng 		}
16185d4f98a2SYan Zheng 		if (i == 1) {
16195d4f98a2SYan Zheng 			*level = i;
16205d4f98a2SYan Zheng 			return 0;
16215d4f98a2SYan Zheng 		}
16225d4f98a2SYan Zheng 
16235d4f98a2SYan Zheng 		bytenr = btrfs_node_blockptr(eb, path->slots[i]);
1624581c1760SQu Wenruo 		btrfs_node_key_to_cpu(eb, &first_key, path->slots[i]);
1625581c1760SQu Wenruo 		eb = read_tree_block(fs_info, bytenr, ptr_gen, i - 1,
1626581c1760SQu Wenruo 				     &first_key);
162764c043deSLiu Bo 		if (IS_ERR(eb)) {
162864c043deSLiu Bo 			return PTR_ERR(eb);
162964c043deSLiu Bo 		} else if (!extent_buffer_uptodate(eb)) {
1630416bc658SJosef Bacik 			free_extent_buffer(eb);
1631416bc658SJosef Bacik 			return -EIO;
1632416bc658SJosef Bacik 		}
16335d4f98a2SYan Zheng 		BUG_ON(btrfs_header_level(eb) != i - 1);
16345d4f98a2SYan Zheng 		path->nodes[i - 1] = eb;
16355d4f98a2SYan Zheng 		path->slots[i - 1] = 0;
16365d4f98a2SYan Zheng 	}
16375d4f98a2SYan Zheng 	return 1;
16385d4f98a2SYan Zheng }
16395d4f98a2SYan Zheng 
16405d4f98a2SYan Zheng /*
16415d4f98a2SYan Zheng  * invalidate extent cache for file extents whose key in range of
16425d4f98a2SYan Zheng  * [min_key, max_key)
16435d4f98a2SYan Zheng  */
16445d4f98a2SYan Zheng static int invalidate_extent_cache(struct btrfs_root *root,
16455d4f98a2SYan Zheng 				   struct btrfs_key *min_key,
16465d4f98a2SYan Zheng 				   struct btrfs_key *max_key)
16475d4f98a2SYan Zheng {
16480b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
16495d4f98a2SYan Zheng 	struct inode *inode = NULL;
16505d4f98a2SYan Zheng 	u64 objectid;
16515d4f98a2SYan Zheng 	u64 start, end;
165233345d01SLi Zefan 	u64 ino;
16535d4f98a2SYan Zheng 
16545d4f98a2SYan Zheng 	objectid = min_key->objectid;
16555d4f98a2SYan Zheng 	while (1) {
16565d4f98a2SYan Zheng 		cond_resched();
16575d4f98a2SYan Zheng 		iput(inode);
16585d4f98a2SYan Zheng 
16595d4f98a2SYan Zheng 		if (objectid > max_key->objectid)
16605d4f98a2SYan Zheng 			break;
16615d4f98a2SYan Zheng 
16625d4f98a2SYan Zheng 		inode = find_next_inode(root, objectid);
16635d4f98a2SYan Zheng 		if (!inode)
16645d4f98a2SYan Zheng 			break;
16654a0cc7caSNikolay Borisov 		ino = btrfs_ino(BTRFS_I(inode));
16665d4f98a2SYan Zheng 
166733345d01SLi Zefan 		if (ino > max_key->objectid) {
16685d4f98a2SYan Zheng 			iput(inode);
16695d4f98a2SYan Zheng 			break;
16705d4f98a2SYan Zheng 		}
16715d4f98a2SYan Zheng 
167233345d01SLi Zefan 		objectid = ino + 1;
16735d4f98a2SYan Zheng 		if (!S_ISREG(inode->i_mode))
16745d4f98a2SYan Zheng 			continue;
16755d4f98a2SYan Zheng 
167633345d01SLi Zefan 		if (unlikely(min_key->objectid == ino)) {
16775d4f98a2SYan Zheng 			if (min_key->type > BTRFS_EXTENT_DATA_KEY)
16785d4f98a2SYan Zheng 				continue;
16795d4f98a2SYan Zheng 			if (min_key->type < BTRFS_EXTENT_DATA_KEY)
16805d4f98a2SYan Zheng 				start = 0;
16815d4f98a2SYan Zheng 			else {
16825d4f98a2SYan Zheng 				start = min_key->offset;
16830b246afaSJeff Mahoney 				WARN_ON(!IS_ALIGNED(start, fs_info->sectorsize));
16845d4f98a2SYan Zheng 			}
16855d4f98a2SYan Zheng 		} else {
16865d4f98a2SYan Zheng 			start = 0;
16875d4f98a2SYan Zheng 		}
16885d4f98a2SYan Zheng 
168933345d01SLi Zefan 		if (unlikely(max_key->objectid == ino)) {
16905d4f98a2SYan Zheng 			if (max_key->type < BTRFS_EXTENT_DATA_KEY)
16915d4f98a2SYan Zheng 				continue;
16925d4f98a2SYan Zheng 			if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
16935d4f98a2SYan Zheng 				end = (u64)-1;
16945d4f98a2SYan Zheng 			} else {
16955d4f98a2SYan Zheng 				if (max_key->offset == 0)
16965d4f98a2SYan Zheng 					continue;
16975d4f98a2SYan Zheng 				end = max_key->offset;
16980b246afaSJeff Mahoney 				WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
16995d4f98a2SYan Zheng 				end--;
17005d4f98a2SYan Zheng 			}
17015d4f98a2SYan Zheng 		} else {
17025d4f98a2SYan Zheng 			end = (u64)-1;
17035d4f98a2SYan Zheng 		}
17045d4f98a2SYan Zheng 
17055d4f98a2SYan Zheng 		/* the lock_extent waits for readpage to complete */
1706d0082371SJeff Mahoney 		lock_extent(&BTRFS_I(inode)->io_tree, start, end);
1707dcdbc059SNikolay Borisov 		btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1);
1708d0082371SJeff Mahoney 		unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
17095d4f98a2SYan Zheng 	}
17105d4f98a2SYan Zheng 	return 0;
17115d4f98a2SYan Zheng }
17125d4f98a2SYan Zheng 
17135d4f98a2SYan Zheng static int find_next_key(struct btrfs_path *path, int level,
17145d4f98a2SYan Zheng 			 struct btrfs_key *key)
17155d4f98a2SYan Zheng 
17165d4f98a2SYan Zheng {
17175d4f98a2SYan Zheng 	while (level < BTRFS_MAX_LEVEL) {
17185d4f98a2SYan Zheng 		if (!path->nodes[level])
17195d4f98a2SYan Zheng 			break;
17205d4f98a2SYan Zheng 		if (path->slots[level] + 1 <
17215d4f98a2SYan Zheng 		    btrfs_header_nritems(path->nodes[level])) {
17225d4f98a2SYan Zheng 			btrfs_node_key_to_cpu(path->nodes[level], key,
17235d4f98a2SYan Zheng 					      path->slots[level] + 1);
17245d4f98a2SYan Zheng 			return 0;
17255d4f98a2SYan Zheng 		}
17265d4f98a2SYan Zheng 		level++;
17275d4f98a2SYan Zheng 	}
17285d4f98a2SYan Zheng 	return 1;
17295d4f98a2SYan Zheng }
17305d4f98a2SYan Zheng 
17315d4f98a2SYan Zheng /*
1732d2311e69SQu Wenruo  * Insert current subvolume into reloc_control::dirty_subvol_roots
1733d2311e69SQu Wenruo  */
1734d2311e69SQu Wenruo static void insert_dirty_subvol(struct btrfs_trans_handle *trans,
1735d2311e69SQu Wenruo 				struct reloc_control *rc,
1736d2311e69SQu Wenruo 				struct btrfs_root *root)
1737d2311e69SQu Wenruo {
1738d2311e69SQu Wenruo 	struct btrfs_root *reloc_root = root->reloc_root;
1739d2311e69SQu Wenruo 	struct btrfs_root_item *reloc_root_item;
1740d2311e69SQu Wenruo 
1741d2311e69SQu Wenruo 	/* @root must be a subvolume tree root with a valid reloc tree */
1742d2311e69SQu Wenruo 	ASSERT(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1743d2311e69SQu Wenruo 	ASSERT(reloc_root);
1744d2311e69SQu Wenruo 
1745d2311e69SQu Wenruo 	reloc_root_item = &reloc_root->root_item;
1746d2311e69SQu Wenruo 	memset(&reloc_root_item->drop_progress, 0,
1747d2311e69SQu Wenruo 		sizeof(reloc_root_item->drop_progress));
1748d2311e69SQu Wenruo 	reloc_root_item->drop_level = 0;
1749d2311e69SQu Wenruo 	btrfs_set_root_refs(reloc_root_item, 0);
1750d2311e69SQu Wenruo 	btrfs_update_reloc_root(trans, root);
1751d2311e69SQu Wenruo 
1752d2311e69SQu Wenruo 	if (list_empty(&root->reloc_dirty_list)) {
175300246528SJosef Bacik 		btrfs_grab_root(root);
1754d2311e69SQu Wenruo 		list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots);
1755d2311e69SQu Wenruo 	}
1756d2311e69SQu Wenruo }
1757d2311e69SQu Wenruo 
1758d2311e69SQu Wenruo static int clean_dirty_subvols(struct reloc_control *rc)
1759d2311e69SQu Wenruo {
1760d2311e69SQu Wenruo 	struct btrfs_root *root;
1761d2311e69SQu Wenruo 	struct btrfs_root *next;
1762d2311e69SQu Wenruo 	int ret = 0;
176330d40577SQu Wenruo 	int ret2;
1764d2311e69SQu Wenruo 
1765d2311e69SQu Wenruo 	list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
1766d2311e69SQu Wenruo 				 reloc_dirty_list) {
176730d40577SQu Wenruo 		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
176830d40577SQu Wenruo 			/* Merged subvolume, cleanup its reloc root */
1769d2311e69SQu Wenruo 			struct btrfs_root *reloc_root = root->reloc_root;
1770d2311e69SQu Wenruo 
1771d2311e69SQu Wenruo 			list_del_init(&root->reloc_dirty_list);
1772d2311e69SQu Wenruo 			root->reloc_root = NULL;
17736282675eSQu Wenruo 			/*
17746282675eSQu Wenruo 			 * Need barrier to ensure clear_bit() only happens after
17756282675eSQu Wenruo 			 * root->reloc_root = NULL. Pairs with have_reloc_root.
17766282675eSQu Wenruo 			 */
17776282675eSQu Wenruo 			smp_wmb();
17781fac4a54SQu Wenruo 			clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
1779f28de8d8SJosef Bacik 			if (reloc_root) {
1780f44deb74SJosef Bacik 				/*
1781f44deb74SJosef Bacik 				 * btrfs_drop_snapshot drops our ref we hold for
1782f44deb74SJosef Bacik 				 * ->reloc_root.  If it fails however we must
1783f44deb74SJosef Bacik 				 * drop the ref ourselves.
1784f44deb74SJosef Bacik 				 */
1785f28de8d8SJosef Bacik 				ret2 = btrfs_drop_snapshot(reloc_root, 0, 1);
1786f44deb74SJosef Bacik 				if (ret2 < 0) {
1787f44deb74SJosef Bacik 					btrfs_put_root(reloc_root);
1788f44deb74SJosef Bacik 					if (!ret)
1789f28de8d8SJosef Bacik 						ret = ret2;
1790f28de8d8SJosef Bacik 				}
1791f44deb74SJosef Bacik 			}
179200246528SJosef Bacik 			btrfs_put_root(root);
179330d40577SQu Wenruo 		} else {
179430d40577SQu Wenruo 			/* Orphan reloc tree, just clean it up */
17950078a9f9SNikolay Borisov 			ret2 = btrfs_drop_snapshot(root, 0, 1);
1796f44deb74SJosef Bacik 			if (ret2 < 0) {
1797f44deb74SJosef Bacik 				btrfs_put_root(root);
1798f44deb74SJosef Bacik 				if (!ret)
179930d40577SQu Wenruo 					ret = ret2;
180030d40577SQu Wenruo 			}
1801d2311e69SQu Wenruo 		}
1802f44deb74SJosef Bacik 	}
1803d2311e69SQu Wenruo 	return ret;
1804d2311e69SQu Wenruo }
1805d2311e69SQu Wenruo 
1806d2311e69SQu Wenruo /*
18075d4f98a2SYan Zheng  * merge the relocated tree blocks in reloc tree with corresponding
18085d4f98a2SYan Zheng  * fs tree.
18095d4f98a2SYan Zheng  */
18105d4f98a2SYan Zheng static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
18115d4f98a2SYan Zheng 					       struct btrfs_root *root)
18125d4f98a2SYan Zheng {
18130b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
18145d4f98a2SYan Zheng 	struct btrfs_key key;
18155d4f98a2SYan Zheng 	struct btrfs_key next_key;
18169e6a0c52SJosef Bacik 	struct btrfs_trans_handle *trans = NULL;
18175d4f98a2SYan Zheng 	struct btrfs_root *reloc_root;
18185d4f98a2SYan Zheng 	struct btrfs_root_item *root_item;
18195d4f98a2SYan Zheng 	struct btrfs_path *path;
18203fd0a558SYan, Zheng 	struct extent_buffer *leaf;
18215d4f98a2SYan Zheng 	int level;
18225d4f98a2SYan Zheng 	int max_level;
18235d4f98a2SYan Zheng 	int replaced = 0;
18245d4f98a2SYan Zheng 	int ret;
18255d4f98a2SYan Zheng 	int err = 0;
18263fd0a558SYan, Zheng 	u32 min_reserved;
18275d4f98a2SYan Zheng 
18285d4f98a2SYan Zheng 	path = btrfs_alloc_path();
18295d4f98a2SYan Zheng 	if (!path)
18305d4f98a2SYan Zheng 		return -ENOMEM;
1831e4058b54SDavid Sterba 	path->reada = READA_FORWARD;
18325d4f98a2SYan Zheng 
18335d4f98a2SYan Zheng 	reloc_root = root->reloc_root;
18345d4f98a2SYan Zheng 	root_item = &reloc_root->root_item;
18355d4f98a2SYan Zheng 
18365d4f98a2SYan Zheng 	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
18375d4f98a2SYan Zheng 		level = btrfs_root_level(root_item);
183867439dadSDavid Sterba 		atomic_inc(&reloc_root->node->refs);
18395d4f98a2SYan Zheng 		path->nodes[level] = reloc_root->node;
18405d4f98a2SYan Zheng 		path->slots[level] = 0;
18415d4f98a2SYan Zheng 	} else {
18425d4f98a2SYan Zheng 		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
18435d4f98a2SYan Zheng 
18445d4f98a2SYan Zheng 		level = root_item->drop_level;
18455d4f98a2SYan Zheng 		BUG_ON(level == 0);
18465d4f98a2SYan Zheng 		path->lowest_level = level;
18475d4f98a2SYan Zheng 		ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
184833c66f43SYan Zheng 		path->lowest_level = 0;
18495d4f98a2SYan Zheng 		if (ret < 0) {
18505d4f98a2SYan Zheng 			btrfs_free_path(path);
18515d4f98a2SYan Zheng 			return ret;
18525d4f98a2SYan Zheng 		}
18535d4f98a2SYan Zheng 
18545d4f98a2SYan Zheng 		btrfs_node_key_to_cpu(path->nodes[level], &next_key,
18555d4f98a2SYan Zheng 				      path->slots[level]);
18565d4f98a2SYan Zheng 		WARN_ON(memcmp(&key, &next_key, sizeof(key)));
18575d4f98a2SYan Zheng 
18585d4f98a2SYan Zheng 		btrfs_unlock_up_safe(path, 0);
18595d4f98a2SYan Zheng 	}
18605d4f98a2SYan Zheng 
18610b246afaSJeff Mahoney 	min_reserved = fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
18625d4f98a2SYan Zheng 	memset(&next_key, 0, sizeof(next_key));
18635d4f98a2SYan Zheng 
18645d4f98a2SYan Zheng 	while (1) {
186508e007d2SMiao Xie 		ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
186608e007d2SMiao Xie 					     BTRFS_RESERVE_FLUSH_ALL);
18673fd0a558SYan, Zheng 		if (ret) {
18689e6a0c52SJosef Bacik 			err = ret;
18699e6a0c52SJosef Bacik 			goto out;
18703fd0a558SYan, Zheng 		}
18719e6a0c52SJosef Bacik 		trans = btrfs_start_transaction(root, 0);
18729e6a0c52SJosef Bacik 		if (IS_ERR(trans)) {
18739e6a0c52SJosef Bacik 			err = PTR_ERR(trans);
18749e6a0c52SJosef Bacik 			trans = NULL;
18759e6a0c52SJosef Bacik 			goto out;
18769e6a0c52SJosef Bacik 		}
18772abc726aSJosef Bacik 
18782abc726aSJosef Bacik 		/*
18792abc726aSJosef Bacik 		 * At this point we no longer have a reloc_control, so we can't
18802abc726aSJosef Bacik 		 * depend on btrfs_init_reloc_root to update our last_trans.
18812abc726aSJosef Bacik 		 *
18822abc726aSJosef Bacik 		 * But that's ok, we started the trans handle on our
18832abc726aSJosef Bacik 		 * corresponding fs_root, which means it's been added to the
18842abc726aSJosef Bacik 		 * dirty list.  At commit time we'll still call
18852abc726aSJosef Bacik 		 * btrfs_update_reloc_root() and update our root item
18862abc726aSJosef Bacik 		 * appropriately.
18872abc726aSJosef Bacik 		 */
18882abc726aSJosef Bacik 		reloc_root->last_trans = trans->transid;
18899e6a0c52SJosef Bacik 		trans->block_rsv = rc->block_rsv;
18903fd0a558SYan, Zheng 
18913fd0a558SYan, Zheng 		replaced = 0;
18925d4f98a2SYan Zheng 		max_level = level;
18935d4f98a2SYan Zheng 
18945d4f98a2SYan Zheng 		ret = walk_down_reloc_tree(reloc_root, path, &level);
18955d4f98a2SYan Zheng 		if (ret < 0) {
18965d4f98a2SYan Zheng 			err = ret;
18975d4f98a2SYan Zheng 			goto out;
18985d4f98a2SYan Zheng 		}
18995d4f98a2SYan Zheng 		if (ret > 0)
19005d4f98a2SYan Zheng 			break;
19015d4f98a2SYan Zheng 
19025d4f98a2SYan Zheng 		if (!find_next_key(path, level, &key) &&
19035d4f98a2SYan Zheng 		    btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
19045d4f98a2SYan Zheng 			ret = 0;
19055d4f98a2SYan Zheng 		} else {
19063d0174f7SQu Wenruo 			ret = replace_path(trans, rc, root, reloc_root, path,
19073fd0a558SYan, Zheng 					   &next_key, level, max_level);
19085d4f98a2SYan Zheng 		}
19095d4f98a2SYan Zheng 		if (ret < 0) {
19105d4f98a2SYan Zheng 			err = ret;
19115d4f98a2SYan Zheng 			goto out;
19125d4f98a2SYan Zheng 		}
19135d4f98a2SYan Zheng 
19145d4f98a2SYan Zheng 		if (ret > 0) {
19155d4f98a2SYan Zheng 			level = ret;
19165d4f98a2SYan Zheng 			btrfs_node_key_to_cpu(path->nodes[level], &key,
19175d4f98a2SYan Zheng 					      path->slots[level]);
19185d4f98a2SYan Zheng 			replaced = 1;
19195d4f98a2SYan Zheng 		}
19205d4f98a2SYan Zheng 
19215d4f98a2SYan Zheng 		ret = walk_up_reloc_tree(reloc_root, path, &level);
19225d4f98a2SYan Zheng 		if (ret > 0)
19235d4f98a2SYan Zheng 			break;
19245d4f98a2SYan Zheng 
19255d4f98a2SYan Zheng 		BUG_ON(level == 0);
19265d4f98a2SYan Zheng 		/*
19275d4f98a2SYan Zheng 		 * save the merging progress in the drop_progress.
19285d4f98a2SYan Zheng 		 * this is OK since root refs == 1 in this case.
19295d4f98a2SYan Zheng 		 */
19305d4f98a2SYan Zheng 		btrfs_node_key(path->nodes[level], &root_item->drop_progress,
19315d4f98a2SYan Zheng 			       path->slots[level]);
19325d4f98a2SYan Zheng 		root_item->drop_level = level;
19335d4f98a2SYan Zheng 
19343a45bb20SJeff Mahoney 		btrfs_end_transaction_throttle(trans);
19359e6a0c52SJosef Bacik 		trans = NULL;
19365d4f98a2SYan Zheng 
19372ff7e61eSJeff Mahoney 		btrfs_btree_balance_dirty(fs_info);
19385d4f98a2SYan Zheng 
19395d4f98a2SYan Zheng 		if (replaced && rc->stage == UPDATE_DATA_PTRS)
19405d4f98a2SYan Zheng 			invalidate_extent_cache(root, &key, &next_key);
19415d4f98a2SYan Zheng 	}
19425d4f98a2SYan Zheng 
19435d4f98a2SYan Zheng 	/*
19445d4f98a2SYan Zheng 	 * handle the case only one block in the fs tree need to be
19455d4f98a2SYan Zheng 	 * relocated and the block is tree root.
19465d4f98a2SYan Zheng 	 */
19475d4f98a2SYan Zheng 	leaf = btrfs_lock_root_node(root);
19485d4f98a2SYan Zheng 	ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
19495d4f98a2SYan Zheng 	btrfs_tree_unlock(leaf);
19505d4f98a2SYan Zheng 	free_extent_buffer(leaf);
19515d4f98a2SYan Zheng 	if (ret < 0)
19525d4f98a2SYan Zheng 		err = ret;
19535d4f98a2SYan Zheng out:
19545d4f98a2SYan Zheng 	btrfs_free_path(path);
19555d4f98a2SYan Zheng 
1956d2311e69SQu Wenruo 	if (err == 0)
1957d2311e69SQu Wenruo 		insert_dirty_subvol(trans, rc, root);
19585d4f98a2SYan Zheng 
19599e6a0c52SJosef Bacik 	if (trans)
19603a45bb20SJeff Mahoney 		btrfs_end_transaction_throttle(trans);
19615d4f98a2SYan Zheng 
19622ff7e61eSJeff Mahoney 	btrfs_btree_balance_dirty(fs_info);
19635d4f98a2SYan Zheng 
19645d4f98a2SYan Zheng 	if (replaced && rc->stage == UPDATE_DATA_PTRS)
19655d4f98a2SYan Zheng 		invalidate_extent_cache(root, &key, &next_key);
19665d4f98a2SYan Zheng 
19675d4f98a2SYan Zheng 	return err;
19685d4f98a2SYan Zheng }
19695d4f98a2SYan Zheng 
19703fd0a558SYan, Zheng static noinline_for_stack
19713fd0a558SYan, Zheng int prepare_to_merge(struct reloc_control *rc, int err)
19725d4f98a2SYan Zheng {
19733fd0a558SYan, Zheng 	struct btrfs_root *root = rc->extent_root;
19740b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
19753fd0a558SYan, Zheng 	struct btrfs_root *reloc_root;
19765d4f98a2SYan Zheng 	struct btrfs_trans_handle *trans;
19773fd0a558SYan, Zheng 	LIST_HEAD(reloc_roots);
19783fd0a558SYan, Zheng 	u64 num_bytes = 0;
19793fd0a558SYan, Zheng 	int ret;
19803fd0a558SYan, Zheng 
19810b246afaSJeff Mahoney 	mutex_lock(&fs_info->reloc_mutex);
19820b246afaSJeff Mahoney 	rc->merging_rsv_size += fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
19833fd0a558SYan, Zheng 	rc->merging_rsv_size += rc->nodes_relocated * 2;
19840b246afaSJeff Mahoney 	mutex_unlock(&fs_info->reloc_mutex);
19857585717fSChris Mason 
19863fd0a558SYan, Zheng again:
19873fd0a558SYan, Zheng 	if (!err) {
19883fd0a558SYan, Zheng 		num_bytes = rc->merging_rsv_size;
198908e007d2SMiao Xie 		ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
199008e007d2SMiao Xie 					  BTRFS_RESERVE_FLUSH_ALL);
19913fd0a558SYan, Zheng 		if (ret)
19923fd0a558SYan, Zheng 			err = ret;
19933fd0a558SYan, Zheng 	}
19943fd0a558SYan, Zheng 
19957a7eaa40SJosef Bacik 	trans = btrfs_join_transaction(rc->extent_root);
19963612b495STsutomu Itoh 	if (IS_ERR(trans)) {
19973612b495STsutomu Itoh 		if (!err)
19982ff7e61eSJeff Mahoney 			btrfs_block_rsv_release(fs_info, rc->block_rsv,
199963f018beSNikolay Borisov 						num_bytes, NULL);
20003612b495STsutomu Itoh 		return PTR_ERR(trans);
20013612b495STsutomu Itoh 	}
20023fd0a558SYan, Zheng 
20033fd0a558SYan, Zheng 	if (!err) {
20043fd0a558SYan, Zheng 		if (num_bytes != rc->merging_rsv_size) {
20053a45bb20SJeff Mahoney 			btrfs_end_transaction(trans);
20062ff7e61eSJeff Mahoney 			btrfs_block_rsv_release(fs_info, rc->block_rsv,
200763f018beSNikolay Borisov 						num_bytes, NULL);
20083fd0a558SYan, Zheng 			goto again;
20093fd0a558SYan, Zheng 		}
20103fd0a558SYan, Zheng 	}
20113fd0a558SYan, Zheng 
20123fd0a558SYan, Zheng 	rc->merge_reloc_tree = 1;
20133fd0a558SYan, Zheng 
20143fd0a558SYan, Zheng 	while (!list_empty(&rc->reloc_roots)) {
20153fd0a558SYan, Zheng 		reloc_root = list_entry(rc->reloc_roots.next,
20163fd0a558SYan, Zheng 					struct btrfs_root, root_list);
20173fd0a558SYan, Zheng 		list_del_init(&reloc_root->root_list);
20183fd0a558SYan, Zheng 
20190b246afaSJeff Mahoney 		root = read_fs_root(fs_info, reloc_root->root_key.offset);
20203fd0a558SYan, Zheng 		BUG_ON(IS_ERR(root));
20213fd0a558SYan, Zheng 		BUG_ON(root->reloc_root != reloc_root);
20223fd0a558SYan, Zheng 
20233fd0a558SYan, Zheng 		/*
20243fd0a558SYan, Zheng 		 * set reference count to 1, so btrfs_recover_relocation
20253fd0a558SYan, Zheng 		 * knows it should resumes merging
20263fd0a558SYan, Zheng 		 */
20273fd0a558SYan, Zheng 		if (!err)
20283fd0a558SYan, Zheng 			btrfs_set_root_refs(&reloc_root->root_item, 1);
20293fd0a558SYan, Zheng 		btrfs_update_reloc_root(trans, root);
20303fd0a558SYan, Zheng 
20313fd0a558SYan, Zheng 		list_add(&reloc_root->root_list, &reloc_roots);
203200246528SJosef Bacik 		btrfs_put_root(root);
20333fd0a558SYan, Zheng 	}
20343fd0a558SYan, Zheng 
20353fd0a558SYan, Zheng 	list_splice(&reloc_roots, &rc->reloc_roots);
20363fd0a558SYan, Zheng 
20373fd0a558SYan, Zheng 	if (!err)
20383a45bb20SJeff Mahoney 		btrfs_commit_transaction(trans);
20393fd0a558SYan, Zheng 	else
20403a45bb20SJeff Mahoney 		btrfs_end_transaction(trans);
20413fd0a558SYan, Zheng 	return err;
20423fd0a558SYan, Zheng }
20433fd0a558SYan, Zheng 
20443fd0a558SYan, Zheng static noinline_for_stack
2045aca1bba6SLiu Bo void free_reloc_roots(struct list_head *list)
2046aca1bba6SLiu Bo {
2047aca1bba6SLiu Bo 	struct btrfs_root *reloc_root;
2048aca1bba6SLiu Bo 
2049aca1bba6SLiu Bo 	while (!list_empty(list)) {
2050aca1bba6SLiu Bo 		reloc_root = list_entry(list->next, struct btrfs_root,
2051aca1bba6SLiu Bo 					root_list);
2052bb166d72SNaohiro Aota 		__del_reloc_root(reloc_root);
2053aca1bba6SLiu Bo 	}
2054aca1bba6SLiu Bo }
2055aca1bba6SLiu Bo 
2056aca1bba6SLiu Bo static noinline_for_stack
205794404e82SDavid Sterba void merge_reloc_roots(struct reloc_control *rc)
20583fd0a558SYan, Zheng {
20590b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
20605d4f98a2SYan Zheng 	struct btrfs_root *root;
20615d4f98a2SYan Zheng 	struct btrfs_root *reloc_root;
20623fd0a558SYan, Zheng 	LIST_HEAD(reloc_roots);
20633fd0a558SYan, Zheng 	int found = 0;
2064aca1bba6SLiu Bo 	int ret = 0;
20653fd0a558SYan, Zheng again:
20663fd0a558SYan, Zheng 	root = rc->extent_root;
20677585717fSChris Mason 
20687585717fSChris Mason 	/*
20697585717fSChris Mason 	 * this serializes us with btrfs_record_root_in_transaction,
20707585717fSChris Mason 	 * we have to make sure nobody is in the middle of
20717585717fSChris Mason 	 * adding their roots to the list while we are
20727585717fSChris Mason 	 * doing this splice
20737585717fSChris Mason 	 */
20740b246afaSJeff Mahoney 	mutex_lock(&fs_info->reloc_mutex);
20753fd0a558SYan, Zheng 	list_splice_init(&rc->reloc_roots, &reloc_roots);
20760b246afaSJeff Mahoney 	mutex_unlock(&fs_info->reloc_mutex);
20775d4f98a2SYan Zheng 
20783fd0a558SYan, Zheng 	while (!list_empty(&reloc_roots)) {
20793fd0a558SYan, Zheng 		found = 1;
20803fd0a558SYan, Zheng 		reloc_root = list_entry(reloc_roots.next,
20813fd0a558SYan, Zheng 					struct btrfs_root, root_list);
20825d4f98a2SYan Zheng 
20835d4f98a2SYan Zheng 		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
20840b246afaSJeff Mahoney 			root = read_fs_root(fs_info,
20855d4f98a2SYan Zheng 					    reloc_root->root_key.offset);
20865d4f98a2SYan Zheng 			BUG_ON(IS_ERR(root));
20875d4f98a2SYan Zheng 			BUG_ON(root->reloc_root != reloc_root);
20885d4f98a2SYan Zheng 
20893fd0a558SYan, Zheng 			ret = merge_reloc_root(rc, root);
209000246528SJosef Bacik 			btrfs_put_root(root);
2091b37b39cdSJosef Bacik 			if (ret) {
209225e293c2SWang Shilong 				if (list_empty(&reloc_root->root_list))
209325e293c2SWang Shilong 					list_add_tail(&reloc_root->root_list,
209425e293c2SWang Shilong 						      &reloc_roots);
2095aca1bba6SLiu Bo 				goto out;
2096b37b39cdSJosef Bacik 			}
20973fd0a558SYan, Zheng 		} else {
20983fd0a558SYan, Zheng 			list_del_init(&reloc_root->root_list);
209930d40577SQu Wenruo 			/* Don't forget to queue this reloc root for cleanup */
210030d40577SQu Wenruo 			list_add_tail(&reloc_root->reloc_dirty_list,
210130d40577SQu Wenruo 				      &rc->dirty_subvol_roots);
21023fd0a558SYan, Zheng 		}
21035d4f98a2SYan Zheng 	}
21045d4f98a2SYan Zheng 
21053fd0a558SYan, Zheng 	if (found) {
21063fd0a558SYan, Zheng 		found = 0;
21073fd0a558SYan, Zheng 		goto again;
21085d4f98a2SYan Zheng 	}
2109aca1bba6SLiu Bo out:
2110aca1bba6SLiu Bo 	if (ret) {
21110b246afaSJeff Mahoney 		btrfs_handle_fs_error(fs_info, ret, NULL);
2112aca1bba6SLiu Bo 		if (!list_empty(&reloc_roots))
2113aca1bba6SLiu Bo 			free_reloc_roots(&reloc_roots);
2114467bb1d2SWang Shilong 
2115467bb1d2SWang Shilong 		/* new reloc root may be added */
21160b246afaSJeff Mahoney 		mutex_lock(&fs_info->reloc_mutex);
2117467bb1d2SWang Shilong 		list_splice_init(&rc->reloc_roots, &reloc_roots);
21180b246afaSJeff Mahoney 		mutex_unlock(&fs_info->reloc_mutex);
2119467bb1d2SWang Shilong 		if (!list_empty(&reloc_roots))
2120467bb1d2SWang Shilong 			free_reloc_roots(&reloc_roots);
2121aca1bba6SLiu Bo 	}
2122aca1bba6SLiu Bo 
21237b7b7431SJosef Bacik 	/*
21247b7b7431SJosef Bacik 	 * We used to have
21257b7b7431SJosef Bacik 	 *
21267b7b7431SJosef Bacik 	 * BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
21277b7b7431SJosef Bacik 	 *
21287b7b7431SJosef Bacik 	 * here, but it's wrong.  If we fail to start the transaction in
21297b7b7431SJosef Bacik 	 * prepare_to_merge() we will have only 0 ref reloc roots, none of which
21307b7b7431SJosef Bacik 	 * have actually been removed from the reloc_root_tree rb tree.  This is
21317b7b7431SJosef Bacik 	 * fine because we're bailing here, and we hold a reference on the root
21327b7b7431SJosef Bacik 	 * for the list that holds it, so these roots will be cleaned up when we
21337b7b7431SJosef Bacik 	 * do the reloc_dirty_list afterwards.  Meanwhile the root->reloc_root
21347b7b7431SJosef Bacik 	 * will be cleaned up on unmount.
21357b7b7431SJosef Bacik 	 *
21367b7b7431SJosef Bacik 	 * The remaining nodes will be cleaned up by free_reloc_control.
21377b7b7431SJosef Bacik 	 */
21385d4f98a2SYan Zheng }
21395d4f98a2SYan Zheng 
21405d4f98a2SYan Zheng static void free_block_list(struct rb_root *blocks)
21415d4f98a2SYan Zheng {
21425d4f98a2SYan Zheng 	struct tree_block *block;
21435d4f98a2SYan Zheng 	struct rb_node *rb_node;
21445d4f98a2SYan Zheng 	while ((rb_node = rb_first(blocks))) {
21455d4f98a2SYan Zheng 		block = rb_entry(rb_node, struct tree_block, rb_node);
21465d4f98a2SYan Zheng 		rb_erase(rb_node, blocks);
21475d4f98a2SYan Zheng 		kfree(block);
21485d4f98a2SYan Zheng 	}
21495d4f98a2SYan Zheng }
21505d4f98a2SYan Zheng 
21515d4f98a2SYan Zheng static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
21525d4f98a2SYan Zheng 				      struct btrfs_root *reloc_root)
21535d4f98a2SYan Zheng {
21540b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = reloc_root->fs_info;
21555d4f98a2SYan Zheng 	struct btrfs_root *root;
2156442b1ac5SJosef Bacik 	int ret;
21575d4f98a2SYan Zheng 
21585d4f98a2SYan Zheng 	if (reloc_root->last_trans == trans->transid)
21595d4f98a2SYan Zheng 		return 0;
21605d4f98a2SYan Zheng 
21610b246afaSJeff Mahoney 	root = read_fs_root(fs_info, reloc_root->root_key.offset);
21625d4f98a2SYan Zheng 	BUG_ON(IS_ERR(root));
21635d4f98a2SYan Zheng 	BUG_ON(root->reloc_root != reloc_root);
2164442b1ac5SJosef Bacik 	ret = btrfs_record_root_in_trans(trans, root);
216500246528SJosef Bacik 	btrfs_put_root(root);
21665d4f98a2SYan Zheng 
2167442b1ac5SJosef Bacik 	return ret;
21685d4f98a2SYan Zheng }
21695d4f98a2SYan Zheng 
21703fd0a558SYan, Zheng static noinline_for_stack
21713fd0a558SYan, Zheng struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
21723fd0a558SYan, Zheng 				     struct reloc_control *rc,
2173a26195a5SQu Wenruo 				     struct btrfs_backref_node *node,
2174a26195a5SQu Wenruo 				     struct btrfs_backref_edge *edges[])
21755d4f98a2SYan Zheng {
2176a26195a5SQu Wenruo 	struct btrfs_backref_node *next;
21775d4f98a2SYan Zheng 	struct btrfs_root *root;
21783fd0a558SYan, Zheng 	int index = 0;
21793fd0a558SYan, Zheng 
21805d4f98a2SYan Zheng 	next = node;
21815d4f98a2SYan Zheng 	while (1) {
21825d4f98a2SYan Zheng 		cond_resched();
21835d4f98a2SYan Zheng 		next = walk_up_backref(next, edges, &index);
21845d4f98a2SYan Zheng 		root = next->root;
21853fd0a558SYan, Zheng 		BUG_ON(!root);
218627cdeb70SMiao Xie 		BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state));
21875d4f98a2SYan Zheng 
21885d4f98a2SYan Zheng 		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
21895d4f98a2SYan Zheng 			record_reloc_root_in_trans(trans, root);
21905d4f98a2SYan Zheng 			break;
21915d4f98a2SYan Zheng 		}
21925d4f98a2SYan Zheng 
21935d4f98a2SYan Zheng 		btrfs_record_root_in_trans(trans, root);
21943fd0a558SYan, Zheng 		root = root->reloc_root;
21953fd0a558SYan, Zheng 
21963fd0a558SYan, Zheng 		if (next->new_bytenr != root->node->start) {
21973fd0a558SYan, Zheng 			BUG_ON(next->new_bytenr);
21983fd0a558SYan, Zheng 			BUG_ON(!list_empty(&next->list));
21993fd0a558SYan, Zheng 			next->new_bytenr = root->node->start;
220000246528SJosef Bacik 			btrfs_put_root(next->root);
220100246528SJosef Bacik 			next->root = btrfs_grab_root(root);
22020b530bc5SJosef Bacik 			ASSERT(next->root);
22033fd0a558SYan, Zheng 			list_add_tail(&next->list,
22043fd0a558SYan, Zheng 				      &rc->backref_cache.changed);
22059569cc20SQu Wenruo 			mark_block_processed(rc, next);
22065d4f98a2SYan Zheng 			break;
22075d4f98a2SYan Zheng 		}
22085d4f98a2SYan Zheng 
22093fd0a558SYan, Zheng 		WARN_ON(1);
22105d4f98a2SYan Zheng 		root = NULL;
22115d4f98a2SYan Zheng 		next = walk_down_backref(edges, &index);
22125d4f98a2SYan Zheng 		if (!next || next->level <= node->level)
22135d4f98a2SYan Zheng 			break;
22145d4f98a2SYan Zheng 	}
22153fd0a558SYan, Zheng 	if (!root)
22163fd0a558SYan, Zheng 		return NULL;
22175d4f98a2SYan Zheng 
22183fd0a558SYan, Zheng 	next = node;
22193fd0a558SYan, Zheng 	/* setup backref node path for btrfs_reloc_cow_block */
22203fd0a558SYan, Zheng 	while (1) {
22213fd0a558SYan, Zheng 		rc->backref_cache.path[next->level] = next;
22223fd0a558SYan, Zheng 		if (--index < 0)
22233fd0a558SYan, Zheng 			break;
22243fd0a558SYan, Zheng 		next = edges[index]->node[UPPER];
22253fd0a558SYan, Zheng 	}
22265d4f98a2SYan Zheng 	return root;
22275d4f98a2SYan Zheng }
22285d4f98a2SYan Zheng 
22293fd0a558SYan, Zheng /*
22303fd0a558SYan, Zheng  * select a tree root for relocation. return NULL if the block
22313fd0a558SYan, Zheng  * is reference counted. we should use do_relocation() in this
22323fd0a558SYan, Zheng  * case. return a tree root pointer if the block isn't reference
22333fd0a558SYan, Zheng  * counted. return -ENOENT if the block is root of reloc tree.
22343fd0a558SYan, Zheng  */
22355d4f98a2SYan Zheng static noinline_for_stack
2236a26195a5SQu Wenruo struct btrfs_root *select_one_root(struct btrfs_backref_node *node)
22375d4f98a2SYan Zheng {
2238a26195a5SQu Wenruo 	struct btrfs_backref_node *next;
22393fd0a558SYan, Zheng 	struct btrfs_root *root;
22403fd0a558SYan, Zheng 	struct btrfs_root *fs_root = NULL;
2241a26195a5SQu Wenruo 	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
22423fd0a558SYan, Zheng 	int index = 0;
22433fd0a558SYan, Zheng 
22443fd0a558SYan, Zheng 	next = node;
22453fd0a558SYan, Zheng 	while (1) {
22463fd0a558SYan, Zheng 		cond_resched();
22473fd0a558SYan, Zheng 		next = walk_up_backref(next, edges, &index);
22483fd0a558SYan, Zheng 		root = next->root;
22493fd0a558SYan, Zheng 		BUG_ON(!root);
22503fd0a558SYan, Zheng 
225125985edcSLucas De Marchi 		/* no other choice for non-references counted tree */
225227cdeb70SMiao Xie 		if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
22533fd0a558SYan, Zheng 			return root;
22543fd0a558SYan, Zheng 
22553fd0a558SYan, Zheng 		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
22563fd0a558SYan, Zheng 			fs_root = root;
22573fd0a558SYan, Zheng 
22583fd0a558SYan, Zheng 		if (next != node)
22593fd0a558SYan, Zheng 			return NULL;
22603fd0a558SYan, Zheng 
22613fd0a558SYan, Zheng 		next = walk_down_backref(edges, &index);
22623fd0a558SYan, Zheng 		if (!next || next->level <= node->level)
22633fd0a558SYan, Zheng 			break;
22643fd0a558SYan, Zheng 	}
22653fd0a558SYan, Zheng 
22663fd0a558SYan, Zheng 	if (!fs_root)
22673fd0a558SYan, Zheng 		return ERR_PTR(-ENOENT);
22683fd0a558SYan, Zheng 	return fs_root;
22695d4f98a2SYan Zheng }
22705d4f98a2SYan Zheng 
22715d4f98a2SYan Zheng static noinline_for_stack
22723fd0a558SYan, Zheng u64 calcu_metadata_size(struct reloc_control *rc,
2273a26195a5SQu Wenruo 			struct btrfs_backref_node *node, int reserve)
22745d4f98a2SYan Zheng {
22750b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2276a26195a5SQu Wenruo 	struct btrfs_backref_node *next = node;
2277a26195a5SQu Wenruo 	struct btrfs_backref_edge *edge;
2278a26195a5SQu Wenruo 	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
22793fd0a558SYan, Zheng 	u64 num_bytes = 0;
22803fd0a558SYan, Zheng 	int index = 0;
22815d4f98a2SYan Zheng 
22823fd0a558SYan, Zheng 	BUG_ON(reserve && node->processed);
22833fd0a558SYan, Zheng 
22843fd0a558SYan, Zheng 	while (next) {
22853fd0a558SYan, Zheng 		cond_resched();
22865d4f98a2SYan Zheng 		while (1) {
22873fd0a558SYan, Zheng 			if (next->processed && (reserve || next != node))
22885d4f98a2SYan Zheng 				break;
22895d4f98a2SYan Zheng 
22900b246afaSJeff Mahoney 			num_bytes += fs_info->nodesize;
22913fd0a558SYan, Zheng 
22923fd0a558SYan, Zheng 			if (list_empty(&next->upper))
22933fd0a558SYan, Zheng 				break;
22943fd0a558SYan, Zheng 
22953fd0a558SYan, Zheng 			edge = list_entry(next->upper.next,
2296a26195a5SQu Wenruo 					struct btrfs_backref_edge, list[LOWER]);
22973fd0a558SYan, Zheng 			edges[index++] = edge;
22983fd0a558SYan, Zheng 			next = edge->node[UPPER];
22995d4f98a2SYan Zheng 		}
23003fd0a558SYan, Zheng 		next = walk_down_backref(edges, &index);
23013fd0a558SYan, Zheng 	}
23023fd0a558SYan, Zheng 	return num_bytes;
23033fd0a558SYan, Zheng }
23043fd0a558SYan, Zheng 
23053fd0a558SYan, Zheng static int reserve_metadata_space(struct btrfs_trans_handle *trans,
23063fd0a558SYan, Zheng 				  struct reloc_control *rc,
2307a26195a5SQu Wenruo 				  struct btrfs_backref_node *node)
23083fd0a558SYan, Zheng {
23093fd0a558SYan, Zheng 	struct btrfs_root *root = rc->extent_root;
2310da17066cSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
23113fd0a558SYan, Zheng 	u64 num_bytes;
23123fd0a558SYan, Zheng 	int ret;
23130647bf56SWang Shilong 	u64 tmp;
23143fd0a558SYan, Zheng 
23153fd0a558SYan, Zheng 	num_bytes = calcu_metadata_size(rc, node, 1) * 2;
23163fd0a558SYan, Zheng 
23173fd0a558SYan, Zheng 	trans->block_rsv = rc->block_rsv;
23180647bf56SWang Shilong 	rc->reserved_bytes += num_bytes;
23198ca17f0fSJosef Bacik 
23208ca17f0fSJosef Bacik 	/*
23218ca17f0fSJosef Bacik 	 * We are under a transaction here so we can only do limited flushing.
23228ca17f0fSJosef Bacik 	 * If we get an enospc just kick back -EAGAIN so we know to drop the
23238ca17f0fSJosef Bacik 	 * transaction and try to refill when we can flush all the things.
23248ca17f0fSJosef Bacik 	 */
23250647bf56SWang Shilong 	ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes,
23268ca17f0fSJosef Bacik 				BTRFS_RESERVE_FLUSH_LIMIT);
23273fd0a558SYan, Zheng 	if (ret) {
2328da17066cSJeff Mahoney 		tmp = fs_info->nodesize * RELOCATION_RESERVED_NODES;
23290647bf56SWang Shilong 		while (tmp <= rc->reserved_bytes)
23300647bf56SWang Shilong 			tmp <<= 1;
23310647bf56SWang Shilong 		/*
23320647bf56SWang Shilong 		 * only one thread can access block_rsv at this point,
23330647bf56SWang Shilong 		 * so we don't need hold lock to protect block_rsv.
23340647bf56SWang Shilong 		 * we expand more reservation size here to allow enough
233552042d8eSAndrea Gelmini 		 * space for relocation and we will return earlier in
23360647bf56SWang Shilong 		 * enospc case.
23370647bf56SWang Shilong 		 */
2338da17066cSJeff Mahoney 		rc->block_rsv->size = tmp + fs_info->nodesize *
23390647bf56SWang Shilong 				      RELOCATION_RESERVED_NODES;
23408ca17f0fSJosef Bacik 		return -EAGAIN;
23413fd0a558SYan, Zheng 	}
23423fd0a558SYan, Zheng 
23433fd0a558SYan, Zheng 	return 0;
23443fd0a558SYan, Zheng }
23453fd0a558SYan, Zheng 
23465d4f98a2SYan Zheng /*
23475d4f98a2SYan Zheng  * relocate a block tree, and then update pointers in upper level
23485d4f98a2SYan Zheng  * blocks that reference the block to point to the new location.
23495d4f98a2SYan Zheng  *
23505d4f98a2SYan Zheng  * if called by link_to_upper, the block has already been relocated.
23515d4f98a2SYan Zheng  * in that case this function just updates pointers.
23525d4f98a2SYan Zheng  */
23535d4f98a2SYan Zheng static int do_relocation(struct btrfs_trans_handle *trans,
23543fd0a558SYan, Zheng 			 struct reloc_control *rc,
2355a26195a5SQu Wenruo 			 struct btrfs_backref_node *node,
23565d4f98a2SYan Zheng 			 struct btrfs_key *key,
23575d4f98a2SYan Zheng 			 struct btrfs_path *path, int lowest)
23585d4f98a2SYan Zheng {
23592ff7e61eSJeff Mahoney 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2360a26195a5SQu Wenruo 	struct btrfs_backref_node *upper;
2361a26195a5SQu Wenruo 	struct btrfs_backref_edge *edge;
2362a26195a5SQu Wenruo 	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
23635d4f98a2SYan Zheng 	struct btrfs_root *root;
23645d4f98a2SYan Zheng 	struct extent_buffer *eb;
23655d4f98a2SYan Zheng 	u32 blocksize;
23665d4f98a2SYan Zheng 	u64 bytenr;
23675d4f98a2SYan Zheng 	u64 generation;
23685d4f98a2SYan Zheng 	int slot;
23695d4f98a2SYan Zheng 	int ret;
23705d4f98a2SYan Zheng 	int err = 0;
23715d4f98a2SYan Zheng 
23725d4f98a2SYan Zheng 	BUG_ON(lowest && node->eb);
23735d4f98a2SYan Zheng 
23745d4f98a2SYan Zheng 	path->lowest_level = node->level + 1;
23753fd0a558SYan, Zheng 	rc->backref_cache.path[node->level] = node;
23765d4f98a2SYan Zheng 	list_for_each_entry(edge, &node->upper, list[LOWER]) {
2377581c1760SQu Wenruo 		struct btrfs_key first_key;
237882fa113fSQu Wenruo 		struct btrfs_ref ref = { 0 };
2379581c1760SQu Wenruo 
23805d4f98a2SYan Zheng 		cond_resched();
23815d4f98a2SYan Zheng 
23825d4f98a2SYan Zheng 		upper = edge->node[UPPER];
2383dc4103f9SWang Shilong 		root = select_reloc_root(trans, rc, upper, edges);
23843fd0a558SYan, Zheng 		BUG_ON(!root);
23855d4f98a2SYan Zheng 
23863fd0a558SYan, Zheng 		if (upper->eb && !upper->locked) {
23873fd0a558SYan, Zheng 			if (!lowest) {
23883fd0a558SYan, Zheng 				ret = btrfs_bin_search(upper->eb, key,
23893fd0a558SYan, Zheng 						       upper->level, &slot);
2390cbca7d59SFilipe Manana 				if (ret < 0) {
2391cbca7d59SFilipe Manana 					err = ret;
2392cbca7d59SFilipe Manana 					goto next;
2393cbca7d59SFilipe Manana 				}
23943fd0a558SYan, Zheng 				BUG_ON(ret);
23953fd0a558SYan, Zheng 				bytenr = btrfs_node_blockptr(upper->eb, slot);
23963fd0a558SYan, Zheng 				if (node->eb->start == bytenr)
23973fd0a558SYan, Zheng 					goto next;
23983fd0a558SYan, Zheng 			}
2399b0fe7078SQu Wenruo 			btrfs_backref_drop_node_buffer(upper);
24003fd0a558SYan, Zheng 		}
24015d4f98a2SYan Zheng 
24025d4f98a2SYan Zheng 		if (!upper->eb) {
24035d4f98a2SYan Zheng 			ret = btrfs_search_slot(trans, root, key, path, 0, 1);
24043561b9dbSLiu Bo 			if (ret) {
24053561b9dbSLiu Bo 				if (ret < 0)
24065d4f98a2SYan Zheng 					err = ret;
24073561b9dbSLiu Bo 				else
24083561b9dbSLiu Bo 					err = -ENOENT;
24093561b9dbSLiu Bo 
24103561b9dbSLiu Bo 				btrfs_release_path(path);
24115d4f98a2SYan Zheng 				break;
24125d4f98a2SYan Zheng 			}
24135d4f98a2SYan Zheng 
24143fd0a558SYan, Zheng 			if (!upper->eb) {
24153fd0a558SYan, Zheng 				upper->eb = path->nodes[upper->level];
24163fd0a558SYan, Zheng 				path->nodes[upper->level] = NULL;
24173fd0a558SYan, Zheng 			} else {
24183fd0a558SYan, Zheng 				BUG_ON(upper->eb != path->nodes[upper->level]);
24193fd0a558SYan, Zheng 			}
24203fd0a558SYan, Zheng 
24213fd0a558SYan, Zheng 			upper->locked = 1;
24223fd0a558SYan, Zheng 			path->locks[upper->level] = 0;
24233fd0a558SYan, Zheng 
24245d4f98a2SYan Zheng 			slot = path->slots[upper->level];
2425b3b4aa74SDavid Sterba 			btrfs_release_path(path);
24265d4f98a2SYan Zheng 		} else {
24275d4f98a2SYan Zheng 			ret = btrfs_bin_search(upper->eb, key, upper->level,
24285d4f98a2SYan Zheng 					       &slot);
2429cbca7d59SFilipe Manana 			if (ret < 0) {
2430cbca7d59SFilipe Manana 				err = ret;
2431cbca7d59SFilipe Manana 				goto next;
2432cbca7d59SFilipe Manana 			}
24335d4f98a2SYan Zheng 			BUG_ON(ret);
24345d4f98a2SYan Zheng 		}
24355d4f98a2SYan Zheng 
24365d4f98a2SYan Zheng 		bytenr = btrfs_node_blockptr(upper->eb, slot);
24373fd0a558SYan, Zheng 		if (lowest) {
24384547f4d8SLiu Bo 			if (bytenr != node->bytenr) {
24394547f4d8SLiu Bo 				btrfs_err(root->fs_info,
24404547f4d8SLiu Bo 		"lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
24414547f4d8SLiu Bo 					  bytenr, node->bytenr, slot,
24424547f4d8SLiu Bo 					  upper->eb->start);
24434547f4d8SLiu Bo 				err = -EIO;
24444547f4d8SLiu Bo 				goto next;
24454547f4d8SLiu Bo 			}
24465d4f98a2SYan Zheng 		} else {
24473fd0a558SYan, Zheng 			if (node->eb->start == bytenr)
24483fd0a558SYan, Zheng 				goto next;
24495d4f98a2SYan Zheng 		}
24505d4f98a2SYan Zheng 
2451da17066cSJeff Mahoney 		blocksize = root->fs_info->nodesize;
24525d4f98a2SYan Zheng 		generation = btrfs_node_ptr_generation(upper->eb, slot);
2453581c1760SQu Wenruo 		btrfs_node_key_to_cpu(upper->eb, &first_key, slot);
2454581c1760SQu Wenruo 		eb = read_tree_block(fs_info, bytenr, generation,
2455581c1760SQu Wenruo 				     upper->level - 1, &first_key);
245664c043deSLiu Bo 		if (IS_ERR(eb)) {
245764c043deSLiu Bo 			err = PTR_ERR(eb);
245864c043deSLiu Bo 			goto next;
245964c043deSLiu Bo 		} else if (!extent_buffer_uptodate(eb)) {
2460416bc658SJosef Bacik 			free_extent_buffer(eb);
246197d9a8a4STsutomu Itoh 			err = -EIO;
246297d9a8a4STsutomu Itoh 			goto next;
246397d9a8a4STsutomu Itoh 		}
24645d4f98a2SYan Zheng 		btrfs_tree_lock(eb);
24658bead258SDavid Sterba 		btrfs_set_lock_blocking_write(eb);
24665d4f98a2SYan Zheng 
24675d4f98a2SYan Zheng 		if (!node->eb) {
24685d4f98a2SYan Zheng 			ret = btrfs_cow_block(trans, root, eb, upper->eb,
24695d4f98a2SYan Zheng 					      slot, &eb);
24703fd0a558SYan, Zheng 			btrfs_tree_unlock(eb);
24713fd0a558SYan, Zheng 			free_extent_buffer(eb);
24725d4f98a2SYan Zheng 			if (ret < 0) {
24735d4f98a2SYan Zheng 				err = ret;
24743fd0a558SYan, Zheng 				goto next;
24755d4f98a2SYan Zheng 			}
24763fd0a558SYan, Zheng 			BUG_ON(node->eb != eb);
24775d4f98a2SYan Zheng 		} else {
24785d4f98a2SYan Zheng 			btrfs_set_node_blockptr(upper->eb, slot,
24795d4f98a2SYan Zheng 						node->eb->start);
24805d4f98a2SYan Zheng 			btrfs_set_node_ptr_generation(upper->eb, slot,
24815d4f98a2SYan Zheng 						      trans->transid);
24825d4f98a2SYan Zheng 			btrfs_mark_buffer_dirty(upper->eb);
24835d4f98a2SYan Zheng 
248482fa113fSQu Wenruo 			btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
24855d4f98a2SYan Zheng 					       node->eb->start, blocksize,
248682fa113fSQu Wenruo 					       upper->eb->start);
248782fa113fSQu Wenruo 			ref.real_root = root->root_key.objectid;
248882fa113fSQu Wenruo 			btrfs_init_tree_ref(&ref, node->level,
248982fa113fSQu Wenruo 					    btrfs_header_owner(upper->eb));
249082fa113fSQu Wenruo 			ret = btrfs_inc_extent_ref(trans, &ref);
24915d4f98a2SYan Zheng 			BUG_ON(ret);
24925d4f98a2SYan Zheng 
24935d4f98a2SYan Zheng 			ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
24945d4f98a2SYan Zheng 			BUG_ON(ret);
24955d4f98a2SYan Zheng 		}
24963fd0a558SYan, Zheng next:
24973fd0a558SYan, Zheng 		if (!upper->pending)
2498b0fe7078SQu Wenruo 			btrfs_backref_drop_node_buffer(upper);
24993fd0a558SYan, Zheng 		else
2500b0fe7078SQu Wenruo 			btrfs_backref_unlock_node_buffer(upper);
25013fd0a558SYan, Zheng 		if (err)
25023fd0a558SYan, Zheng 			break;
25035d4f98a2SYan Zheng 	}
25043fd0a558SYan, Zheng 
25053fd0a558SYan, Zheng 	if (!err && node->pending) {
2506b0fe7078SQu Wenruo 		btrfs_backref_drop_node_buffer(node);
25073fd0a558SYan, Zheng 		list_move_tail(&node->list, &rc->backref_cache.changed);
25083fd0a558SYan, Zheng 		node->pending = 0;
25095d4f98a2SYan Zheng 	}
25103fd0a558SYan, Zheng 
25115d4f98a2SYan Zheng 	path->lowest_level = 0;
25123fd0a558SYan, Zheng 	BUG_ON(err == -ENOSPC);
25135d4f98a2SYan Zheng 	return err;
25145d4f98a2SYan Zheng }
25155d4f98a2SYan Zheng 
25165d4f98a2SYan Zheng static int link_to_upper(struct btrfs_trans_handle *trans,
25173fd0a558SYan, Zheng 			 struct reloc_control *rc,
2518a26195a5SQu Wenruo 			 struct btrfs_backref_node *node,
25195d4f98a2SYan Zheng 			 struct btrfs_path *path)
25205d4f98a2SYan Zheng {
25215d4f98a2SYan Zheng 	struct btrfs_key key;
25225d4f98a2SYan Zheng 
25235d4f98a2SYan Zheng 	btrfs_node_key_to_cpu(node->eb, &key, 0);
25243fd0a558SYan, Zheng 	return do_relocation(trans, rc, node, &key, path, 0);
25255d4f98a2SYan Zheng }
25265d4f98a2SYan Zheng 
25275d4f98a2SYan Zheng static int finish_pending_nodes(struct btrfs_trans_handle *trans,
25283fd0a558SYan, Zheng 				struct reloc_control *rc,
25293fd0a558SYan, Zheng 				struct btrfs_path *path, int err)
25305d4f98a2SYan Zheng {
25313fd0a558SYan, Zheng 	LIST_HEAD(list);
2532a26195a5SQu Wenruo 	struct btrfs_backref_cache *cache = &rc->backref_cache;
2533a26195a5SQu Wenruo 	struct btrfs_backref_node *node;
25345d4f98a2SYan Zheng 	int level;
25355d4f98a2SYan Zheng 	int ret;
25365d4f98a2SYan Zheng 
25375d4f98a2SYan Zheng 	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
25385d4f98a2SYan Zheng 		while (!list_empty(&cache->pending[level])) {
25395d4f98a2SYan Zheng 			node = list_entry(cache->pending[level].next,
2540a26195a5SQu Wenruo 					  struct btrfs_backref_node, list);
25413fd0a558SYan, Zheng 			list_move_tail(&node->list, &list);
25423fd0a558SYan, Zheng 			BUG_ON(!node->pending);
25435d4f98a2SYan Zheng 
25443fd0a558SYan, Zheng 			if (!err) {
25453fd0a558SYan, Zheng 				ret = link_to_upper(trans, rc, node, path);
25465d4f98a2SYan Zheng 				if (ret < 0)
25475d4f98a2SYan Zheng 					err = ret;
25485d4f98a2SYan Zheng 			}
25495d4f98a2SYan Zheng 		}
25503fd0a558SYan, Zheng 		list_splice_init(&list, &cache->pending[level]);
25513fd0a558SYan, Zheng 	}
25525d4f98a2SYan Zheng 	return err;
25535d4f98a2SYan Zheng }
25545d4f98a2SYan Zheng 
25555d4f98a2SYan Zheng /*
25565d4f98a2SYan Zheng  * mark a block and all blocks directly/indirectly reference the block
25575d4f98a2SYan Zheng  * as processed.
25585d4f98a2SYan Zheng  */
25595d4f98a2SYan Zheng static void update_processed_blocks(struct reloc_control *rc,
2560a26195a5SQu Wenruo 				    struct btrfs_backref_node *node)
25615d4f98a2SYan Zheng {
2562a26195a5SQu Wenruo 	struct btrfs_backref_node *next = node;
2563a26195a5SQu Wenruo 	struct btrfs_backref_edge *edge;
2564a26195a5SQu Wenruo 	struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
25655d4f98a2SYan Zheng 	int index = 0;
25665d4f98a2SYan Zheng 
25675d4f98a2SYan Zheng 	while (next) {
25685d4f98a2SYan Zheng 		cond_resched();
25695d4f98a2SYan Zheng 		while (1) {
25705d4f98a2SYan Zheng 			if (next->processed)
25715d4f98a2SYan Zheng 				break;
25725d4f98a2SYan Zheng 
25739569cc20SQu Wenruo 			mark_block_processed(rc, next);
25745d4f98a2SYan Zheng 
25755d4f98a2SYan Zheng 			if (list_empty(&next->upper))
25765d4f98a2SYan Zheng 				break;
25775d4f98a2SYan Zheng 
25785d4f98a2SYan Zheng 			edge = list_entry(next->upper.next,
2579a26195a5SQu Wenruo 					struct btrfs_backref_edge, list[LOWER]);
25805d4f98a2SYan Zheng 			edges[index++] = edge;
25815d4f98a2SYan Zheng 			next = edge->node[UPPER];
25825d4f98a2SYan Zheng 		}
25835d4f98a2SYan Zheng 		next = walk_down_backref(edges, &index);
25845d4f98a2SYan Zheng 	}
25855d4f98a2SYan Zheng }
25865d4f98a2SYan Zheng 
25877476dfdaSDavid Sterba static int tree_block_processed(u64 bytenr, struct reloc_control *rc)
25885d4f98a2SYan Zheng {
2589da17066cSJeff Mahoney 	u32 blocksize = rc->extent_root->fs_info->nodesize;
25907476dfdaSDavid Sterba 
25915d4f98a2SYan Zheng 	if (test_range_bit(&rc->processed_blocks, bytenr,
25929655d298SChris Mason 			   bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
25935d4f98a2SYan Zheng 		return 1;
25945d4f98a2SYan Zheng 	return 0;
25955d4f98a2SYan Zheng }
25965d4f98a2SYan Zheng 
25972ff7e61eSJeff Mahoney static int get_tree_block_key(struct btrfs_fs_info *fs_info,
25985d4f98a2SYan Zheng 			      struct tree_block *block)
25995d4f98a2SYan Zheng {
26005d4f98a2SYan Zheng 	struct extent_buffer *eb;
26015d4f98a2SYan Zheng 
2602581c1760SQu Wenruo 	eb = read_tree_block(fs_info, block->bytenr, block->key.offset,
2603581c1760SQu Wenruo 			     block->level, NULL);
260464c043deSLiu Bo 	if (IS_ERR(eb)) {
260564c043deSLiu Bo 		return PTR_ERR(eb);
260664c043deSLiu Bo 	} else if (!extent_buffer_uptodate(eb)) {
2607416bc658SJosef Bacik 		free_extent_buffer(eb);
2608416bc658SJosef Bacik 		return -EIO;
2609416bc658SJosef Bacik 	}
26105d4f98a2SYan Zheng 	if (block->level == 0)
26115d4f98a2SYan Zheng 		btrfs_item_key_to_cpu(eb, &block->key, 0);
26125d4f98a2SYan Zheng 	else
26135d4f98a2SYan Zheng 		btrfs_node_key_to_cpu(eb, &block->key, 0);
26145d4f98a2SYan Zheng 	free_extent_buffer(eb);
26155d4f98a2SYan Zheng 	block->key_ready = 1;
26165d4f98a2SYan Zheng 	return 0;
26175d4f98a2SYan Zheng }
26185d4f98a2SYan Zheng 
26195d4f98a2SYan Zheng /*
26205d4f98a2SYan Zheng  * helper function to relocate a tree block
26215d4f98a2SYan Zheng  */
26225d4f98a2SYan Zheng static int relocate_tree_block(struct btrfs_trans_handle *trans,
26235d4f98a2SYan Zheng 				struct reloc_control *rc,
2624a26195a5SQu Wenruo 				struct btrfs_backref_node *node,
26255d4f98a2SYan Zheng 				struct btrfs_key *key,
26265d4f98a2SYan Zheng 				struct btrfs_path *path)
26275d4f98a2SYan Zheng {
26285d4f98a2SYan Zheng 	struct btrfs_root *root;
26293fd0a558SYan, Zheng 	int ret = 0;
26305d4f98a2SYan Zheng 
26313fd0a558SYan, Zheng 	if (!node)
26325d4f98a2SYan Zheng 		return 0;
26333fd0a558SYan, Zheng 
26345f6b2e5cSJosef Bacik 	/*
26355f6b2e5cSJosef Bacik 	 * If we fail here we want to drop our backref_node because we are going
26365f6b2e5cSJosef Bacik 	 * to start over and regenerate the tree for it.
26375f6b2e5cSJosef Bacik 	 */
26385f6b2e5cSJosef Bacik 	ret = reserve_metadata_space(trans, rc, node);
26395f6b2e5cSJosef Bacik 	if (ret)
26405f6b2e5cSJosef Bacik 		goto out;
26415f6b2e5cSJosef Bacik 
26423fd0a558SYan, Zheng 	BUG_ON(node->processed);
2643147d256eSZhaolei 	root = select_one_root(node);
26443fd0a558SYan, Zheng 	if (root == ERR_PTR(-ENOENT)) {
26453fd0a558SYan, Zheng 		update_processed_blocks(rc, node);
26463fd0a558SYan, Zheng 		goto out;
26475d4f98a2SYan Zheng 	}
26485d4f98a2SYan Zheng 
26493fd0a558SYan, Zheng 	if (root) {
265027cdeb70SMiao Xie 		if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
26513fd0a558SYan, Zheng 			BUG_ON(node->new_bytenr);
26523fd0a558SYan, Zheng 			BUG_ON(!list_empty(&node->list));
26533fd0a558SYan, Zheng 			btrfs_record_root_in_trans(trans, root);
26543fd0a558SYan, Zheng 			root = root->reloc_root;
26553fd0a558SYan, Zheng 			node->new_bytenr = root->node->start;
265600246528SJosef Bacik 			btrfs_put_root(node->root);
265700246528SJosef Bacik 			node->root = btrfs_grab_root(root);
26580b530bc5SJosef Bacik 			ASSERT(node->root);
26593fd0a558SYan, Zheng 			list_add_tail(&node->list, &rc->backref_cache.changed);
26603fd0a558SYan, Zheng 		} else {
26615d4f98a2SYan Zheng 			path->lowest_level = node->level;
26625d4f98a2SYan Zheng 			ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2663b3b4aa74SDavid Sterba 			btrfs_release_path(path);
26643fd0a558SYan, Zheng 			if (ret > 0)
26655d4f98a2SYan Zheng 				ret = 0;
26663fd0a558SYan, Zheng 		}
26673fd0a558SYan, Zheng 		if (!ret)
26683fd0a558SYan, Zheng 			update_processed_blocks(rc, node);
26693fd0a558SYan, Zheng 	} else {
26703fd0a558SYan, Zheng 		ret = do_relocation(trans, rc, node, key, path, 1);
26713fd0a558SYan, Zheng 	}
26725d4f98a2SYan Zheng out:
26730647bf56SWang Shilong 	if (ret || node->level == 0 || node->cowonly)
2674023acb07SQu Wenruo 		btrfs_backref_cleanup_node(&rc->backref_cache, node);
26755d4f98a2SYan Zheng 	return ret;
26765d4f98a2SYan Zheng }
26775d4f98a2SYan Zheng 
26785d4f98a2SYan Zheng /*
26795d4f98a2SYan Zheng  * relocate a list of blocks
26805d4f98a2SYan Zheng  */
26815d4f98a2SYan Zheng static noinline_for_stack
26825d4f98a2SYan Zheng int relocate_tree_blocks(struct btrfs_trans_handle *trans,
26835d4f98a2SYan Zheng 			 struct reloc_control *rc, struct rb_root *blocks)
26845d4f98a2SYan Zheng {
26852ff7e61eSJeff Mahoney 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
2686a26195a5SQu Wenruo 	struct btrfs_backref_node *node;
26875d4f98a2SYan Zheng 	struct btrfs_path *path;
26885d4f98a2SYan Zheng 	struct tree_block *block;
268998ff7b94SQu Wenruo 	struct tree_block *next;
26905d4f98a2SYan Zheng 	int ret;
26915d4f98a2SYan Zheng 	int err = 0;
26925d4f98a2SYan Zheng 
26935d4f98a2SYan Zheng 	path = btrfs_alloc_path();
2694e1a12670SLiu Bo 	if (!path) {
2695e1a12670SLiu Bo 		err = -ENOMEM;
269634c2b290SDavid Sterba 		goto out_free_blocks;
2697e1a12670SLiu Bo 	}
26985d4f98a2SYan Zheng 
269998ff7b94SQu Wenruo 	/* Kick in readahead for tree blocks with missing keys */
270098ff7b94SQu Wenruo 	rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
27015d4f98a2SYan Zheng 		if (!block->key_ready)
27022ff7e61eSJeff Mahoney 			readahead_tree_block(fs_info, block->bytenr);
27035d4f98a2SYan Zheng 	}
27045d4f98a2SYan Zheng 
270598ff7b94SQu Wenruo 	/* Get first keys */
270698ff7b94SQu Wenruo 	rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
270734c2b290SDavid Sterba 		if (!block->key_ready) {
27082ff7e61eSJeff Mahoney 			err = get_tree_block_key(fs_info, block);
270934c2b290SDavid Sterba 			if (err)
271034c2b290SDavid Sterba 				goto out_free_path;
271134c2b290SDavid Sterba 		}
27125d4f98a2SYan Zheng 	}
27135d4f98a2SYan Zheng 
271498ff7b94SQu Wenruo 	/* Do tree relocation */
271598ff7b94SQu Wenruo 	rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
27163fd0a558SYan, Zheng 		node = build_backref_tree(rc, &block->key,
27175d4f98a2SYan Zheng 					  block->level, block->bytenr);
27185d4f98a2SYan Zheng 		if (IS_ERR(node)) {
27195d4f98a2SYan Zheng 			err = PTR_ERR(node);
27205d4f98a2SYan Zheng 			goto out;
27215d4f98a2SYan Zheng 		}
27225d4f98a2SYan Zheng 
27235d4f98a2SYan Zheng 		ret = relocate_tree_block(trans, rc, node, &block->key,
27245d4f98a2SYan Zheng 					  path);
27255d4f98a2SYan Zheng 		if (ret < 0) {
27265d4f98a2SYan Zheng 			err = ret;
272750dbbb71SJosef Bacik 			break;
27285d4f98a2SYan Zheng 		}
27295d4f98a2SYan Zheng 	}
27305d4f98a2SYan Zheng out:
27313fd0a558SYan, Zheng 	err = finish_pending_nodes(trans, rc, path, err);
27325d4f98a2SYan Zheng 
273334c2b290SDavid Sterba out_free_path:
27345d4f98a2SYan Zheng 	btrfs_free_path(path);
273534c2b290SDavid Sterba out_free_blocks:
2736e1a12670SLiu Bo 	free_block_list(blocks);
27375d4f98a2SYan Zheng 	return err;
27385d4f98a2SYan Zheng }
27395d4f98a2SYan Zheng 
27405d4f98a2SYan Zheng static noinline_for_stack
2741efa56464SYan, Zheng int prealloc_file_extent_cluster(struct inode *inode,
2742efa56464SYan, Zheng 				 struct file_extent_cluster *cluster)
2743efa56464SYan, Zheng {
2744efa56464SYan, Zheng 	u64 alloc_hint = 0;
2745efa56464SYan, Zheng 	u64 start;
2746efa56464SYan, Zheng 	u64 end;
2747efa56464SYan, Zheng 	u64 offset = BTRFS_I(inode)->index_cnt;
2748efa56464SYan, Zheng 	u64 num_bytes;
2749efa56464SYan, Zheng 	int nr = 0;
2750efa56464SYan, Zheng 	int ret = 0;
2751dcb40c19SWang Xiaoguang 	u64 prealloc_start = cluster->start - offset;
2752dcb40c19SWang Xiaoguang 	u64 prealloc_end = cluster->end - offset;
275318513091SWang Xiaoguang 	u64 cur_offset;
2754364ecf36SQu Wenruo 	struct extent_changeset *data_reserved = NULL;
2755efa56464SYan, Zheng 
2756efa56464SYan, Zheng 	BUG_ON(cluster->start != cluster->boundary[0]);
27575955102cSAl Viro 	inode_lock(inode);
2758efa56464SYan, Zheng 
2759364ecf36SQu Wenruo 	ret = btrfs_check_data_free_space(inode, &data_reserved, prealloc_start,
2760dcb40c19SWang Xiaoguang 					  prealloc_end + 1 - prealloc_start);
2761efa56464SYan, Zheng 	if (ret)
2762efa56464SYan, Zheng 		goto out;
2763efa56464SYan, Zheng 
276418513091SWang Xiaoguang 	cur_offset = prealloc_start;
2765efa56464SYan, Zheng 	while (nr < cluster->nr) {
2766efa56464SYan, Zheng 		start = cluster->boundary[nr] - offset;
2767efa56464SYan, Zheng 		if (nr + 1 < cluster->nr)
2768efa56464SYan, Zheng 			end = cluster->boundary[nr + 1] - 1 - offset;
2769efa56464SYan, Zheng 		else
2770efa56464SYan, Zheng 			end = cluster->end - offset;
2771efa56464SYan, Zheng 
2772d0082371SJeff Mahoney 		lock_extent(&BTRFS_I(inode)->io_tree, start, end);
2773efa56464SYan, Zheng 		num_bytes = end + 1 - start;
277418513091SWang Xiaoguang 		if (cur_offset < start)
2775bc42bda2SQu Wenruo 			btrfs_free_reserved_data_space(inode, data_reserved,
2776bc42bda2SQu Wenruo 					cur_offset, start - cur_offset);
2777efa56464SYan, Zheng 		ret = btrfs_prealloc_file_range(inode, 0, start,
2778efa56464SYan, Zheng 						num_bytes, num_bytes,
2779efa56464SYan, Zheng 						end + 1, &alloc_hint);
278018513091SWang Xiaoguang 		cur_offset = end + 1;
2781d0082371SJeff Mahoney 		unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
2782efa56464SYan, Zheng 		if (ret)
2783efa56464SYan, Zheng 			break;
2784efa56464SYan, Zheng 		nr++;
2785efa56464SYan, Zheng 	}
278618513091SWang Xiaoguang 	if (cur_offset < prealloc_end)
2787bc42bda2SQu Wenruo 		btrfs_free_reserved_data_space(inode, data_reserved,
2788bc42bda2SQu Wenruo 				cur_offset, prealloc_end + 1 - cur_offset);
2789efa56464SYan, Zheng out:
27905955102cSAl Viro 	inode_unlock(inode);
2791364ecf36SQu Wenruo 	extent_changeset_free(data_reserved);
2792efa56464SYan, Zheng 	return ret;
2793efa56464SYan, Zheng }
2794efa56464SYan, Zheng 
2795efa56464SYan, Zheng static noinline_for_stack
27960257bb82SYan, Zheng int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
27970257bb82SYan, Zheng 			 u64 block_start)
27985d4f98a2SYan Zheng {
27995d4f98a2SYan Zheng 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
28005d4f98a2SYan Zheng 	struct extent_map *em;
28010257bb82SYan, Zheng 	int ret = 0;
28025d4f98a2SYan Zheng 
2803172ddd60SDavid Sterba 	em = alloc_extent_map();
28040257bb82SYan, Zheng 	if (!em)
28050257bb82SYan, Zheng 		return -ENOMEM;
28060257bb82SYan, Zheng 
28075d4f98a2SYan Zheng 	em->start = start;
28080257bb82SYan, Zheng 	em->len = end + 1 - start;
28090257bb82SYan, Zheng 	em->block_len = em->len;
28100257bb82SYan, Zheng 	em->block_start = block_start;
28115d4f98a2SYan Zheng 	set_bit(EXTENT_FLAG_PINNED, &em->flags);
28125d4f98a2SYan Zheng 
2813d0082371SJeff Mahoney 	lock_extent(&BTRFS_I(inode)->io_tree, start, end);
28145d4f98a2SYan Zheng 	while (1) {
2815890871beSChris Mason 		write_lock(&em_tree->lock);
281609a2a8f9SJosef Bacik 		ret = add_extent_mapping(em_tree, em, 0);
2817890871beSChris Mason 		write_unlock(&em_tree->lock);
28185d4f98a2SYan Zheng 		if (ret != -EEXIST) {
28195d4f98a2SYan Zheng 			free_extent_map(em);
28205d4f98a2SYan Zheng 			break;
28215d4f98a2SYan Zheng 		}
2822dcdbc059SNikolay Borisov 		btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
28235d4f98a2SYan Zheng 	}
2824d0082371SJeff Mahoney 	unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
28250257bb82SYan, Zheng 	return ret;
28260257bb82SYan, Zheng }
28275d4f98a2SYan Zheng 
2828726a3421SQu Wenruo /*
2829726a3421SQu Wenruo  * Allow error injection to test balance cancellation
2830726a3421SQu Wenruo  */
2831726a3421SQu Wenruo int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
2832726a3421SQu Wenruo {
2833726a3421SQu Wenruo 	return atomic_read(&fs_info->balance_cancel_req);
2834726a3421SQu Wenruo }
2835726a3421SQu Wenruo ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
2836726a3421SQu Wenruo 
28370257bb82SYan, Zheng static int relocate_file_extent_cluster(struct inode *inode,
28380257bb82SYan, Zheng 					struct file_extent_cluster *cluster)
28390257bb82SYan, Zheng {
28402ff7e61eSJeff Mahoney 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
28410257bb82SYan, Zheng 	u64 page_start;
28420257bb82SYan, Zheng 	u64 page_end;
28430257bb82SYan, Zheng 	u64 offset = BTRFS_I(inode)->index_cnt;
28440257bb82SYan, Zheng 	unsigned long index;
28450257bb82SYan, Zheng 	unsigned long last_index;
28460257bb82SYan, Zheng 	struct page *page;
28470257bb82SYan, Zheng 	struct file_ra_state *ra;
28483b16a4e3SJosef Bacik 	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
28490257bb82SYan, Zheng 	int nr = 0;
28500257bb82SYan, Zheng 	int ret = 0;
28510257bb82SYan, Zheng 
28520257bb82SYan, Zheng 	if (!cluster->nr)
28530257bb82SYan, Zheng 		return 0;
28540257bb82SYan, Zheng 
28550257bb82SYan, Zheng 	ra = kzalloc(sizeof(*ra), GFP_NOFS);
28560257bb82SYan, Zheng 	if (!ra)
28570257bb82SYan, Zheng 		return -ENOMEM;
28580257bb82SYan, Zheng 
2859efa56464SYan, Zheng 	ret = prealloc_file_extent_cluster(inode, cluster);
28600257bb82SYan, Zheng 	if (ret)
2861efa56464SYan, Zheng 		goto out;
28620257bb82SYan, Zheng 
28630257bb82SYan, Zheng 	file_ra_state_init(ra, inode->i_mapping);
28640257bb82SYan, Zheng 
2865efa56464SYan, Zheng 	ret = setup_extent_mapping(inode, cluster->start - offset,
2866efa56464SYan, Zheng 				   cluster->end - offset, cluster->start);
2867efa56464SYan, Zheng 	if (ret)
2868efa56464SYan, Zheng 		goto out;
2869efa56464SYan, Zheng 
287009cbfeafSKirill A. Shutemov 	index = (cluster->start - offset) >> PAGE_SHIFT;
287109cbfeafSKirill A. Shutemov 	last_index = (cluster->end - offset) >> PAGE_SHIFT;
28720257bb82SYan, Zheng 	while (index <= last_index) {
28739f3db423SNikolay Borisov 		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
28749f3db423SNikolay Borisov 				PAGE_SIZE);
2875efa56464SYan, Zheng 		if (ret)
2876efa56464SYan, Zheng 			goto out;
2877efa56464SYan, Zheng 
28780257bb82SYan, Zheng 		page = find_lock_page(inode->i_mapping, index);
28790257bb82SYan, Zheng 		if (!page) {
28800257bb82SYan, Zheng 			page_cache_sync_readahead(inode->i_mapping,
28810257bb82SYan, Zheng 						  ra, NULL, index,
28820257bb82SYan, Zheng 						  last_index + 1 - index);
2883a94733d0SJosef Bacik 			page = find_or_create_page(inode->i_mapping, index,
28843b16a4e3SJosef Bacik 						   mask);
28850257bb82SYan, Zheng 			if (!page) {
2886691fa059SNikolay Borisov 				btrfs_delalloc_release_metadata(BTRFS_I(inode),
288743b18595SQu Wenruo 							PAGE_SIZE, true);
288844db1216SFilipe Manana 				btrfs_delalloc_release_extents(BTRFS_I(inode),
28898702ba93SQu Wenruo 							PAGE_SIZE);
28900257bb82SYan, Zheng 				ret = -ENOMEM;
2891efa56464SYan, Zheng 				goto out;
28920257bb82SYan, Zheng 			}
28930257bb82SYan, Zheng 		}
28940257bb82SYan, Zheng 
28950257bb82SYan, Zheng 		if (PageReadahead(page)) {
28960257bb82SYan, Zheng 			page_cache_async_readahead(inode->i_mapping,
28970257bb82SYan, Zheng 						   ra, NULL, page, index,
28980257bb82SYan, Zheng 						   last_index + 1 - index);
28990257bb82SYan, Zheng 		}
29000257bb82SYan, Zheng 
29010257bb82SYan, Zheng 		if (!PageUptodate(page)) {
29020257bb82SYan, Zheng 			btrfs_readpage(NULL, page);
29030257bb82SYan, Zheng 			lock_page(page);
29040257bb82SYan, Zheng 			if (!PageUptodate(page)) {
29050257bb82SYan, Zheng 				unlock_page(page);
290609cbfeafSKirill A. Shutemov 				put_page(page);
2907691fa059SNikolay Borisov 				btrfs_delalloc_release_metadata(BTRFS_I(inode),
290843b18595SQu Wenruo 							PAGE_SIZE, true);
29098b62f87bSJosef Bacik 				btrfs_delalloc_release_extents(BTRFS_I(inode),
29108702ba93SQu Wenruo 							       PAGE_SIZE);
29110257bb82SYan, Zheng 				ret = -EIO;
2912efa56464SYan, Zheng 				goto out;
29130257bb82SYan, Zheng 			}
29140257bb82SYan, Zheng 		}
29150257bb82SYan, Zheng 
29164eee4fa4SMiao Xie 		page_start = page_offset(page);
291709cbfeafSKirill A. Shutemov 		page_end = page_start + PAGE_SIZE - 1;
29180257bb82SYan, Zheng 
2919d0082371SJeff Mahoney 		lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
29200257bb82SYan, Zheng 
29210257bb82SYan, Zheng 		set_page_extent_mapped(page);
29220257bb82SYan, Zheng 
29230257bb82SYan, Zheng 		if (nr < cluster->nr &&
29240257bb82SYan, Zheng 		    page_start + offset == cluster->boundary[nr]) {
29250257bb82SYan, Zheng 			set_extent_bits(&BTRFS_I(inode)->io_tree,
29260257bb82SYan, Zheng 					page_start, page_end,
2927ceeb0ae7SDavid Sterba 					EXTENT_BOUNDARY);
29280257bb82SYan, Zheng 			nr++;
29290257bb82SYan, Zheng 		}
29300257bb82SYan, Zheng 
2931765f3cebSNikolay Borisov 		ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
2932330a5827SNikolay Borisov 						NULL);
2933765f3cebSNikolay Borisov 		if (ret) {
2934765f3cebSNikolay Borisov 			unlock_page(page);
2935765f3cebSNikolay Borisov 			put_page(page);
2936765f3cebSNikolay Borisov 			btrfs_delalloc_release_metadata(BTRFS_I(inode),
293743b18595SQu Wenruo 							 PAGE_SIZE, true);
2938765f3cebSNikolay Borisov 			btrfs_delalloc_release_extents(BTRFS_I(inode),
29398702ba93SQu Wenruo 			                               PAGE_SIZE);
2940765f3cebSNikolay Borisov 
2941765f3cebSNikolay Borisov 			clear_extent_bits(&BTRFS_I(inode)->io_tree,
2942765f3cebSNikolay Borisov 					  page_start, page_end,
2943765f3cebSNikolay Borisov 					  EXTENT_LOCKED | EXTENT_BOUNDARY);
2944765f3cebSNikolay Borisov 			goto out;
2945765f3cebSNikolay Borisov 
2946765f3cebSNikolay Borisov 		}
29470257bb82SYan, Zheng 		set_page_dirty(page);
29480257bb82SYan, Zheng 
29490257bb82SYan, Zheng 		unlock_extent(&BTRFS_I(inode)->io_tree,
2950d0082371SJeff Mahoney 			      page_start, page_end);
29510257bb82SYan, Zheng 		unlock_page(page);
295209cbfeafSKirill A. Shutemov 		put_page(page);
29530257bb82SYan, Zheng 
29540257bb82SYan, Zheng 		index++;
29558702ba93SQu Wenruo 		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
2956efa56464SYan, Zheng 		balance_dirty_pages_ratelimited(inode->i_mapping);
29572ff7e61eSJeff Mahoney 		btrfs_throttle(fs_info);
29587f913c7cSQu Wenruo 		if (btrfs_should_cancel_balance(fs_info)) {
29597f913c7cSQu Wenruo 			ret = -ECANCELED;
29607f913c7cSQu Wenruo 			goto out;
29617f913c7cSQu Wenruo 		}
29620257bb82SYan, Zheng 	}
29630257bb82SYan, Zheng 	WARN_ON(nr != cluster->nr);
2964efa56464SYan, Zheng out:
29650257bb82SYan, Zheng 	kfree(ra);
29660257bb82SYan, Zheng 	return ret;
29670257bb82SYan, Zheng }
29680257bb82SYan, Zheng 
29690257bb82SYan, Zheng static noinline_for_stack
29700257bb82SYan, Zheng int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
29710257bb82SYan, Zheng 			 struct file_extent_cluster *cluster)
29720257bb82SYan, Zheng {
29730257bb82SYan, Zheng 	int ret;
29740257bb82SYan, Zheng 
29750257bb82SYan, Zheng 	if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
29760257bb82SYan, Zheng 		ret = relocate_file_extent_cluster(inode, cluster);
29770257bb82SYan, Zheng 		if (ret)
29780257bb82SYan, Zheng 			return ret;
29790257bb82SYan, Zheng 		cluster->nr = 0;
29800257bb82SYan, Zheng 	}
29810257bb82SYan, Zheng 
29820257bb82SYan, Zheng 	if (!cluster->nr)
29830257bb82SYan, Zheng 		cluster->start = extent_key->objectid;
29840257bb82SYan, Zheng 	else
29850257bb82SYan, Zheng 		BUG_ON(cluster->nr >= MAX_EXTENTS);
29860257bb82SYan, Zheng 	cluster->end = extent_key->objectid + extent_key->offset - 1;
29870257bb82SYan, Zheng 	cluster->boundary[cluster->nr] = extent_key->objectid;
29880257bb82SYan, Zheng 	cluster->nr++;
29890257bb82SYan, Zheng 
29900257bb82SYan, Zheng 	if (cluster->nr >= MAX_EXTENTS) {
29910257bb82SYan, Zheng 		ret = relocate_file_extent_cluster(inode, cluster);
29920257bb82SYan, Zheng 		if (ret)
29930257bb82SYan, Zheng 			return ret;
29940257bb82SYan, Zheng 		cluster->nr = 0;
29950257bb82SYan, Zheng 	}
29960257bb82SYan, Zheng 	return 0;
29975d4f98a2SYan Zheng }
29985d4f98a2SYan Zheng 
29995d4f98a2SYan Zheng /*
30005d4f98a2SYan Zheng  * helper to add a tree block to the list.
30015d4f98a2SYan Zheng  * the major work is getting the generation and level of the block
30025d4f98a2SYan Zheng  */
30035d4f98a2SYan Zheng static int add_tree_block(struct reloc_control *rc,
30045d4f98a2SYan Zheng 			  struct btrfs_key *extent_key,
30055d4f98a2SYan Zheng 			  struct btrfs_path *path,
30065d4f98a2SYan Zheng 			  struct rb_root *blocks)
30075d4f98a2SYan Zheng {
30085d4f98a2SYan Zheng 	struct extent_buffer *eb;
30095d4f98a2SYan Zheng 	struct btrfs_extent_item *ei;
30105d4f98a2SYan Zheng 	struct btrfs_tree_block_info *bi;
30115d4f98a2SYan Zheng 	struct tree_block *block;
30125d4f98a2SYan Zheng 	struct rb_node *rb_node;
30135d4f98a2SYan Zheng 	u32 item_size;
30145d4f98a2SYan Zheng 	int level = -1;
30157fdf4b60SWang Shilong 	u64 generation;
30165d4f98a2SYan Zheng 
30175d4f98a2SYan Zheng 	eb =  path->nodes[0];
30185d4f98a2SYan Zheng 	item_size = btrfs_item_size_nr(eb, path->slots[0]);
30195d4f98a2SYan Zheng 
30203173a18fSJosef Bacik 	if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
30213173a18fSJosef Bacik 	    item_size >= sizeof(*ei) + sizeof(*bi)) {
30225d4f98a2SYan Zheng 		ei = btrfs_item_ptr(eb, path->slots[0],
30235d4f98a2SYan Zheng 				struct btrfs_extent_item);
30243173a18fSJosef Bacik 		if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
30255d4f98a2SYan Zheng 			bi = (struct btrfs_tree_block_info *)(ei + 1);
30265d4f98a2SYan Zheng 			level = btrfs_tree_block_level(eb, bi);
30275d4f98a2SYan Zheng 		} else {
30283173a18fSJosef Bacik 			level = (int)extent_key->offset;
30293173a18fSJosef Bacik 		}
30303173a18fSJosef Bacik 		generation = btrfs_extent_generation(eb, ei);
30316d8ff4e4SDavid Sterba 	} else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
3032ba3c2b19SNikolay Borisov 		btrfs_print_v0_err(eb->fs_info);
3033ba3c2b19SNikolay Borisov 		btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
3034ba3c2b19SNikolay Borisov 		return -EINVAL;
30353173a18fSJosef Bacik 	} else {
30365d4f98a2SYan Zheng 		BUG();
30375d4f98a2SYan Zheng 	}
30385d4f98a2SYan Zheng 
3039b3b4aa74SDavid Sterba 	btrfs_release_path(path);
30405d4f98a2SYan Zheng 
30415d4f98a2SYan Zheng 	BUG_ON(level == -1);
30425d4f98a2SYan Zheng 
30435d4f98a2SYan Zheng 	block = kmalloc(sizeof(*block), GFP_NOFS);
30445d4f98a2SYan Zheng 	if (!block)
30455d4f98a2SYan Zheng 		return -ENOMEM;
30465d4f98a2SYan Zheng 
30475d4f98a2SYan Zheng 	block->bytenr = extent_key->objectid;
3048da17066cSJeff Mahoney 	block->key.objectid = rc->extent_root->fs_info->nodesize;
30495d4f98a2SYan Zheng 	block->key.offset = generation;
30505d4f98a2SYan Zheng 	block->level = level;
30515d4f98a2SYan Zheng 	block->key_ready = 0;
30525d4f98a2SYan Zheng 
3053e9a28dc5SQu Wenruo 	rb_node = rb_simple_insert(blocks, block->bytenr, &block->rb_node);
305443c04fb1SJeff Mahoney 	if (rb_node)
3055982c92cbSQu Wenruo 		btrfs_backref_panic(rc->extent_root->fs_info, block->bytenr,
3056982c92cbSQu Wenruo 				    -EEXIST);
30575d4f98a2SYan Zheng 
30585d4f98a2SYan Zheng 	return 0;
30595d4f98a2SYan Zheng }
30605d4f98a2SYan Zheng 
30615d4f98a2SYan Zheng /*
30625d4f98a2SYan Zheng  * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
30635d4f98a2SYan Zheng  */
30645d4f98a2SYan Zheng static int __add_tree_block(struct reloc_control *rc,
30655d4f98a2SYan Zheng 			    u64 bytenr, u32 blocksize,
30665d4f98a2SYan Zheng 			    struct rb_root *blocks)
30675d4f98a2SYan Zheng {
30680b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
30695d4f98a2SYan Zheng 	struct btrfs_path *path;
30705d4f98a2SYan Zheng 	struct btrfs_key key;
30715d4f98a2SYan Zheng 	int ret;
30720b246afaSJeff Mahoney 	bool skinny = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
30735d4f98a2SYan Zheng 
30747476dfdaSDavid Sterba 	if (tree_block_processed(bytenr, rc))
30755d4f98a2SYan Zheng 		return 0;
30765d4f98a2SYan Zheng 
3077e9a28dc5SQu Wenruo 	if (rb_simple_search(blocks, bytenr))
30785d4f98a2SYan Zheng 		return 0;
30795d4f98a2SYan Zheng 
30805d4f98a2SYan Zheng 	path = btrfs_alloc_path();
30815d4f98a2SYan Zheng 	if (!path)
30825d4f98a2SYan Zheng 		return -ENOMEM;
3083aee68ee5SJosef Bacik again:
30845d4f98a2SYan Zheng 	key.objectid = bytenr;
3085aee68ee5SJosef Bacik 	if (skinny) {
3086aee68ee5SJosef Bacik 		key.type = BTRFS_METADATA_ITEM_KEY;
3087aee68ee5SJosef Bacik 		key.offset = (u64)-1;
3088aee68ee5SJosef Bacik 	} else {
30895d4f98a2SYan Zheng 		key.type = BTRFS_EXTENT_ITEM_KEY;
30905d4f98a2SYan Zheng 		key.offset = blocksize;
3091aee68ee5SJosef Bacik 	}
30925d4f98a2SYan Zheng 
30935d4f98a2SYan Zheng 	path->search_commit_root = 1;
30945d4f98a2SYan Zheng 	path->skip_locking = 1;
30955d4f98a2SYan Zheng 	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
30965d4f98a2SYan Zheng 	if (ret < 0)
30975d4f98a2SYan Zheng 		goto out;
30985d4f98a2SYan Zheng 
3099aee68ee5SJosef Bacik 	if (ret > 0 && skinny) {
3100aee68ee5SJosef Bacik 		if (path->slots[0]) {
3101aee68ee5SJosef Bacik 			path->slots[0]--;
3102aee68ee5SJosef Bacik 			btrfs_item_key_to_cpu(path->nodes[0], &key,
3103aee68ee5SJosef Bacik 					      path->slots[0]);
31043173a18fSJosef Bacik 			if (key.objectid == bytenr &&
3105aee68ee5SJosef Bacik 			    (key.type == BTRFS_METADATA_ITEM_KEY ||
3106aee68ee5SJosef Bacik 			     (key.type == BTRFS_EXTENT_ITEM_KEY &&
3107aee68ee5SJosef Bacik 			      key.offset == blocksize)))
31083173a18fSJosef Bacik 				ret = 0;
31093173a18fSJosef Bacik 		}
3110aee68ee5SJosef Bacik 
3111aee68ee5SJosef Bacik 		if (ret) {
3112aee68ee5SJosef Bacik 			skinny = false;
3113aee68ee5SJosef Bacik 			btrfs_release_path(path);
3114aee68ee5SJosef Bacik 			goto again;
3115aee68ee5SJosef Bacik 		}
3116aee68ee5SJosef Bacik 	}
3117cdccee99SLiu Bo 	if (ret) {
3118cdccee99SLiu Bo 		ASSERT(ret == 1);
3119cdccee99SLiu Bo 		btrfs_print_leaf(path->nodes[0]);
3120cdccee99SLiu Bo 		btrfs_err(fs_info,
3121cdccee99SLiu Bo 	     "tree block extent item (%llu) is not found in extent tree",
3122cdccee99SLiu Bo 		     bytenr);
3123cdccee99SLiu Bo 		WARN_ON(1);
3124cdccee99SLiu Bo 		ret = -EINVAL;
3125cdccee99SLiu Bo 		goto out;
3126cdccee99SLiu Bo 	}
31273173a18fSJosef Bacik 
31285d4f98a2SYan Zheng 	ret = add_tree_block(rc, &key, path, blocks);
31295d4f98a2SYan Zheng out:
31305d4f98a2SYan Zheng 	btrfs_free_path(path);
31315d4f98a2SYan Zheng 	return ret;
31325d4f98a2SYan Zheng }
31335d4f98a2SYan Zheng 
31340af3d00bSJosef Bacik static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
313532da5386SDavid Sterba 				    struct btrfs_block_group *block_group,
31361bbc621eSChris Mason 				    struct inode *inode,
31371bbc621eSChris Mason 				    u64 ino)
31380af3d00bSJosef Bacik {
31390af3d00bSJosef Bacik 	struct btrfs_key key;
31400af3d00bSJosef Bacik 	struct btrfs_root *root = fs_info->tree_root;
31410af3d00bSJosef Bacik 	struct btrfs_trans_handle *trans;
31420af3d00bSJosef Bacik 	int ret = 0;
31430af3d00bSJosef Bacik 
31440af3d00bSJosef Bacik 	if (inode)
31450af3d00bSJosef Bacik 		goto truncate;
31460af3d00bSJosef Bacik 
31470af3d00bSJosef Bacik 	key.objectid = ino;
31480af3d00bSJosef Bacik 	key.type = BTRFS_INODE_ITEM_KEY;
31490af3d00bSJosef Bacik 	key.offset = 0;
31500af3d00bSJosef Bacik 
31514c66e0d4SDavid Sterba 	inode = btrfs_iget(fs_info->sb, &key, root);
31522e19f1f9SAl Viro 	if (IS_ERR(inode))
31530af3d00bSJosef Bacik 		return -ENOENT;
31540af3d00bSJosef Bacik 
31550af3d00bSJosef Bacik truncate:
31562ff7e61eSJeff Mahoney 	ret = btrfs_check_trunc_cache_free_space(fs_info,
31577b61cd92SMiao Xie 						 &fs_info->global_block_rsv);
31587b61cd92SMiao Xie 	if (ret)
31597b61cd92SMiao Xie 		goto out;
31607b61cd92SMiao Xie 
31617a7eaa40SJosef Bacik 	trans = btrfs_join_transaction(root);
31620af3d00bSJosef Bacik 	if (IS_ERR(trans)) {
31633612b495STsutomu Itoh 		ret = PTR_ERR(trans);
31640af3d00bSJosef Bacik 		goto out;
31650af3d00bSJosef Bacik 	}
31660af3d00bSJosef Bacik 
316777ab86bfSJeff Mahoney 	ret = btrfs_truncate_free_space_cache(trans, block_group, inode);
31680af3d00bSJosef Bacik 
31693a45bb20SJeff Mahoney 	btrfs_end_transaction(trans);
31702ff7e61eSJeff Mahoney 	btrfs_btree_balance_dirty(fs_info);
31710af3d00bSJosef Bacik out:
31720af3d00bSJosef Bacik 	iput(inode);
31730af3d00bSJosef Bacik 	return ret;
31740af3d00bSJosef Bacik }
31750af3d00bSJosef Bacik 
31765d4f98a2SYan Zheng /*
317719b546d7SQu Wenruo  * Locate the free space cache EXTENT_DATA in root tree leaf and delete the
317819b546d7SQu Wenruo  * cache inode, to avoid free space cache data extent blocking data relocation.
31795d4f98a2SYan Zheng  */
318019b546d7SQu Wenruo static int delete_v1_space_cache(struct extent_buffer *leaf,
318119b546d7SQu Wenruo 				 struct btrfs_block_group *block_group,
318219b546d7SQu Wenruo 				 u64 data_bytenr)
31835d4f98a2SYan Zheng {
318419b546d7SQu Wenruo 	u64 space_cache_ino;
318519b546d7SQu Wenruo 	struct btrfs_file_extent_item *ei;
31865d4f98a2SYan Zheng 	struct btrfs_key key;
318719b546d7SQu Wenruo 	bool found = false;
318819b546d7SQu Wenruo 	int i;
31895d4f98a2SYan Zheng 	int ret;
31905d4f98a2SYan Zheng 
319119b546d7SQu Wenruo 	if (btrfs_header_owner(leaf) != BTRFS_ROOT_TREE_OBJECTID)
319219b546d7SQu Wenruo 		return 0;
31935d4f98a2SYan Zheng 
319419b546d7SQu Wenruo 	for (i = 0; i < btrfs_header_nritems(leaf); i++) {
319519b546d7SQu Wenruo 		btrfs_item_key_to_cpu(leaf, &key, i);
319619b546d7SQu Wenruo 		if (key.type != BTRFS_EXTENT_DATA_KEY)
319719b546d7SQu Wenruo 			continue;
319819b546d7SQu Wenruo 		ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
319919b546d7SQu Wenruo 		if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_REG &&
320019b546d7SQu Wenruo 		    btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) {
320119b546d7SQu Wenruo 			found = true;
320219b546d7SQu Wenruo 			space_cache_ino = key.objectid;
320319b546d7SQu Wenruo 			break;
320419b546d7SQu Wenruo 		}
320519b546d7SQu Wenruo 	}
320619b546d7SQu Wenruo 	if (!found)
320719b546d7SQu Wenruo 		return -ENOENT;
320819b546d7SQu Wenruo 	ret = delete_block_group_cache(leaf->fs_info, block_group, NULL,
320919b546d7SQu Wenruo 					space_cache_ino);
32100af3d00bSJosef Bacik 	return ret;
32115d4f98a2SYan Zheng }
32125d4f98a2SYan Zheng 
32135d4f98a2SYan Zheng /*
32142c016dc2SLiu Bo  * helper to find all tree blocks that reference a given data extent
32155d4f98a2SYan Zheng  */
32165d4f98a2SYan Zheng static noinline_for_stack
32175d4f98a2SYan Zheng int add_data_references(struct reloc_control *rc,
32185d4f98a2SYan Zheng 			struct btrfs_key *extent_key,
32195d4f98a2SYan Zheng 			struct btrfs_path *path,
32205d4f98a2SYan Zheng 			struct rb_root *blocks)
32215d4f98a2SYan Zheng {
322219b546d7SQu Wenruo 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
322319b546d7SQu Wenruo 	struct ulist *leaves = NULL;
322419b546d7SQu Wenruo 	struct ulist_iterator leaf_uiter;
322519b546d7SQu Wenruo 	struct ulist_node *ref_node = NULL;
322619b546d7SQu Wenruo 	const u32 blocksize = fs_info->nodesize;
3227647f63bdSFilipe David Borba Manana 	int ret = 0;
32285d4f98a2SYan Zheng 
3229b3b4aa74SDavid Sterba 	btrfs_release_path(path);
323019b546d7SQu Wenruo 	ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid,
323119b546d7SQu Wenruo 				   0, &leaves, NULL, true);
323219b546d7SQu Wenruo 	if (ret < 0)
323319b546d7SQu Wenruo 		return ret;
323419b546d7SQu Wenruo 
323519b546d7SQu Wenruo 	ULIST_ITER_INIT(&leaf_uiter);
323619b546d7SQu Wenruo 	while ((ref_node = ulist_next(leaves, &leaf_uiter))) {
323719b546d7SQu Wenruo 		struct extent_buffer *eb;
323819b546d7SQu Wenruo 
323919b546d7SQu Wenruo 		eb = read_tree_block(fs_info, ref_node->val, 0, 0, NULL);
324019b546d7SQu Wenruo 		if (IS_ERR(eb)) {
324119b546d7SQu Wenruo 			ret = PTR_ERR(eb);
324219b546d7SQu Wenruo 			break;
324319b546d7SQu Wenruo 		}
324419b546d7SQu Wenruo 		ret = delete_v1_space_cache(eb, rc->block_group,
324519b546d7SQu Wenruo 					    extent_key->objectid);
324619b546d7SQu Wenruo 		free_extent_buffer(eb);
324719b546d7SQu Wenruo 		if (ret < 0)
324819b546d7SQu Wenruo 			break;
324919b546d7SQu Wenruo 		ret = __add_tree_block(rc, ref_node->val, blocksize, blocks);
325019b546d7SQu Wenruo 		if (ret < 0)
325119b546d7SQu Wenruo 			break;
325219b546d7SQu Wenruo 	}
325319b546d7SQu Wenruo 	if (ret < 0)
32545d4f98a2SYan Zheng 		free_block_list(blocks);
325519b546d7SQu Wenruo 	ulist_free(leaves);
325619b546d7SQu Wenruo 	return ret;
32575d4f98a2SYan Zheng }
32585d4f98a2SYan Zheng 
32595d4f98a2SYan Zheng /*
32602c016dc2SLiu Bo  * helper to find next unprocessed extent
32615d4f98a2SYan Zheng  */
32625d4f98a2SYan Zheng static noinline_for_stack
3263147d256eSZhaolei int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
32643fd0a558SYan, Zheng 		     struct btrfs_key *extent_key)
32655d4f98a2SYan Zheng {
32660b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
32675d4f98a2SYan Zheng 	struct btrfs_key key;
32685d4f98a2SYan Zheng 	struct extent_buffer *leaf;
32695d4f98a2SYan Zheng 	u64 start, end, last;
32705d4f98a2SYan Zheng 	int ret;
32715d4f98a2SYan Zheng 
3272b3470b5dSDavid Sterba 	last = rc->block_group->start + rc->block_group->length;
32735d4f98a2SYan Zheng 	while (1) {
32745d4f98a2SYan Zheng 		cond_resched();
32755d4f98a2SYan Zheng 		if (rc->search_start >= last) {
32765d4f98a2SYan Zheng 			ret = 1;
32775d4f98a2SYan Zheng 			break;
32785d4f98a2SYan Zheng 		}
32795d4f98a2SYan Zheng 
32805d4f98a2SYan Zheng 		key.objectid = rc->search_start;
32815d4f98a2SYan Zheng 		key.type = BTRFS_EXTENT_ITEM_KEY;
32825d4f98a2SYan Zheng 		key.offset = 0;
32835d4f98a2SYan Zheng 
32845d4f98a2SYan Zheng 		path->search_commit_root = 1;
32855d4f98a2SYan Zheng 		path->skip_locking = 1;
32865d4f98a2SYan Zheng 		ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
32875d4f98a2SYan Zheng 					0, 0);
32885d4f98a2SYan Zheng 		if (ret < 0)
32895d4f98a2SYan Zheng 			break;
32905d4f98a2SYan Zheng next:
32915d4f98a2SYan Zheng 		leaf = path->nodes[0];
32925d4f98a2SYan Zheng 		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
32935d4f98a2SYan Zheng 			ret = btrfs_next_leaf(rc->extent_root, path);
32945d4f98a2SYan Zheng 			if (ret != 0)
32955d4f98a2SYan Zheng 				break;
32965d4f98a2SYan Zheng 			leaf = path->nodes[0];
32975d4f98a2SYan Zheng 		}
32985d4f98a2SYan Zheng 
32995d4f98a2SYan Zheng 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
33005d4f98a2SYan Zheng 		if (key.objectid >= last) {
33015d4f98a2SYan Zheng 			ret = 1;
33025d4f98a2SYan Zheng 			break;
33035d4f98a2SYan Zheng 		}
33045d4f98a2SYan Zheng 
33053173a18fSJosef Bacik 		if (key.type != BTRFS_EXTENT_ITEM_KEY &&
33063173a18fSJosef Bacik 		    key.type != BTRFS_METADATA_ITEM_KEY) {
33073173a18fSJosef Bacik 			path->slots[0]++;
33083173a18fSJosef Bacik 			goto next;
33093173a18fSJosef Bacik 		}
33103173a18fSJosef Bacik 
33113173a18fSJosef Bacik 		if (key.type == BTRFS_EXTENT_ITEM_KEY &&
33125d4f98a2SYan Zheng 		    key.objectid + key.offset <= rc->search_start) {
33135d4f98a2SYan Zheng 			path->slots[0]++;
33145d4f98a2SYan Zheng 			goto next;
33155d4f98a2SYan Zheng 		}
33165d4f98a2SYan Zheng 
33173173a18fSJosef Bacik 		if (key.type == BTRFS_METADATA_ITEM_KEY &&
33180b246afaSJeff Mahoney 		    key.objectid + fs_info->nodesize <=
33193173a18fSJosef Bacik 		    rc->search_start) {
33203173a18fSJosef Bacik 			path->slots[0]++;
33213173a18fSJosef Bacik 			goto next;
33223173a18fSJosef Bacik 		}
33233173a18fSJosef Bacik 
33245d4f98a2SYan Zheng 		ret = find_first_extent_bit(&rc->processed_blocks,
33255d4f98a2SYan Zheng 					    key.objectid, &start, &end,
3326e6138876SJosef Bacik 					    EXTENT_DIRTY, NULL);
33275d4f98a2SYan Zheng 
33285d4f98a2SYan Zheng 		if (ret == 0 && start <= key.objectid) {
3329b3b4aa74SDavid Sterba 			btrfs_release_path(path);
33305d4f98a2SYan Zheng 			rc->search_start = end + 1;
33315d4f98a2SYan Zheng 		} else {
33323173a18fSJosef Bacik 			if (key.type == BTRFS_EXTENT_ITEM_KEY)
33335d4f98a2SYan Zheng 				rc->search_start = key.objectid + key.offset;
33343173a18fSJosef Bacik 			else
33353173a18fSJosef Bacik 				rc->search_start = key.objectid +
33360b246afaSJeff Mahoney 					fs_info->nodesize;
33373fd0a558SYan, Zheng 			memcpy(extent_key, &key, sizeof(key));
33385d4f98a2SYan Zheng 			return 0;
33395d4f98a2SYan Zheng 		}
33405d4f98a2SYan Zheng 	}
3341b3b4aa74SDavid Sterba 	btrfs_release_path(path);
33425d4f98a2SYan Zheng 	return ret;
33435d4f98a2SYan Zheng }
33445d4f98a2SYan Zheng 
33455d4f98a2SYan Zheng static void set_reloc_control(struct reloc_control *rc)
33465d4f98a2SYan Zheng {
33475d4f98a2SYan Zheng 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
33487585717fSChris Mason 
33497585717fSChris Mason 	mutex_lock(&fs_info->reloc_mutex);
33505d4f98a2SYan Zheng 	fs_info->reloc_ctl = rc;
33517585717fSChris Mason 	mutex_unlock(&fs_info->reloc_mutex);
33525d4f98a2SYan Zheng }
33535d4f98a2SYan Zheng 
33545d4f98a2SYan Zheng static void unset_reloc_control(struct reloc_control *rc)
33555d4f98a2SYan Zheng {
33565d4f98a2SYan Zheng 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
33577585717fSChris Mason 
33587585717fSChris Mason 	mutex_lock(&fs_info->reloc_mutex);
33595d4f98a2SYan Zheng 	fs_info->reloc_ctl = NULL;
33607585717fSChris Mason 	mutex_unlock(&fs_info->reloc_mutex);
33615d4f98a2SYan Zheng }
33625d4f98a2SYan Zheng 
33635d4f98a2SYan Zheng static int check_extent_flags(u64 flags)
33645d4f98a2SYan Zheng {
33655d4f98a2SYan Zheng 	if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
33665d4f98a2SYan Zheng 	    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
33675d4f98a2SYan Zheng 		return 1;
33685d4f98a2SYan Zheng 	if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
33695d4f98a2SYan Zheng 	    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
33705d4f98a2SYan Zheng 		return 1;
33715d4f98a2SYan Zheng 	if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
33725d4f98a2SYan Zheng 	    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
33735d4f98a2SYan Zheng 		return 1;
33745d4f98a2SYan Zheng 	return 0;
33755d4f98a2SYan Zheng }
33765d4f98a2SYan Zheng 
33773fd0a558SYan, Zheng static noinline_for_stack
33783fd0a558SYan, Zheng int prepare_to_relocate(struct reloc_control *rc)
33793fd0a558SYan, Zheng {
33803fd0a558SYan, Zheng 	struct btrfs_trans_handle *trans;
3381ac2fabacSJosef Bacik 	int ret;
33823fd0a558SYan, Zheng 
33832ff7e61eSJeff Mahoney 	rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root->fs_info,
338466d8f3ddSMiao Xie 					      BTRFS_BLOCK_RSV_TEMP);
33853fd0a558SYan, Zheng 	if (!rc->block_rsv)
33863fd0a558SYan, Zheng 		return -ENOMEM;
33873fd0a558SYan, Zheng 
33883fd0a558SYan, Zheng 	memset(&rc->cluster, 0, sizeof(rc->cluster));
3389b3470b5dSDavid Sterba 	rc->search_start = rc->block_group->start;
33903fd0a558SYan, Zheng 	rc->extents_found = 0;
33913fd0a558SYan, Zheng 	rc->nodes_relocated = 0;
33923fd0a558SYan, Zheng 	rc->merging_rsv_size = 0;
33930647bf56SWang Shilong 	rc->reserved_bytes = 0;
3394da17066cSJeff Mahoney 	rc->block_rsv->size = rc->extent_root->fs_info->nodesize *
33950647bf56SWang Shilong 			      RELOCATION_RESERVED_NODES;
3396ac2fabacSJosef Bacik 	ret = btrfs_block_rsv_refill(rc->extent_root,
3397ac2fabacSJosef Bacik 				     rc->block_rsv, rc->block_rsv->size,
3398ac2fabacSJosef Bacik 				     BTRFS_RESERVE_FLUSH_ALL);
3399ac2fabacSJosef Bacik 	if (ret)
3400ac2fabacSJosef Bacik 		return ret;
34013fd0a558SYan, Zheng 
34023fd0a558SYan, Zheng 	rc->create_reloc_tree = 1;
34033fd0a558SYan, Zheng 	set_reloc_control(rc);
34043fd0a558SYan, Zheng 
34057a7eaa40SJosef Bacik 	trans = btrfs_join_transaction(rc->extent_root);
340628818947SLiu Bo 	if (IS_ERR(trans)) {
340728818947SLiu Bo 		unset_reloc_control(rc);
340828818947SLiu Bo 		/*
340928818947SLiu Bo 		 * extent tree is not a ref_cow tree and has no reloc_root to
341028818947SLiu Bo 		 * cleanup.  And callers are responsible to free the above
341128818947SLiu Bo 		 * block rsv.
341228818947SLiu Bo 		 */
341328818947SLiu Bo 		return PTR_ERR(trans);
341428818947SLiu Bo 	}
34153a45bb20SJeff Mahoney 	btrfs_commit_transaction(trans);
34163fd0a558SYan, Zheng 	return 0;
34173fd0a558SYan, Zheng }
341876dda93cSYan, Zheng 
34195d4f98a2SYan Zheng static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
34205d4f98a2SYan Zheng {
34212ff7e61eSJeff Mahoney 	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
34225d4f98a2SYan Zheng 	struct rb_root blocks = RB_ROOT;
34235d4f98a2SYan Zheng 	struct btrfs_key key;
34245d4f98a2SYan Zheng 	struct btrfs_trans_handle *trans = NULL;
34255d4f98a2SYan Zheng 	struct btrfs_path *path;
34265d4f98a2SYan Zheng 	struct btrfs_extent_item *ei;
34275d4f98a2SYan Zheng 	u64 flags;
34285d4f98a2SYan Zheng 	u32 item_size;
34295d4f98a2SYan Zheng 	int ret;
34305d4f98a2SYan Zheng 	int err = 0;
3431c87f08caSChris Mason 	int progress = 0;
34325d4f98a2SYan Zheng 
34335d4f98a2SYan Zheng 	path = btrfs_alloc_path();
34343fd0a558SYan, Zheng 	if (!path)
34355d4f98a2SYan Zheng 		return -ENOMEM;
3436e4058b54SDavid Sterba 	path->reada = READA_FORWARD;
34373fd0a558SYan, Zheng 
34383fd0a558SYan, Zheng 	ret = prepare_to_relocate(rc);
34393fd0a558SYan, Zheng 	if (ret) {
34403fd0a558SYan, Zheng 		err = ret;
34413fd0a558SYan, Zheng 		goto out_free;
34422423fdfbSJiri Slaby 	}
34435d4f98a2SYan Zheng 
34445d4f98a2SYan Zheng 	while (1) {
34450647bf56SWang Shilong 		rc->reserved_bytes = 0;
34460647bf56SWang Shilong 		ret = btrfs_block_rsv_refill(rc->extent_root,
34470647bf56SWang Shilong 					rc->block_rsv, rc->block_rsv->size,
34480647bf56SWang Shilong 					BTRFS_RESERVE_FLUSH_ALL);
34490647bf56SWang Shilong 		if (ret) {
34500647bf56SWang Shilong 			err = ret;
34510647bf56SWang Shilong 			break;
34520647bf56SWang Shilong 		}
3453c87f08caSChris Mason 		progress++;
3454a22285a6SYan, Zheng 		trans = btrfs_start_transaction(rc->extent_root, 0);
34550f788c58SLiu Bo 		if (IS_ERR(trans)) {
34560f788c58SLiu Bo 			err = PTR_ERR(trans);
34570f788c58SLiu Bo 			trans = NULL;
34580f788c58SLiu Bo 			break;
34590f788c58SLiu Bo 		}
3460c87f08caSChris Mason restart:
34613fd0a558SYan, Zheng 		if (update_backref_cache(trans, &rc->backref_cache)) {
34623a45bb20SJeff Mahoney 			btrfs_end_transaction(trans);
346342a657f5SPan Bian 			trans = NULL;
34643fd0a558SYan, Zheng 			continue;
34653fd0a558SYan, Zheng 		}
34663fd0a558SYan, Zheng 
3467147d256eSZhaolei 		ret = find_next_extent(rc, path, &key);
34685d4f98a2SYan Zheng 		if (ret < 0)
34695d4f98a2SYan Zheng 			err = ret;
34705d4f98a2SYan Zheng 		if (ret != 0)
34715d4f98a2SYan Zheng 			break;
34725d4f98a2SYan Zheng 
34735d4f98a2SYan Zheng 		rc->extents_found++;
34745d4f98a2SYan Zheng 
34755d4f98a2SYan Zheng 		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
34765d4f98a2SYan Zheng 				    struct btrfs_extent_item);
34773fd0a558SYan, Zheng 		item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
34785d4f98a2SYan Zheng 		if (item_size >= sizeof(*ei)) {
34795d4f98a2SYan Zheng 			flags = btrfs_extent_flags(path->nodes[0], ei);
34805d4f98a2SYan Zheng 			ret = check_extent_flags(flags);
34815d4f98a2SYan Zheng 			BUG_ON(ret);
34826d8ff4e4SDavid Sterba 		} else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
3483ba3c2b19SNikolay Borisov 			err = -EINVAL;
3484ba3c2b19SNikolay Borisov 			btrfs_print_v0_err(trans->fs_info);
3485ba3c2b19SNikolay Borisov 			btrfs_abort_transaction(trans, err);
3486ba3c2b19SNikolay Borisov 			break;
34875d4f98a2SYan Zheng 		} else {
34885d4f98a2SYan Zheng 			BUG();
34895d4f98a2SYan Zheng 		}
34905d4f98a2SYan Zheng 
34915d4f98a2SYan Zheng 		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
34925d4f98a2SYan Zheng 			ret = add_tree_block(rc, &key, path, &blocks);
34935d4f98a2SYan Zheng 		} else if (rc->stage == UPDATE_DATA_PTRS &&
34945d4f98a2SYan Zheng 			   (flags & BTRFS_EXTENT_FLAG_DATA)) {
34955d4f98a2SYan Zheng 			ret = add_data_references(rc, &key, path, &blocks);
34965d4f98a2SYan Zheng 		} else {
3497b3b4aa74SDavid Sterba 			btrfs_release_path(path);
34985d4f98a2SYan Zheng 			ret = 0;
34995d4f98a2SYan Zheng 		}
35005d4f98a2SYan Zheng 		if (ret < 0) {
35013fd0a558SYan, Zheng 			err = ret;
35025d4f98a2SYan Zheng 			break;
35035d4f98a2SYan Zheng 		}
35045d4f98a2SYan Zheng 
35055d4f98a2SYan Zheng 		if (!RB_EMPTY_ROOT(&blocks)) {
35065d4f98a2SYan Zheng 			ret = relocate_tree_blocks(trans, rc, &blocks);
35075d4f98a2SYan Zheng 			if (ret < 0) {
35083fd0a558SYan, Zheng 				if (ret != -EAGAIN) {
35095d4f98a2SYan Zheng 					err = ret;
35105d4f98a2SYan Zheng 					break;
35115d4f98a2SYan Zheng 				}
35123fd0a558SYan, Zheng 				rc->extents_found--;
35133fd0a558SYan, Zheng 				rc->search_start = key.objectid;
35143fd0a558SYan, Zheng 			}
35155d4f98a2SYan Zheng 		}
35165d4f98a2SYan Zheng 
35173a45bb20SJeff Mahoney 		btrfs_end_transaction_throttle(trans);
35182ff7e61eSJeff Mahoney 		btrfs_btree_balance_dirty(fs_info);
35193fd0a558SYan, Zheng 		trans = NULL;
35205d4f98a2SYan Zheng 
35215d4f98a2SYan Zheng 		if (rc->stage == MOVE_DATA_EXTENTS &&
35225d4f98a2SYan Zheng 		    (flags & BTRFS_EXTENT_FLAG_DATA)) {
35235d4f98a2SYan Zheng 			rc->found_file_extent = 1;
35240257bb82SYan, Zheng 			ret = relocate_data_extent(rc->data_inode,
35253fd0a558SYan, Zheng 						   &key, &rc->cluster);
35265d4f98a2SYan Zheng 			if (ret < 0) {
35275d4f98a2SYan Zheng 				err = ret;
35285d4f98a2SYan Zheng 				break;
35295d4f98a2SYan Zheng 			}
35305d4f98a2SYan Zheng 		}
3531f31ea088SQu Wenruo 		if (btrfs_should_cancel_balance(fs_info)) {
3532f31ea088SQu Wenruo 			err = -ECANCELED;
3533f31ea088SQu Wenruo 			break;
3534f31ea088SQu Wenruo 		}
35355d4f98a2SYan Zheng 	}
3536c87f08caSChris Mason 	if (trans && progress && err == -ENOSPC) {
353743a7e99dSNikolay Borisov 		ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags);
35389689457bSShilong Wang 		if (ret == 1) {
3539c87f08caSChris Mason 			err = 0;
3540c87f08caSChris Mason 			progress = 0;
3541c87f08caSChris Mason 			goto restart;
3542c87f08caSChris Mason 		}
3543c87f08caSChris Mason 	}
35443fd0a558SYan, Zheng 
3545b3b4aa74SDavid Sterba 	btrfs_release_path(path);
354691166212SDavid Sterba 	clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY);
35475d4f98a2SYan Zheng 
35485d4f98a2SYan Zheng 	if (trans) {
35493a45bb20SJeff Mahoney 		btrfs_end_transaction_throttle(trans);
35502ff7e61eSJeff Mahoney 		btrfs_btree_balance_dirty(fs_info);
35515d4f98a2SYan Zheng 	}
35525d4f98a2SYan Zheng 
35530257bb82SYan, Zheng 	if (!err) {
35543fd0a558SYan, Zheng 		ret = relocate_file_extent_cluster(rc->data_inode,
35553fd0a558SYan, Zheng 						   &rc->cluster);
35560257bb82SYan, Zheng 		if (ret < 0)
35570257bb82SYan, Zheng 			err = ret;
35580257bb82SYan, Zheng 	}
35590257bb82SYan, Zheng 
35603fd0a558SYan, Zheng 	rc->create_reloc_tree = 0;
35613fd0a558SYan, Zheng 	set_reloc_control(rc);
35620257bb82SYan, Zheng 
356313fe1bdbSQu Wenruo 	btrfs_backref_release_cache(&rc->backref_cache);
356463f018beSNikolay Borisov 	btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
35655d4f98a2SYan Zheng 
35667f913c7cSQu Wenruo 	/*
35677f913c7cSQu Wenruo 	 * Even in the case when the relocation is cancelled, we should all go
35687f913c7cSQu Wenruo 	 * through prepare_to_merge() and merge_reloc_roots().
35697f913c7cSQu Wenruo 	 *
35707f913c7cSQu Wenruo 	 * For error (including cancelled balance), prepare_to_merge() will
35717f913c7cSQu Wenruo 	 * mark all reloc trees orphan, then queue them for cleanup in
35727f913c7cSQu Wenruo 	 * merge_reloc_roots()
35737f913c7cSQu Wenruo 	 */
35743fd0a558SYan, Zheng 	err = prepare_to_merge(rc, err);
35755d4f98a2SYan Zheng 
35765d4f98a2SYan Zheng 	merge_reloc_roots(rc);
35775d4f98a2SYan Zheng 
35783fd0a558SYan, Zheng 	rc->merge_reloc_tree = 0;
35795d4f98a2SYan Zheng 	unset_reloc_control(rc);
358063f018beSNikolay Borisov 	btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL);
35815d4f98a2SYan Zheng 
35825d4f98a2SYan Zheng 	/* get rid of pinned extents */
35837a7eaa40SJosef Bacik 	trans = btrfs_join_transaction(rc->extent_root);
358462b99540SQu Wenruo 	if (IS_ERR(trans)) {
35853612b495STsutomu Itoh 		err = PTR_ERR(trans);
358662b99540SQu Wenruo 		goto out_free;
358762b99540SQu Wenruo 	}
35883a45bb20SJeff Mahoney 	btrfs_commit_transaction(trans);
35896217b0faSJosef Bacik out_free:
3590d2311e69SQu Wenruo 	ret = clean_dirty_subvols(rc);
3591d2311e69SQu Wenruo 	if (ret < 0 && !err)
3592d2311e69SQu Wenruo 		err = ret;
35932ff7e61eSJeff Mahoney 	btrfs_free_block_rsv(fs_info, rc->block_rsv);
35943fd0a558SYan, Zheng 	btrfs_free_path(path);
35955d4f98a2SYan Zheng 	return err;
35965d4f98a2SYan Zheng }
35975d4f98a2SYan Zheng 
35985d4f98a2SYan Zheng static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
35990257bb82SYan, Zheng 				 struct btrfs_root *root, u64 objectid)
36005d4f98a2SYan Zheng {
36015d4f98a2SYan Zheng 	struct btrfs_path *path;
36025d4f98a2SYan Zheng 	struct btrfs_inode_item *item;
36035d4f98a2SYan Zheng 	struct extent_buffer *leaf;
36045d4f98a2SYan Zheng 	int ret;
36055d4f98a2SYan Zheng 
36065d4f98a2SYan Zheng 	path = btrfs_alloc_path();
36075d4f98a2SYan Zheng 	if (!path)
36085d4f98a2SYan Zheng 		return -ENOMEM;
36095d4f98a2SYan Zheng 
36105d4f98a2SYan Zheng 	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
36115d4f98a2SYan Zheng 	if (ret)
36125d4f98a2SYan Zheng 		goto out;
36135d4f98a2SYan Zheng 
36145d4f98a2SYan Zheng 	leaf = path->nodes[0];
36155d4f98a2SYan Zheng 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3616b159fa28SDavid Sterba 	memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
36175d4f98a2SYan Zheng 	btrfs_set_inode_generation(leaf, item, 1);
36180257bb82SYan, Zheng 	btrfs_set_inode_size(leaf, item, 0);
36195d4f98a2SYan Zheng 	btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
36203fd0a558SYan, Zheng 	btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
36213fd0a558SYan, Zheng 					  BTRFS_INODE_PREALLOC);
36225d4f98a2SYan Zheng 	btrfs_mark_buffer_dirty(leaf);
36235d4f98a2SYan Zheng out:
36245d4f98a2SYan Zheng 	btrfs_free_path(path);
36255d4f98a2SYan Zheng 	return ret;
36265d4f98a2SYan Zheng }
36275d4f98a2SYan Zheng 
36285d4f98a2SYan Zheng /*
36295d4f98a2SYan Zheng  * helper to create inode for data relocation.
36305d4f98a2SYan Zheng  * the inode is in data relocation tree and its link count is 0
36315d4f98a2SYan Zheng  */
36323fd0a558SYan, Zheng static noinline_for_stack
36333fd0a558SYan, Zheng struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
363432da5386SDavid Sterba 				 struct btrfs_block_group *group)
36355d4f98a2SYan Zheng {
36365d4f98a2SYan Zheng 	struct inode *inode = NULL;
36375d4f98a2SYan Zheng 	struct btrfs_trans_handle *trans;
36385d4f98a2SYan Zheng 	struct btrfs_root *root;
36395d4f98a2SYan Zheng 	struct btrfs_key key;
36404624900dSZhaolei 	u64 objectid;
36415d4f98a2SYan Zheng 	int err = 0;
36425d4f98a2SYan Zheng 
36435d4f98a2SYan Zheng 	root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
36445d4f98a2SYan Zheng 	if (IS_ERR(root))
36455d4f98a2SYan Zheng 		return ERR_CAST(root);
36465d4f98a2SYan Zheng 
3647a22285a6SYan, Zheng 	trans = btrfs_start_transaction(root, 6);
364876deacf0SJosef Bacik 	if (IS_ERR(trans)) {
364900246528SJosef Bacik 		btrfs_put_root(root);
36503fd0a558SYan, Zheng 		return ERR_CAST(trans);
365176deacf0SJosef Bacik 	}
36525d4f98a2SYan Zheng 
3653581bb050SLi Zefan 	err = btrfs_find_free_objectid(root, &objectid);
36545d4f98a2SYan Zheng 	if (err)
36555d4f98a2SYan Zheng 		goto out;
36565d4f98a2SYan Zheng 
36570257bb82SYan, Zheng 	err = __insert_orphan_inode(trans, root, objectid);
36585d4f98a2SYan Zheng 	BUG_ON(err);
36595d4f98a2SYan Zheng 
36605d4f98a2SYan Zheng 	key.objectid = objectid;
36615d4f98a2SYan Zheng 	key.type = BTRFS_INODE_ITEM_KEY;
36625d4f98a2SYan Zheng 	key.offset = 0;
36634c66e0d4SDavid Sterba 	inode = btrfs_iget(fs_info->sb, &key, root);
36642e19f1f9SAl Viro 	BUG_ON(IS_ERR(inode));
3665b3470b5dSDavid Sterba 	BTRFS_I(inode)->index_cnt = group->start;
36665d4f98a2SYan Zheng 
366773f2e545SNikolay Borisov 	err = btrfs_orphan_add(trans, BTRFS_I(inode));
36685d4f98a2SYan Zheng out:
366900246528SJosef Bacik 	btrfs_put_root(root);
36703a45bb20SJeff Mahoney 	btrfs_end_transaction(trans);
36712ff7e61eSJeff Mahoney 	btrfs_btree_balance_dirty(fs_info);
36725d4f98a2SYan Zheng 	if (err) {
36735d4f98a2SYan Zheng 		if (inode)
36745d4f98a2SYan Zheng 			iput(inode);
36755d4f98a2SYan Zheng 		inode = ERR_PTR(err);
36765d4f98a2SYan Zheng 	}
36775d4f98a2SYan Zheng 	return inode;
36785d4f98a2SYan Zheng }
36795d4f98a2SYan Zheng 
3680c258d6e3SQu Wenruo static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
36813fd0a558SYan, Zheng {
36823fd0a558SYan, Zheng 	struct reloc_control *rc;
36833fd0a558SYan, Zheng 
36843fd0a558SYan, Zheng 	rc = kzalloc(sizeof(*rc), GFP_NOFS);
36853fd0a558SYan, Zheng 	if (!rc)
36863fd0a558SYan, Zheng 		return NULL;
36873fd0a558SYan, Zheng 
36883fd0a558SYan, Zheng 	INIT_LIST_HEAD(&rc->reloc_roots);
3689d2311e69SQu Wenruo 	INIT_LIST_HEAD(&rc->dirty_subvol_roots);
3690584fb121SQu Wenruo 	btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1);
36913fd0a558SYan, Zheng 	mapping_tree_init(&rc->reloc_root_tree);
369243eb5f29SQu Wenruo 	extent_io_tree_init(fs_info, &rc->processed_blocks,
369343eb5f29SQu Wenruo 			    IO_TREE_RELOC_BLOCKS, NULL);
36943fd0a558SYan, Zheng 	return rc;
36953fd0a558SYan, Zheng }
36963fd0a558SYan, Zheng 
36971a0afa0eSJosef Bacik static void free_reloc_control(struct reloc_control *rc)
36981a0afa0eSJosef Bacik {
36991a0afa0eSJosef Bacik 	struct mapping_node *node, *tmp;
37001a0afa0eSJosef Bacik 
37011a0afa0eSJosef Bacik 	free_reloc_roots(&rc->reloc_roots);
37021a0afa0eSJosef Bacik 	rbtree_postorder_for_each_entry_safe(node, tmp,
37031a0afa0eSJosef Bacik 			&rc->reloc_root_tree.rb_root, rb_node)
37041a0afa0eSJosef Bacik 		kfree(node);
37051a0afa0eSJosef Bacik 
37061a0afa0eSJosef Bacik 	kfree(rc);
37071a0afa0eSJosef Bacik }
37081a0afa0eSJosef Bacik 
37095d4f98a2SYan Zheng /*
3710ebce0e01SAdam Borowski  * Print the block group being relocated
3711ebce0e01SAdam Borowski  */
3712ebce0e01SAdam Borowski static void describe_relocation(struct btrfs_fs_info *fs_info,
371332da5386SDavid Sterba 				struct btrfs_block_group *block_group)
3714ebce0e01SAdam Borowski {
3715f89e09cfSAnand Jain 	char buf[128] = {'\0'};
3716ebce0e01SAdam Borowski 
3717f89e09cfSAnand Jain 	btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf));
3718ebce0e01SAdam Borowski 
3719ebce0e01SAdam Borowski 	btrfs_info(fs_info,
3720ebce0e01SAdam Borowski 		   "relocating block group %llu flags %s",
3721b3470b5dSDavid Sterba 		   block_group->start, buf);
3722ebce0e01SAdam Borowski }
3723ebce0e01SAdam Borowski 
3724430640e3SQu Wenruo static const char *stage_to_string(int stage)
3725430640e3SQu Wenruo {
3726430640e3SQu Wenruo 	if (stage == MOVE_DATA_EXTENTS)
3727430640e3SQu Wenruo 		return "move data extents";
3728430640e3SQu Wenruo 	if (stage == UPDATE_DATA_PTRS)
3729430640e3SQu Wenruo 		return "update data pointers";
3730430640e3SQu Wenruo 	return "unknown";
3731430640e3SQu Wenruo }
3732430640e3SQu Wenruo 
3733ebce0e01SAdam Borowski /*
37345d4f98a2SYan Zheng  * function to relocate all extents in a block group.
37355d4f98a2SYan Zheng  */
37366bccf3abSJeff Mahoney int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
37375d4f98a2SYan Zheng {
373832da5386SDavid Sterba 	struct btrfs_block_group *bg;
37396bccf3abSJeff Mahoney 	struct btrfs_root *extent_root = fs_info->extent_root;
37405d4f98a2SYan Zheng 	struct reloc_control *rc;
37410af3d00bSJosef Bacik 	struct inode *inode;
37420af3d00bSJosef Bacik 	struct btrfs_path *path;
37435d4f98a2SYan Zheng 	int ret;
3744f0486c68SYan, Zheng 	int rw = 0;
37455d4f98a2SYan Zheng 	int err = 0;
37465d4f98a2SYan Zheng 
3747eede2bf3SOmar Sandoval 	bg = btrfs_lookup_block_group(fs_info, group_start);
3748eede2bf3SOmar Sandoval 	if (!bg)
3749eede2bf3SOmar Sandoval 		return -ENOENT;
3750eede2bf3SOmar Sandoval 
3751eede2bf3SOmar Sandoval 	if (btrfs_pinned_by_swapfile(fs_info, bg)) {
3752eede2bf3SOmar Sandoval 		btrfs_put_block_group(bg);
3753eede2bf3SOmar Sandoval 		return -ETXTBSY;
3754eede2bf3SOmar Sandoval 	}
3755eede2bf3SOmar Sandoval 
3756c258d6e3SQu Wenruo 	rc = alloc_reloc_control(fs_info);
3757eede2bf3SOmar Sandoval 	if (!rc) {
3758eede2bf3SOmar Sandoval 		btrfs_put_block_group(bg);
37595d4f98a2SYan Zheng 		return -ENOMEM;
3760eede2bf3SOmar Sandoval 	}
37615d4f98a2SYan Zheng 
3762f0486c68SYan, Zheng 	rc->extent_root = extent_root;
3763eede2bf3SOmar Sandoval 	rc->block_group = bg;
37645d4f98a2SYan Zheng 
3765b12de528SQu Wenruo 	ret = btrfs_inc_block_group_ro(rc->block_group, true);
3766f0486c68SYan, Zheng 	if (ret) {
3767f0486c68SYan, Zheng 		err = ret;
3768f0486c68SYan, Zheng 		goto out;
3769f0486c68SYan, Zheng 	}
3770f0486c68SYan, Zheng 	rw = 1;
3771f0486c68SYan, Zheng 
37720af3d00bSJosef Bacik 	path = btrfs_alloc_path();
37730af3d00bSJosef Bacik 	if (!path) {
37740af3d00bSJosef Bacik 		err = -ENOMEM;
37750af3d00bSJosef Bacik 		goto out;
37760af3d00bSJosef Bacik 	}
37770af3d00bSJosef Bacik 
37787949f339SDavid Sterba 	inode = lookup_free_space_inode(rc->block_group, path);
37790af3d00bSJosef Bacik 	btrfs_free_path(path);
37800af3d00bSJosef Bacik 
37810af3d00bSJosef Bacik 	if (!IS_ERR(inode))
37821bbc621eSChris Mason 		ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
37830af3d00bSJosef Bacik 	else
37840af3d00bSJosef Bacik 		ret = PTR_ERR(inode);
37850af3d00bSJosef Bacik 
37860af3d00bSJosef Bacik 	if (ret && ret != -ENOENT) {
37870af3d00bSJosef Bacik 		err = ret;
37880af3d00bSJosef Bacik 		goto out;
37890af3d00bSJosef Bacik 	}
37900af3d00bSJosef Bacik 
37915d4f98a2SYan Zheng 	rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
37925d4f98a2SYan Zheng 	if (IS_ERR(rc->data_inode)) {
37935d4f98a2SYan Zheng 		err = PTR_ERR(rc->data_inode);
37945d4f98a2SYan Zheng 		rc->data_inode = NULL;
37955d4f98a2SYan Zheng 		goto out;
37965d4f98a2SYan Zheng 	}
37975d4f98a2SYan Zheng 
37980b246afaSJeff Mahoney 	describe_relocation(fs_info, rc->block_group);
37995d4f98a2SYan Zheng 
38009cfa3e34SFilipe Manana 	btrfs_wait_block_group_reservations(rc->block_group);
3801f78c436cSFilipe Manana 	btrfs_wait_nocow_writers(rc->block_group);
38026374e57aSChris Mason 	btrfs_wait_ordered_roots(fs_info, U64_MAX,
3803b3470b5dSDavid Sterba 				 rc->block_group->start,
3804b3470b5dSDavid Sterba 				 rc->block_group->length);
38055d4f98a2SYan Zheng 
38065d4f98a2SYan Zheng 	while (1) {
3807430640e3SQu Wenruo 		int finishes_stage;
3808430640e3SQu Wenruo 
380976dda93cSYan, Zheng 		mutex_lock(&fs_info->cleaner_mutex);
38105d4f98a2SYan Zheng 		ret = relocate_block_group(rc);
381176dda93cSYan, Zheng 		mutex_unlock(&fs_info->cleaner_mutex);
3812ff612ba7SJosef Bacik 		if (ret < 0)
38135d4f98a2SYan Zheng 			err = ret;
3814ff612ba7SJosef Bacik 
3815430640e3SQu Wenruo 		finishes_stage = rc->stage;
3816ff612ba7SJosef Bacik 		/*
3817ff612ba7SJosef Bacik 		 * We may have gotten ENOSPC after we already dirtied some
3818ff612ba7SJosef Bacik 		 * extents.  If writeout happens while we're relocating a
3819ff612ba7SJosef Bacik 		 * different block group we could end up hitting the
3820ff612ba7SJosef Bacik 		 * BUG_ON(rc->stage == UPDATE_DATA_PTRS) in
3821ff612ba7SJosef Bacik 		 * btrfs_reloc_cow_block.  Make sure we write everything out
3822ff612ba7SJosef Bacik 		 * properly so we don't trip over this problem, and then break
3823ff612ba7SJosef Bacik 		 * out of the loop if we hit an error.
3824ff612ba7SJosef Bacik 		 */
3825ff612ba7SJosef Bacik 		if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
3826ff612ba7SJosef Bacik 			ret = btrfs_wait_ordered_range(rc->data_inode, 0,
3827ff612ba7SJosef Bacik 						       (u64)-1);
3828ff612ba7SJosef Bacik 			if (ret)
3829ff612ba7SJosef Bacik 				err = ret;
3830ff612ba7SJosef Bacik 			invalidate_mapping_pages(rc->data_inode->i_mapping,
3831ff612ba7SJosef Bacik 						 0, -1);
3832ff612ba7SJosef Bacik 			rc->stage = UPDATE_DATA_PTRS;
38335d4f98a2SYan Zheng 		}
38345d4f98a2SYan Zheng 
3835ff612ba7SJosef Bacik 		if (err < 0)
3836ff612ba7SJosef Bacik 			goto out;
3837ff612ba7SJosef Bacik 
38385d4f98a2SYan Zheng 		if (rc->extents_found == 0)
38395d4f98a2SYan Zheng 			break;
38405d4f98a2SYan Zheng 
3841430640e3SQu Wenruo 		btrfs_info(fs_info, "found %llu extents, stage: %s",
3842430640e3SQu Wenruo 			   rc->extents_found, stage_to_string(finishes_stage));
38435d4f98a2SYan Zheng 	}
38445d4f98a2SYan Zheng 
38455d4f98a2SYan Zheng 	WARN_ON(rc->block_group->pinned > 0);
38465d4f98a2SYan Zheng 	WARN_ON(rc->block_group->reserved > 0);
3847bf38be65SDavid Sterba 	WARN_ON(rc->block_group->used > 0);
38485d4f98a2SYan Zheng out:
3849f0486c68SYan, Zheng 	if (err && rw)
38502ff7e61eSJeff Mahoney 		btrfs_dec_block_group_ro(rc->block_group);
38515d4f98a2SYan Zheng 	iput(rc->data_inode);
38525d4f98a2SYan Zheng 	btrfs_put_block_group(rc->block_group);
38531a0afa0eSJosef Bacik 	free_reloc_control(rc);
38545d4f98a2SYan Zheng 	return err;
38555d4f98a2SYan Zheng }
38565d4f98a2SYan Zheng 
385776dda93cSYan, Zheng static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
385876dda93cSYan, Zheng {
38590b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
386076dda93cSYan, Zheng 	struct btrfs_trans_handle *trans;
386179787eaaSJeff Mahoney 	int ret, err;
386276dda93cSYan, Zheng 
38630b246afaSJeff Mahoney 	trans = btrfs_start_transaction(fs_info->tree_root, 0);
386479787eaaSJeff Mahoney 	if (IS_ERR(trans))
386579787eaaSJeff Mahoney 		return PTR_ERR(trans);
386676dda93cSYan, Zheng 
386776dda93cSYan, Zheng 	memset(&root->root_item.drop_progress, 0,
386876dda93cSYan, Zheng 		sizeof(root->root_item.drop_progress));
386976dda93cSYan, Zheng 	root->root_item.drop_level = 0;
387076dda93cSYan, Zheng 	btrfs_set_root_refs(&root->root_item, 0);
38710b246afaSJeff Mahoney 	ret = btrfs_update_root(trans, fs_info->tree_root,
387276dda93cSYan, Zheng 				&root->root_key, &root->root_item);
387376dda93cSYan, Zheng 
38743a45bb20SJeff Mahoney 	err = btrfs_end_transaction(trans);
387579787eaaSJeff Mahoney 	if (err)
387679787eaaSJeff Mahoney 		return err;
387779787eaaSJeff Mahoney 	return ret;
387876dda93cSYan, Zheng }
387976dda93cSYan, Zheng 
38805d4f98a2SYan Zheng /*
38815d4f98a2SYan Zheng  * recover relocation interrupted by system crash.
38825d4f98a2SYan Zheng  *
38835d4f98a2SYan Zheng  * this function resumes merging reloc trees with corresponding fs trees.
38845d4f98a2SYan Zheng  * this is important for keeping the sharing of tree blocks
38855d4f98a2SYan Zheng  */
38865d4f98a2SYan Zheng int btrfs_recover_relocation(struct btrfs_root *root)
38875d4f98a2SYan Zheng {
38880b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
38895d4f98a2SYan Zheng 	LIST_HEAD(reloc_roots);
38905d4f98a2SYan Zheng 	struct btrfs_key key;
38915d4f98a2SYan Zheng 	struct btrfs_root *fs_root;
38925d4f98a2SYan Zheng 	struct btrfs_root *reloc_root;
38935d4f98a2SYan Zheng 	struct btrfs_path *path;
38945d4f98a2SYan Zheng 	struct extent_buffer *leaf;
38955d4f98a2SYan Zheng 	struct reloc_control *rc = NULL;
38965d4f98a2SYan Zheng 	struct btrfs_trans_handle *trans;
38975d4f98a2SYan Zheng 	int ret;
38985d4f98a2SYan Zheng 	int err = 0;
38995d4f98a2SYan Zheng 
39005d4f98a2SYan Zheng 	path = btrfs_alloc_path();
39015d4f98a2SYan Zheng 	if (!path)
39025d4f98a2SYan Zheng 		return -ENOMEM;
3903e4058b54SDavid Sterba 	path->reada = READA_BACK;
39045d4f98a2SYan Zheng 
39055d4f98a2SYan Zheng 	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
39065d4f98a2SYan Zheng 	key.type = BTRFS_ROOT_ITEM_KEY;
39075d4f98a2SYan Zheng 	key.offset = (u64)-1;
39085d4f98a2SYan Zheng 
39095d4f98a2SYan Zheng 	while (1) {
39100b246afaSJeff Mahoney 		ret = btrfs_search_slot(NULL, fs_info->tree_root, &key,
39115d4f98a2SYan Zheng 					path, 0, 0);
39125d4f98a2SYan Zheng 		if (ret < 0) {
39135d4f98a2SYan Zheng 			err = ret;
39145d4f98a2SYan Zheng 			goto out;
39155d4f98a2SYan Zheng 		}
39165d4f98a2SYan Zheng 		if (ret > 0) {
39175d4f98a2SYan Zheng 			if (path->slots[0] == 0)
39185d4f98a2SYan Zheng 				break;
39195d4f98a2SYan Zheng 			path->slots[0]--;
39205d4f98a2SYan Zheng 		}
39215d4f98a2SYan Zheng 		leaf = path->nodes[0];
39225d4f98a2SYan Zheng 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3923b3b4aa74SDavid Sterba 		btrfs_release_path(path);
39245d4f98a2SYan Zheng 
39255d4f98a2SYan Zheng 		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
39265d4f98a2SYan Zheng 		    key.type != BTRFS_ROOT_ITEM_KEY)
39275d4f98a2SYan Zheng 			break;
39285d4f98a2SYan Zheng 
39293dbf1738SJosef Bacik 		reloc_root = btrfs_read_tree_root(root, &key);
39305d4f98a2SYan Zheng 		if (IS_ERR(reloc_root)) {
39315d4f98a2SYan Zheng 			err = PTR_ERR(reloc_root);
39325d4f98a2SYan Zheng 			goto out;
39335d4f98a2SYan Zheng 		}
39345d4f98a2SYan Zheng 
39353dbf1738SJosef Bacik 		set_bit(BTRFS_ROOT_REF_COWS, &reloc_root->state);
39365d4f98a2SYan Zheng 		list_add(&reloc_root->root_list, &reloc_roots);
39375d4f98a2SYan Zheng 
39385d4f98a2SYan Zheng 		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
39390b246afaSJeff Mahoney 			fs_root = read_fs_root(fs_info,
39405d4f98a2SYan Zheng 					       reloc_root->root_key.offset);
39415d4f98a2SYan Zheng 			if (IS_ERR(fs_root)) {
394276dda93cSYan, Zheng 				ret = PTR_ERR(fs_root);
394376dda93cSYan, Zheng 				if (ret != -ENOENT) {
394476dda93cSYan, Zheng 					err = ret;
39455d4f98a2SYan Zheng 					goto out;
39465d4f98a2SYan Zheng 				}
394779787eaaSJeff Mahoney 				ret = mark_garbage_root(reloc_root);
394879787eaaSJeff Mahoney 				if (ret < 0) {
394979787eaaSJeff Mahoney 					err = ret;
395079787eaaSJeff Mahoney 					goto out;
395179787eaaSJeff Mahoney 				}
3952932fd26dSJosef Bacik 			} else {
395300246528SJosef Bacik 				btrfs_put_root(fs_root);
395476dda93cSYan, Zheng 			}
39555d4f98a2SYan Zheng 		}
39565d4f98a2SYan Zheng 
39575d4f98a2SYan Zheng 		if (key.offset == 0)
39585d4f98a2SYan Zheng 			break;
39595d4f98a2SYan Zheng 
39605d4f98a2SYan Zheng 		key.offset--;
39615d4f98a2SYan Zheng 	}
3962b3b4aa74SDavid Sterba 	btrfs_release_path(path);
39635d4f98a2SYan Zheng 
39645d4f98a2SYan Zheng 	if (list_empty(&reloc_roots))
39655d4f98a2SYan Zheng 		goto out;
39665d4f98a2SYan Zheng 
3967c258d6e3SQu Wenruo 	rc = alloc_reloc_control(fs_info);
39685d4f98a2SYan Zheng 	if (!rc) {
39695d4f98a2SYan Zheng 		err = -ENOMEM;
39705d4f98a2SYan Zheng 		goto out;
39715d4f98a2SYan Zheng 	}
39725d4f98a2SYan Zheng 
39730b246afaSJeff Mahoney 	rc->extent_root = fs_info->extent_root;
39745d4f98a2SYan Zheng 
39755d4f98a2SYan Zheng 	set_reloc_control(rc);
39765d4f98a2SYan Zheng 
39777a7eaa40SJosef Bacik 	trans = btrfs_join_transaction(rc->extent_root);
39783612b495STsutomu Itoh 	if (IS_ERR(trans)) {
39793612b495STsutomu Itoh 		err = PTR_ERR(trans);
3980fb2d83eeSJosef Bacik 		goto out_unset;
39813612b495STsutomu Itoh 	}
39823fd0a558SYan, Zheng 
39833fd0a558SYan, Zheng 	rc->merge_reloc_tree = 1;
39843fd0a558SYan, Zheng 
39855d4f98a2SYan Zheng 	while (!list_empty(&reloc_roots)) {
39865d4f98a2SYan Zheng 		reloc_root = list_entry(reloc_roots.next,
39875d4f98a2SYan Zheng 					struct btrfs_root, root_list);
39885d4f98a2SYan Zheng 		list_del(&reloc_root->root_list);
39895d4f98a2SYan Zheng 
39905d4f98a2SYan Zheng 		if (btrfs_root_refs(&reloc_root->root_item) == 0) {
39915d4f98a2SYan Zheng 			list_add_tail(&reloc_root->root_list,
39925d4f98a2SYan Zheng 				      &rc->reloc_roots);
39935d4f98a2SYan Zheng 			continue;
39945d4f98a2SYan Zheng 		}
39955d4f98a2SYan Zheng 
39960b246afaSJeff Mahoney 		fs_root = read_fs_root(fs_info, reloc_root->root_key.offset);
399779787eaaSJeff Mahoney 		if (IS_ERR(fs_root)) {
399879787eaaSJeff Mahoney 			err = PTR_ERR(fs_root);
3999ca1aa281SJosef Bacik 			list_add_tail(&reloc_root->root_list, &reloc_roots);
40001402d17dSXiyu Yang 			btrfs_end_transaction(trans);
4001fb2d83eeSJosef Bacik 			goto out_unset;
400279787eaaSJeff Mahoney 		}
40035d4f98a2SYan Zheng 
4004ffd7b339SJeff Mahoney 		err = __add_reloc_root(reloc_root);
400579787eaaSJeff Mahoney 		BUG_ON(err < 0); /* -ENOMEM or logic error */
4006f44deb74SJosef Bacik 		fs_root->reloc_root = btrfs_grab_root(reloc_root);
400700246528SJosef Bacik 		btrfs_put_root(fs_root);
40085d4f98a2SYan Zheng 	}
40095d4f98a2SYan Zheng 
40103a45bb20SJeff Mahoney 	err = btrfs_commit_transaction(trans);
401179787eaaSJeff Mahoney 	if (err)
4012fb2d83eeSJosef Bacik 		goto out_unset;
40135d4f98a2SYan Zheng 
40145d4f98a2SYan Zheng 	merge_reloc_roots(rc);
40155d4f98a2SYan Zheng 
40165d4f98a2SYan Zheng 	unset_reloc_control(rc);
40175d4f98a2SYan Zheng 
40187a7eaa40SJosef Bacik 	trans = btrfs_join_transaction(rc->extent_root);
401962b99540SQu Wenruo 	if (IS_ERR(trans)) {
40203612b495STsutomu Itoh 		err = PTR_ERR(trans);
40216217b0faSJosef Bacik 		goto out_clean;
402262b99540SQu Wenruo 	}
40233a45bb20SJeff Mahoney 	err = btrfs_commit_transaction(trans);
40246217b0faSJosef Bacik out_clean:
4025d2311e69SQu Wenruo 	ret = clean_dirty_subvols(rc);
4026d2311e69SQu Wenruo 	if (ret < 0 && !err)
4027d2311e69SQu Wenruo 		err = ret;
4028fb2d83eeSJosef Bacik out_unset:
4029fb2d83eeSJosef Bacik 	unset_reloc_control(rc);
40301a0afa0eSJosef Bacik 	free_reloc_control(rc);
40313612b495STsutomu Itoh out:
4032aca1bba6SLiu Bo 	if (!list_empty(&reloc_roots))
4033aca1bba6SLiu Bo 		free_reloc_roots(&reloc_roots);
4034aca1bba6SLiu Bo 
40355d4f98a2SYan Zheng 	btrfs_free_path(path);
40365d4f98a2SYan Zheng 
40375d4f98a2SYan Zheng 	if (err == 0) {
40385d4f98a2SYan Zheng 		/* cleanup orphan inode in data relocation tree */
40390b246afaSJeff Mahoney 		fs_root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
4040932fd26dSJosef Bacik 		if (IS_ERR(fs_root)) {
40415d4f98a2SYan Zheng 			err = PTR_ERR(fs_root);
4042932fd26dSJosef Bacik 		} else {
404366b4ffd1SJosef Bacik 			err = btrfs_orphan_cleanup(fs_root);
404400246528SJosef Bacik 			btrfs_put_root(fs_root);
4045932fd26dSJosef Bacik 		}
4046932fd26dSJosef Bacik 	}
40475d4f98a2SYan Zheng 	return err;
40485d4f98a2SYan Zheng }
40495d4f98a2SYan Zheng 
40505d4f98a2SYan Zheng /*
40515d4f98a2SYan Zheng  * helper to add ordered checksum for data relocation.
40525d4f98a2SYan Zheng  *
40535d4f98a2SYan Zheng  * cloning checksum properly handles the nodatasum extents.
40545d4f98a2SYan Zheng  * it also saves CPU time to re-calculate the checksum.
40555d4f98a2SYan Zheng  */
40565d4f98a2SYan Zheng int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
40575d4f98a2SYan Zheng {
40580b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
40595d4f98a2SYan Zheng 	struct btrfs_ordered_sum *sums;
40605d4f98a2SYan Zheng 	struct btrfs_ordered_extent *ordered;
40615d4f98a2SYan Zheng 	int ret;
40625d4f98a2SYan Zheng 	u64 disk_bytenr;
40634577b014SJosef Bacik 	u64 new_bytenr;
40645d4f98a2SYan Zheng 	LIST_HEAD(list);
40655d4f98a2SYan Zheng 
40665d4f98a2SYan Zheng 	ordered = btrfs_lookup_ordered_extent(inode, file_pos);
4067bffe633eSOmar Sandoval 	BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
40685d4f98a2SYan Zheng 
40695d4f98a2SYan Zheng 	disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
40700b246afaSJeff Mahoney 	ret = btrfs_lookup_csums_range(fs_info->csum_root, disk_bytenr,
4071a2de733cSArne Jansen 				       disk_bytenr + len - 1, &list, 0);
407279787eaaSJeff Mahoney 	if (ret)
407379787eaaSJeff Mahoney 		goto out;
40745d4f98a2SYan Zheng 
40755d4f98a2SYan Zheng 	while (!list_empty(&list)) {
40765d4f98a2SYan Zheng 		sums = list_entry(list.next, struct btrfs_ordered_sum, list);
40775d4f98a2SYan Zheng 		list_del_init(&sums->list);
40785d4f98a2SYan Zheng 
40794577b014SJosef Bacik 		/*
40804577b014SJosef Bacik 		 * We need to offset the new_bytenr based on where the csum is.
40814577b014SJosef Bacik 		 * We need to do this because we will read in entire prealloc
40824577b014SJosef Bacik 		 * extents but we may have written to say the middle of the
40834577b014SJosef Bacik 		 * prealloc extent, so we need to make sure the csum goes with
40844577b014SJosef Bacik 		 * the right disk offset.
40854577b014SJosef Bacik 		 *
40864577b014SJosef Bacik 		 * We can do this because the data reloc inode refers strictly
40874577b014SJosef Bacik 		 * to the on disk bytes, so we don't have to worry about
40884577b014SJosef Bacik 		 * disk_len vs real len like with real inodes since it's all
40894577b014SJosef Bacik 		 * disk length.
40904577b014SJosef Bacik 		 */
4091bffe633eSOmar Sandoval 		new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr;
40924577b014SJosef Bacik 		sums->bytenr = new_bytenr;
40935d4f98a2SYan Zheng 
4094f9756261SNikolay Borisov 		btrfs_add_ordered_sum(ordered, sums);
40955d4f98a2SYan Zheng 	}
409679787eaaSJeff Mahoney out:
40975d4f98a2SYan Zheng 	btrfs_put_ordered_extent(ordered);
4098411fc6bcSAndi Kleen 	return ret;
40995d4f98a2SYan Zheng }
41003fd0a558SYan, Zheng 
410183d4cfd4SJosef Bacik int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
41023fd0a558SYan, Zheng 			  struct btrfs_root *root, struct extent_buffer *buf,
41033fd0a558SYan, Zheng 			  struct extent_buffer *cow)
41043fd0a558SYan, Zheng {
41050b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = root->fs_info;
41063fd0a558SYan, Zheng 	struct reloc_control *rc;
4107a26195a5SQu Wenruo 	struct btrfs_backref_node *node;
41083fd0a558SYan, Zheng 	int first_cow = 0;
41093fd0a558SYan, Zheng 	int level;
411083d4cfd4SJosef Bacik 	int ret = 0;
41113fd0a558SYan, Zheng 
41120b246afaSJeff Mahoney 	rc = fs_info->reloc_ctl;
41133fd0a558SYan, Zheng 	if (!rc)
411483d4cfd4SJosef Bacik 		return 0;
41153fd0a558SYan, Zheng 
41163fd0a558SYan, Zheng 	BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
41173fd0a558SYan, Zheng 	       root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
41183fd0a558SYan, Zheng 
41193fd0a558SYan, Zheng 	level = btrfs_header_level(buf);
41203fd0a558SYan, Zheng 	if (btrfs_header_generation(buf) <=
41213fd0a558SYan, Zheng 	    btrfs_root_last_snapshot(&root->root_item))
41223fd0a558SYan, Zheng 		first_cow = 1;
41233fd0a558SYan, Zheng 
41243fd0a558SYan, Zheng 	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
41253fd0a558SYan, Zheng 	    rc->create_reloc_tree) {
41263fd0a558SYan, Zheng 		WARN_ON(!first_cow && level == 0);
41273fd0a558SYan, Zheng 
41283fd0a558SYan, Zheng 		node = rc->backref_cache.path[level];
41293fd0a558SYan, Zheng 		BUG_ON(node->bytenr != buf->start &&
41303fd0a558SYan, Zheng 		       node->new_bytenr != buf->start);
41313fd0a558SYan, Zheng 
4132b0fe7078SQu Wenruo 		btrfs_backref_drop_node_buffer(node);
413367439dadSDavid Sterba 		atomic_inc(&cow->refs);
41343fd0a558SYan, Zheng 		node->eb = cow;
41353fd0a558SYan, Zheng 		node->new_bytenr = cow->start;
41363fd0a558SYan, Zheng 
41373fd0a558SYan, Zheng 		if (!node->pending) {
41383fd0a558SYan, Zheng 			list_move_tail(&node->list,
41393fd0a558SYan, Zheng 				       &rc->backref_cache.pending[level]);
41403fd0a558SYan, Zheng 			node->pending = 1;
41413fd0a558SYan, Zheng 		}
41423fd0a558SYan, Zheng 
41433fd0a558SYan, Zheng 		if (first_cow)
41449569cc20SQu Wenruo 			mark_block_processed(rc, node);
41453fd0a558SYan, Zheng 
41463fd0a558SYan, Zheng 		if (first_cow && level > 0)
41473fd0a558SYan, Zheng 			rc->nodes_relocated += buf->len;
41483fd0a558SYan, Zheng 	}
41493fd0a558SYan, Zheng 
415083d4cfd4SJosef Bacik 	if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
41513fd0a558SYan, Zheng 		ret = replace_file_extents(trans, rc, root, cow);
415283d4cfd4SJosef Bacik 	return ret;
41533fd0a558SYan, Zheng }
41543fd0a558SYan, Zheng 
41553fd0a558SYan, Zheng /*
41563fd0a558SYan, Zheng  * called before creating snapshot. it calculates metadata reservation
415701327610SNicholas D Steeves  * required for relocating tree blocks in the snapshot
41583fd0a558SYan, Zheng  */
4159147d256eSZhaolei void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
41603fd0a558SYan, Zheng 			      u64 *bytes_to_reserve)
41613fd0a558SYan, Zheng {
416210995c04SQu Wenruo 	struct btrfs_root *root = pending->root;
416310995c04SQu Wenruo 	struct reloc_control *rc = root->fs_info->reloc_ctl;
41643fd0a558SYan, Zheng 
41656282675eSQu Wenruo 	if (!rc || !have_reloc_root(root))
41663fd0a558SYan, Zheng 		return;
41673fd0a558SYan, Zheng 
41683fd0a558SYan, Zheng 	if (!rc->merge_reloc_tree)
41693fd0a558SYan, Zheng 		return;
41703fd0a558SYan, Zheng 
41713fd0a558SYan, Zheng 	root = root->reloc_root;
41723fd0a558SYan, Zheng 	BUG_ON(btrfs_root_refs(&root->root_item) == 0);
41733fd0a558SYan, Zheng 	/*
41743fd0a558SYan, Zheng 	 * relocation is in the stage of merging trees. the space
41753fd0a558SYan, Zheng 	 * used by merging a reloc tree is twice the size of
41763fd0a558SYan, Zheng 	 * relocated tree nodes in the worst case. half for cowing
41773fd0a558SYan, Zheng 	 * the reloc tree, half for cowing the fs tree. the space
41783fd0a558SYan, Zheng 	 * used by cowing the reloc tree will be freed after the
41793fd0a558SYan, Zheng 	 * tree is dropped. if we create snapshot, cowing the fs
41803fd0a558SYan, Zheng 	 * tree may use more space than it frees. so we need
41813fd0a558SYan, Zheng 	 * reserve extra space.
41823fd0a558SYan, Zheng 	 */
41833fd0a558SYan, Zheng 	*bytes_to_reserve += rc->nodes_relocated;
41843fd0a558SYan, Zheng }
41853fd0a558SYan, Zheng 
41863fd0a558SYan, Zheng /*
41873fd0a558SYan, Zheng  * called after snapshot is created. migrate block reservation
41883fd0a558SYan, Zheng  * and create reloc root for the newly created snapshot
4189f44deb74SJosef Bacik  *
4190f44deb74SJosef Bacik  * This is similar to btrfs_init_reloc_root(), we come out of here with two
4191f44deb74SJosef Bacik  * references held on the reloc_root, one for root->reloc_root and one for
4192f44deb74SJosef Bacik  * rc->reloc_roots.
41933fd0a558SYan, Zheng  */
419449b25e05SJeff Mahoney int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
41953fd0a558SYan, Zheng 			       struct btrfs_pending_snapshot *pending)
41963fd0a558SYan, Zheng {
41973fd0a558SYan, Zheng 	struct btrfs_root *root = pending->root;
41983fd0a558SYan, Zheng 	struct btrfs_root *reloc_root;
41993fd0a558SYan, Zheng 	struct btrfs_root *new_root;
420010995c04SQu Wenruo 	struct reloc_control *rc = root->fs_info->reloc_ctl;
42013fd0a558SYan, Zheng 	int ret;
42023fd0a558SYan, Zheng 
42036282675eSQu Wenruo 	if (!rc || !have_reloc_root(root))
420449b25e05SJeff Mahoney 		return 0;
42053fd0a558SYan, Zheng 
42063fd0a558SYan, Zheng 	rc = root->fs_info->reloc_ctl;
42073fd0a558SYan, Zheng 	rc->merging_rsv_size += rc->nodes_relocated;
42083fd0a558SYan, Zheng 
42093fd0a558SYan, Zheng 	if (rc->merge_reloc_tree) {
42103fd0a558SYan, Zheng 		ret = btrfs_block_rsv_migrate(&pending->block_rsv,
42113fd0a558SYan, Zheng 					      rc->block_rsv,
42123a584174SLu Fengqi 					      rc->nodes_relocated, true);
421349b25e05SJeff Mahoney 		if (ret)
421449b25e05SJeff Mahoney 			return ret;
42153fd0a558SYan, Zheng 	}
42163fd0a558SYan, Zheng 
42173fd0a558SYan, Zheng 	new_root = pending->snap;
42183fd0a558SYan, Zheng 	reloc_root = create_reloc_root(trans, root->reloc_root,
42193fd0a558SYan, Zheng 				       new_root->root_key.objectid);
422049b25e05SJeff Mahoney 	if (IS_ERR(reloc_root))
422149b25e05SJeff Mahoney 		return PTR_ERR(reloc_root);
42223fd0a558SYan, Zheng 
4223ffd7b339SJeff Mahoney 	ret = __add_reloc_root(reloc_root);
4224ffd7b339SJeff Mahoney 	BUG_ON(ret < 0);
4225f44deb74SJosef Bacik 	new_root->reloc_root = btrfs_grab_root(reloc_root);
42263fd0a558SYan, Zheng 
422749b25e05SJeff Mahoney 	if (rc->create_reloc_tree)
42283fd0a558SYan, Zheng 		ret = clone_backref_node(trans, rc, root, reloc_root);
422949b25e05SJeff Mahoney 	return ret;
42303fd0a558SYan, Zheng }
4231