xref: /openbmc/linux/fs/btrfs/send.c (revision fac59652993f075d57860769c99045b3ca18780d)
1c1d7c514SDavid Sterba // SPDX-License-Identifier: GPL-2.0
231db9f7cSAlexander Block /*
331db9f7cSAlexander Block  * Copyright (C) 2012 Alexander Block.  All rights reserved.
431db9f7cSAlexander Block  */
531db9f7cSAlexander Block 
631db9f7cSAlexander Block #include <linux/bsearch.h>
731db9f7cSAlexander Block #include <linux/fs.h>
831db9f7cSAlexander Block #include <linux/file.h>
931db9f7cSAlexander Block #include <linux/sort.h>
1031db9f7cSAlexander Block #include <linux/mount.h>
1131db9f7cSAlexander Block #include <linux/xattr.h>
1231db9f7cSAlexander Block #include <linux/posix_acl_xattr.h>
135b8418b8SDavid Sterba #include <linux/radix-tree.h>
14a1857ebeSStephen Rothwell #include <linux/vmalloc.h>
15ed84885dSAndy Shevchenko #include <linux/string.h>
162351f431SJosef Bacik #include <linux/compat.h>
179678c543SNikolay Borisov #include <linux/crc32c.h>
1838622010SBoris Burkov #include <linux/fsverity.h>
1931db9f7cSAlexander Block 
2031db9f7cSAlexander Block #include "send.h"
218234d3f6SDavid Sterba #include "ctree.h"
2231db9f7cSAlexander Block #include "backref.h"
2331db9f7cSAlexander Block #include "locking.h"
2431db9f7cSAlexander Block #include "disk-io.h"
2531db9f7cSAlexander Block #include "btrfs_inode.h"
2631db9f7cSAlexander Block #include "transaction.h"
27ebb8765bSAnand Jain #include "compression.h"
2889efda52SMarcos Paulo de Souza #include "xattr.h"
29d96b3424SFilipe Manana #include "print-tree.h"
3007e81dc9SJosef Bacik #include "accessors.h"
31f2b39277SJosef Bacik #include "dir-item.h"
327c8ede16SJosef Bacik #include "file-item.h"
337572dec8SJosef Bacik #include "ioctl.h"
345c11adccSJosef Bacik #include "verity.h"
3590b90d4aSFilipe Manana #include "lru_cache.h"
3631db9f7cSAlexander Block 
/*
 * Upper bound on the number of references an extent may have for us to still
 * attempt a clone operation instead of a write operation. The limit exists to
 * avoid hitting limitations of the backreference walking code, which takes a
 * lot of time and uses too much memory for extents with a large number of
 * references.
 */
#define SEND_MAX_EXTENT_REFS	1024
44fd0ddbe2SFilipe Manana 
/*
 * A fs_path is a helper for building path names whose final size is not known
 * in advance. The backing buffer is reallocated on demand.
 * Path elements can be added quickly on the right side (normal path) or on
 * the left side (reversed path). A reversed path can also be unreversed if
 * needed.
 */
struct fs_path {
	union {
		struct {
			/* First byte of the path currently stored in buf. */
			char *start;
			/* Position of the path's NUL terminator. */
			char *end;

			/* Backing storage; inline_buf until a larger one is allocated. */
			char *buf;
			/* Size in bytes of the storage that buf points to. */
			unsigned short buf_len:15;
			/* Set when the path is built right-to-left (reversed path). */
			unsigned short reversed:1;
			char inline_buf[];
		};
		/*
		 * Average path length does not exceed 200 bytes, so padding the
		 * structure to 256 bytes gives better packing in the slab and
		 * a higher chance to satisfy an allocation later during send.
		 */
		char pad[256];
	};
};
/* Bytes usable in inline_buf: what is left of the padded structure. */
#define FS_PATH_INLINE_SIZE \
	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
7331db9f7cSAlexander Block 
7431db9f7cSAlexander Block 
7531db9f7cSAlexander Block /* reused for each extent */
7631db9f7cSAlexander Block struct clone_root {
7731db9f7cSAlexander Block 	struct btrfs_root *root;
7831db9f7cSAlexander Block 	u64 ino;
7931db9f7cSAlexander Block 	u64 offset;
80c7499a64SFilipe Manana 	u64 num_bytes;
8188ffb665SFilipe Manana 	bool found_ref;
8231db9f7cSAlexander Block };
8331db9f7cSAlexander Block 
/*
 * Size limit for the name cache.
 * NOTE(review): presumably a maximum number of entries, like the other cache
 * limits in this file - confirm where the name cache is initialized.
 */
#define SEND_MAX_NAME_CACHE_SIZE			256
8531db9f7cSAlexander Block 
/*
 * The root_ids array of struct backref_cache_entry is capped at 17 elements.
 * With that cap a cache entry is exactly 192 bytes on x86_64, so it can be
 * served from the kmalloc-192 slab without wasting any space.
 * The most common case is a single root for cloning, which corresponds to the
 * send root. Having the user specify more than 16 clone roots is not common,
 * and in such rare cases we simply don't use caching if the number of cloning
 * roots that lead down to a leaf is more than 17.
 */
#define SEND_MAX_BACKREF_CACHE_ROOTS			17
9666d04209SFilipe Manana 
/*
 * Maximum number of entries kept in the backref cache.
 * With SEND_MAX_BACKREF_CACHE_ROOTS as 17, the size in bytes, excluding
 * maple tree's internal nodes, is 24K.
 */
#define SEND_MAX_BACKREF_CACHE_SIZE 128
10366d04209SFilipe Manana 
10466d04209SFilipe Manana /*
10566d04209SFilipe Manana  * A backref cache entry maps a leaf to a list of IDs of roots from which the
10666d04209SFilipe Manana  * leaf is accessible and we can use for clone operations.
10766d04209SFilipe Manana  * With SEND_MAX_BACKREF_CACHE_ROOTS as 12, each cache entry is 128 bytes (on
10866d04209SFilipe Manana  * x86_64).
10966d04209SFilipe Manana  */
11066d04209SFilipe Manana struct backref_cache_entry {
11190b90d4aSFilipe Manana 	struct btrfs_lru_cache_entry entry;
11266d04209SFilipe Manana 	u64 root_ids[SEND_MAX_BACKREF_CACHE_ROOTS];
11366d04209SFilipe Manana 	/* Number of valid elements in the root_ids array. */
11466d04209SFilipe Manana 	int num_roots;
11566d04209SFilipe Manana };
11666d04209SFilipe Manana 
11790b90d4aSFilipe Manana /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
11890b90d4aSFilipe Manana static_assert(offsetof(struct backref_cache_entry, entry) == 0);
11990b90d4aSFilipe Manana 
/*
 * Maximum number of entries in the cache that tracks directories that were
 * already created. The cache stores raw struct btrfs_lru_cache_entry entries,
 * so it takes at most 4096 bytes: sizeof(struct btrfs_lru_cache_entry) is 48
 * bytes, but the kmalloc-64 slab is used, giving 4096 bytes (64 bytes * 64).
 */
#define SEND_MAX_DIR_CREATED_CACHE_SIZE			64
127e8a7f49dSFilipe Manana 
/*
 * Max number of entries in the directory utimes cache (dir_utimes_cache in
 * struct send_ctx). The cache uses raw struct btrfs_lru_cache_entry entries,
 * so it uses at most 4096 bytes - sizeof(struct btrfs_lru_cache_entry) is 48
 * bytes, but the kmalloc-64 slab is used, so we get 4096 bytes (64 bytes * 64).
 *
 * NOTE(review): presumably this cache tracks directories that have a pending
 * utimes update - confirm against the users of dir_utimes_cache.
 */
#define SEND_MAX_DIR_UTIMES_CACHE_SIZE			64
1353e49363bSFilipe Manana 
13631db9f7cSAlexander Block struct send_ctx {
13731db9f7cSAlexander Block 	struct file *send_filp;
13831db9f7cSAlexander Block 	loff_t send_off;
13931db9f7cSAlexander Block 	char *send_buf;
14031db9f7cSAlexander Block 	u32 send_size;
14131db9f7cSAlexander Block 	u32 send_max_size;
142356bbbb6SOmar Sandoval 	/*
143356bbbb6SOmar Sandoval 	 * Whether BTRFS_SEND_A_DATA attribute was already added to current
144356bbbb6SOmar Sandoval 	 * command (since protocol v2, data must be the last attribute).
145356bbbb6SOmar Sandoval 	 */
146356bbbb6SOmar Sandoval 	bool put_data;
147a4b333f2SOmar Sandoval 	struct page **send_buf_pages;
148cb95e7bfSMark Fasheh 	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */
149e77fbf99SDavid Sterba 	/* Protocol version compatibility requested */
150e77fbf99SDavid Sterba 	u32 proto;
15131db9f7cSAlexander Block 
15231db9f7cSAlexander Block 	struct btrfs_root *send_root;
15331db9f7cSAlexander Block 	struct btrfs_root *parent_root;
15431db9f7cSAlexander Block 	struct clone_root *clone_roots;
15531db9f7cSAlexander Block 	int clone_roots_cnt;
15631db9f7cSAlexander Block 
15731db9f7cSAlexander Block 	/* current state of the compare_tree call */
15831db9f7cSAlexander Block 	struct btrfs_path *left_path;
15931db9f7cSAlexander Block 	struct btrfs_path *right_path;
16031db9f7cSAlexander Block 	struct btrfs_key *cmp_key;
16131db9f7cSAlexander Block 
16231db9f7cSAlexander Block 	/*
163d96b3424SFilipe Manana 	 * Keep track of the generation of the last transaction that was used
164d96b3424SFilipe Manana 	 * for relocating a block group. This is periodically checked in order
165d96b3424SFilipe Manana 	 * to detect if a relocation happened since the last check, so that we
166d96b3424SFilipe Manana 	 * don't operate on stale extent buffers for nodes (level >= 1) or on
167d96b3424SFilipe Manana 	 * stale disk_bytenr values of file extent items.
168d96b3424SFilipe Manana 	 */
169d96b3424SFilipe Manana 	u64 last_reloc_trans;
170d96b3424SFilipe Manana 
171d96b3424SFilipe Manana 	/*
17231db9f7cSAlexander Block 	 * infos of the currently processed inode. In case of deleted inodes,
17331db9f7cSAlexander Block 	 * these are the values from the deleted inode.
17431db9f7cSAlexander Block 	 */
17531db9f7cSAlexander Block 	u64 cur_ino;
17631db9f7cSAlexander Block 	u64 cur_inode_gen;
17731db9f7cSAlexander Block 	u64 cur_inode_size;
17831db9f7cSAlexander Block 	u64 cur_inode_mode;
179644d1940SLiu Bo 	u64 cur_inode_rdev;
18016e7549fSJosef Bacik 	u64 cur_inode_last_extent;
181ffa7c429SFilipe Manana 	u64 cur_inode_next_write_offset;
1829555e1f1SDavid Sterba 	bool cur_inode_new;
1839555e1f1SDavid Sterba 	bool cur_inode_new_gen;
1849555e1f1SDavid Sterba 	bool cur_inode_deleted;
18546b2f459SFilipe Manana 	bool ignore_cur_inode;
18638622010SBoris Burkov 	bool cur_inode_needs_verity;
18738622010SBoris Burkov 	void *verity_descriptor;
18831db9f7cSAlexander Block 
18931db9f7cSAlexander Block 	u64 send_progress;
19031db9f7cSAlexander Block 
19131db9f7cSAlexander Block 	struct list_head new_refs;
19231db9f7cSAlexander Block 	struct list_head deleted_refs;
19331db9f7cSAlexander Block 
194c48545deSFilipe Manana 	struct btrfs_lru_cache name_cache;
19531db9f7cSAlexander Block 
196521b6803SFilipe Manana 	/*
197521b6803SFilipe Manana 	 * The inode we are currently processing. It's not NULL only when we
198521b6803SFilipe Manana 	 * need to issue write commands for data extents from this inode.
199521b6803SFilipe Manana 	 */
200521b6803SFilipe Manana 	struct inode *cur_inode;
2012131bcd3SLiu Bo 	struct file_ra_state ra;
202152555b3SFilipe Manana 	u64 page_cache_clear_start;
203152555b3SFilipe Manana 	bool clean_page_cache;
2042131bcd3SLiu Bo 
2059f03740aSFilipe David Borba Manana 	/*
2069f03740aSFilipe David Borba Manana 	 * We process inodes by their increasing order, so if before an
2079f03740aSFilipe David Borba Manana 	 * incremental send we reverse the parent/child relationship of
2089f03740aSFilipe David Borba Manana 	 * directories such that a directory with a lower inode number was
2099f03740aSFilipe David Borba Manana 	 * the parent of a directory with a higher inode number, and the one
2109f03740aSFilipe David Borba Manana 	 * becoming the new parent got renamed too, we can't rename/move the
2119f03740aSFilipe David Borba Manana 	 * directory with lower inode number when we finish processing it - we
2129f03740aSFilipe David Borba Manana 	 * must process the directory with higher inode number first, then
2139f03740aSFilipe David Borba Manana 	 * rename/move it and then rename/move the directory with lower inode
2149f03740aSFilipe David Borba Manana 	 * number. Example follows.
2159f03740aSFilipe David Borba Manana 	 *
2169f03740aSFilipe David Borba Manana 	 * Tree state when the first send was performed:
2179f03740aSFilipe David Borba Manana 	 *
2189f03740aSFilipe David Borba Manana 	 * .
2199f03740aSFilipe David Borba Manana 	 * |-- a                   (ino 257)
2209f03740aSFilipe David Borba Manana 	 *     |-- b               (ino 258)
2219f03740aSFilipe David Borba Manana 	 *         |
2229f03740aSFilipe David Borba Manana 	 *         |
2239f03740aSFilipe David Borba Manana 	 *         |-- c           (ino 259)
2249f03740aSFilipe David Borba Manana 	 *         |   |-- d       (ino 260)
2259f03740aSFilipe David Borba Manana 	 *         |
2269f03740aSFilipe David Borba Manana 	 *         |-- c2          (ino 261)
2279f03740aSFilipe David Borba Manana 	 *
2289f03740aSFilipe David Borba Manana 	 * Tree state when the second (incremental) send is performed:
2299f03740aSFilipe David Borba Manana 	 *
2309f03740aSFilipe David Borba Manana 	 * .
2319f03740aSFilipe David Borba Manana 	 * |-- a                   (ino 257)
2329f03740aSFilipe David Borba Manana 	 *     |-- b               (ino 258)
2339f03740aSFilipe David Borba Manana 	 *         |-- c2          (ino 261)
2349f03740aSFilipe David Borba Manana 	 *             |-- d2      (ino 260)
2359f03740aSFilipe David Borba Manana 	 *                 |-- cc  (ino 259)
2369f03740aSFilipe David Borba Manana 	 *
2379f03740aSFilipe David Borba Manana 	 * The sequence of steps that lead to the second state was:
2389f03740aSFilipe David Borba Manana 	 *
2399f03740aSFilipe David Borba Manana 	 * mv /a/b/c/d /a/b/c2/d2
2409f03740aSFilipe David Borba Manana 	 * mv /a/b/c /a/b/c2/d2/cc
2419f03740aSFilipe David Borba Manana 	 *
2429f03740aSFilipe David Borba Manana 	 * "c" has lower inode number, but we can't move it (2nd mv operation)
2439f03740aSFilipe David Borba Manana 	 * before we move "d", which has higher inode number.
2449f03740aSFilipe David Borba Manana 	 *
2459f03740aSFilipe David Borba Manana 	 * So we just memorize which move/rename operations must be performed
2469f03740aSFilipe David Borba Manana 	 * later when their respective parent is processed and moved/renamed.
2479f03740aSFilipe David Borba Manana 	 */
2489f03740aSFilipe David Borba Manana 
2499f03740aSFilipe David Borba Manana 	/* Indexed by parent directory inode number. */
2509f03740aSFilipe David Borba Manana 	struct rb_root pending_dir_moves;
2519f03740aSFilipe David Borba Manana 
2529f03740aSFilipe David Borba Manana 	/*
2539f03740aSFilipe David Borba Manana 	 * Reverse index, indexed by the inode number of a directory that
2549f03740aSFilipe David Borba Manana 	 * is waiting for the move/rename of its immediate parent before its
2559f03740aSFilipe David Borba Manana 	 * own move/rename can be performed.
2569f03740aSFilipe David Borba Manana 	 */
2579f03740aSFilipe David Borba Manana 	struct rb_root waiting_dir_moves;
2589dc44214SFilipe Manana 
2599dc44214SFilipe Manana 	/*
2609dc44214SFilipe Manana 	 * A directory that is going to be rm'ed might have a child directory
2619dc44214SFilipe Manana 	 * which is in the pending directory moves index above. In this case,
2629dc44214SFilipe Manana 	 * the directory can only be removed after the move/rename of its child
2639dc44214SFilipe Manana 	 * is performed. Example:
2649dc44214SFilipe Manana 	 *
2659dc44214SFilipe Manana 	 * Parent snapshot:
2669dc44214SFilipe Manana 	 *
2679dc44214SFilipe Manana 	 * .                        (ino 256)
2689dc44214SFilipe Manana 	 * |-- a/                   (ino 257)
2699dc44214SFilipe Manana 	 *     |-- b/               (ino 258)
2709dc44214SFilipe Manana 	 *         |-- c/           (ino 259)
2719dc44214SFilipe Manana 	 *         |   |-- x/       (ino 260)
2729dc44214SFilipe Manana 	 *         |
2739dc44214SFilipe Manana 	 *         |-- y/           (ino 261)
2749dc44214SFilipe Manana 	 *
2759dc44214SFilipe Manana 	 * Send snapshot:
2769dc44214SFilipe Manana 	 *
2779dc44214SFilipe Manana 	 * .                        (ino 256)
2789dc44214SFilipe Manana 	 * |-- a/                   (ino 257)
2799dc44214SFilipe Manana 	 *     |-- b/               (ino 258)
2809dc44214SFilipe Manana 	 *         |-- YY/          (ino 261)
2819dc44214SFilipe Manana 	 *              |-- x/      (ino 260)
2829dc44214SFilipe Manana 	 *
2839dc44214SFilipe Manana 	 * Sequence of steps that lead to the send snapshot:
2849dc44214SFilipe Manana 	 * rm -f /a/b/c/foo.txt
2859dc44214SFilipe Manana 	 * mv /a/b/y /a/b/YY
2869dc44214SFilipe Manana 	 * mv /a/b/c/x /a/b/YY
2879dc44214SFilipe Manana 	 * rmdir /a/b/c
2889dc44214SFilipe Manana 	 *
2899dc44214SFilipe Manana 	 * When the child is processed, its move/rename is delayed until its
2909dc44214SFilipe Manana 	 * parent is processed (as explained above), but all other operations
2919dc44214SFilipe Manana 	 * like update utimes, chown, chgrp, etc, are performed and the paths
2929dc44214SFilipe Manana 	 * that it uses for those operations must use the orphanized name of
2939dc44214SFilipe Manana 	 * its parent (the directory we're going to rm later), so we need to
2949dc44214SFilipe Manana 	 * memorize that name.
2959dc44214SFilipe Manana 	 *
2969dc44214SFilipe Manana 	 * Indexed by the inode number of the directory to be deleted.
2979dc44214SFilipe Manana 	 */
2989dc44214SFilipe Manana 	struct rb_root orphan_dirs;
2993aa5bd36SBingJing Chang 
3003aa5bd36SBingJing Chang 	struct rb_root rbtree_new_refs;
3013aa5bd36SBingJing Chang 	struct rb_root rbtree_deleted_refs;
30266d04209SFilipe Manana 
30390b90d4aSFilipe Manana 	struct btrfs_lru_cache backref_cache;
30490b90d4aSFilipe Manana 	u64 backref_cache_last_reloc_trans;
305e8a7f49dSFilipe Manana 
306e8a7f49dSFilipe Manana 	struct btrfs_lru_cache dir_created_cache;
3073e49363bSFilipe Manana 	struct btrfs_lru_cache dir_utimes_cache;
3089f03740aSFilipe David Borba Manana };
3099f03740aSFilipe David Borba Manana 
3109f03740aSFilipe David Borba Manana struct pending_dir_move {
3119f03740aSFilipe David Borba Manana 	struct rb_node node;
3129f03740aSFilipe David Borba Manana 	struct list_head list;
3139f03740aSFilipe David Borba Manana 	u64 parent_ino;
3149f03740aSFilipe David Borba Manana 	u64 ino;
3159f03740aSFilipe David Borba Manana 	u64 gen;
3169f03740aSFilipe David Borba Manana 	struct list_head update_refs;
3179f03740aSFilipe David Borba Manana };
3189f03740aSFilipe David Borba Manana 
3199f03740aSFilipe David Borba Manana struct waiting_dir_move {
3209f03740aSFilipe David Borba Manana 	struct rb_node node;
3219f03740aSFilipe David Borba Manana 	u64 ino;
3229dc44214SFilipe Manana 	/*
3239dc44214SFilipe Manana 	 * There might be some directory that could not be removed because it
3249dc44214SFilipe Manana 	 * was waiting for this directory inode to be moved first. Therefore
3259dc44214SFilipe Manana 	 * after this directory is moved, we can try to rmdir the ino rmdir_ino.
3269dc44214SFilipe Manana 	 */
3279dc44214SFilipe Manana 	u64 rmdir_ino;
3280b3f407eSFilipe Manana 	u64 rmdir_gen;
3298b191a68SFilipe Manana 	bool orphanized;
3309dc44214SFilipe Manana };
3319dc44214SFilipe Manana 
3329dc44214SFilipe Manana struct orphan_dir_info {
3339dc44214SFilipe Manana 	struct rb_node node;
3349dc44214SFilipe Manana 	u64 ino;
3359dc44214SFilipe Manana 	u64 gen;
3360f96f517SRobbie Ko 	u64 last_dir_index_offset;
337474e4761SFilipe Manana 	u64 dir_high_seq_ino;
33831db9f7cSAlexander Block };
33931db9f7cSAlexander Block 
34031db9f7cSAlexander Block struct name_cache_entry {
3417e0926feSAlexander Block 	/*
342c48545deSFilipe Manana 	 * The key in the entry is an inode number, and the generation matches
343c48545deSFilipe Manana 	 * the inode's generation.
3447e0926feSAlexander Block 	 */
345c48545deSFilipe Manana 	struct btrfs_lru_cache_entry entry;
34631db9f7cSAlexander Block 	u64 parent_ino;
34731db9f7cSAlexander Block 	u64 parent_gen;
34831db9f7cSAlexander Block 	int ret;
34931db9f7cSAlexander Block 	int need_later_update;
35031db9f7cSAlexander Block 	int name_len;
35131db9f7cSAlexander Block 	char name[];
35231db9f7cSAlexander Block };
35331db9f7cSAlexander Block 
354c48545deSFilipe Manana /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
355c48545deSFilipe Manana static_assert(offsetof(struct name_cache_entry, entry) == 0);
356c48545deSFilipe Manana 
/* Advance modes. NOTE(review): presumably used by the tree comparison code - confirm. */
#define ADVANCE							1
#define ADVANCE_ONLY_NEXT					-1

/* How an item in the send snapshot relates to the parent snapshot. */
enum btrfs_compare_tree_result {
	BTRFS_COMPARE_TREE_NEW,
	BTRFS_COMPARE_TREE_DELETED,
	BTRFS_COMPARE_TREE_CHANGED,
	BTRFS_COMPARE_TREE_SAME,
};
36618d0f5c6SDavid Sterba 
367e67c718bSDavid Sterba __cold
inconsistent_snapshot_error(struct send_ctx * sctx,enum btrfs_compare_tree_result result,const char * what)36895155585SFilipe Manana static void inconsistent_snapshot_error(struct send_ctx *sctx,
36995155585SFilipe Manana 					enum btrfs_compare_tree_result result,
37095155585SFilipe Manana 					const char *what)
37195155585SFilipe Manana {
37295155585SFilipe Manana 	const char *result_string;
37395155585SFilipe Manana 
37495155585SFilipe Manana 	switch (result) {
37595155585SFilipe Manana 	case BTRFS_COMPARE_TREE_NEW:
37695155585SFilipe Manana 		result_string = "new";
37795155585SFilipe Manana 		break;
37895155585SFilipe Manana 	case BTRFS_COMPARE_TREE_DELETED:
37995155585SFilipe Manana 		result_string = "deleted";
38095155585SFilipe Manana 		break;
38195155585SFilipe Manana 	case BTRFS_COMPARE_TREE_CHANGED:
38295155585SFilipe Manana 		result_string = "updated";
38395155585SFilipe Manana 		break;
38495155585SFilipe Manana 	case BTRFS_COMPARE_TREE_SAME:
38595155585SFilipe Manana 		ASSERT(0);
38695155585SFilipe Manana 		result_string = "unchanged";
38795155585SFilipe Manana 		break;
38895155585SFilipe Manana 	default:
38995155585SFilipe Manana 		ASSERT(0);
39095155585SFilipe Manana 		result_string = "unexpected";
39195155585SFilipe Manana 	}
39295155585SFilipe Manana 
39395155585SFilipe Manana 	btrfs_err(sctx->send_root->fs_info,
39495155585SFilipe Manana 		  "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
39595155585SFilipe Manana 		  result_string, what, sctx->cmp_key->objectid,
39695155585SFilipe Manana 		  sctx->send_root->root_key.objectid,
39795155585SFilipe Manana 		  (sctx->parent_root ?
39895155585SFilipe Manana 		   sctx->parent_root->root_key.objectid : 0));
39995155585SFilipe Manana }
40095155585SFilipe Manana 
401e77fbf99SDavid Sterba __maybe_unused
proto_cmd_ok(const struct send_ctx * sctx,int cmd)402e77fbf99SDavid Sterba static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd)
403e77fbf99SDavid Sterba {
404e77fbf99SDavid Sterba 	switch (sctx->proto) {
40554cab6afSOmar Sandoval 	case 1:	 return cmd <= BTRFS_SEND_C_MAX_V1;
40654cab6afSOmar Sandoval 	case 2:	 return cmd <= BTRFS_SEND_C_MAX_V2;
407c86eab81SDavid Sterba 	case 3:	 return cmd <= BTRFS_SEND_C_MAX_V3;
408e77fbf99SDavid Sterba 	default: return false;
409e77fbf99SDavid Sterba 	}
410e77fbf99SDavid Sterba }
411e77fbf99SDavid Sterba 
4129f03740aSFilipe David Borba Manana static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
4139f03740aSFilipe David Borba Manana 
4149dc44214SFilipe Manana static struct waiting_dir_move *
4159dc44214SFilipe Manana get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
4169dc44214SFilipe Manana 
4170b3f407eSFilipe Manana static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen);
4189dc44214SFilipe Manana 
need_send_hole(struct send_ctx * sctx)41916e7549fSJosef Bacik static int need_send_hole(struct send_ctx *sctx)
42016e7549fSJosef Bacik {
42116e7549fSJosef Bacik 	return (sctx->parent_root && !sctx->cur_inode_new &&
42216e7549fSJosef Bacik 		!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
42316e7549fSJosef Bacik 		S_ISREG(sctx->cur_inode_mode));
42416e7549fSJosef Bacik }
42516e7549fSJosef Bacik 
fs_path_reset(struct fs_path * p)42631db9f7cSAlexander Block static void fs_path_reset(struct fs_path *p)
42731db9f7cSAlexander Block {
42831db9f7cSAlexander Block 	if (p->reversed) {
42931db9f7cSAlexander Block 		p->start = p->buf + p->buf_len - 1;
43031db9f7cSAlexander Block 		p->end = p->start;
43131db9f7cSAlexander Block 		*p->start = 0;
43231db9f7cSAlexander Block 	} else {
43331db9f7cSAlexander Block 		p->start = p->buf;
43431db9f7cSAlexander Block 		p->end = p->start;
43531db9f7cSAlexander Block 		*p->start = 0;
43631db9f7cSAlexander Block 	}
43731db9f7cSAlexander Block }
43831db9f7cSAlexander Block 
fs_path_alloc(void)439924794c9STsutomu Itoh static struct fs_path *fs_path_alloc(void)
44031db9f7cSAlexander Block {
44131db9f7cSAlexander Block 	struct fs_path *p;
44231db9f7cSAlexander Block 
443e780b0d1SDavid Sterba 	p = kmalloc(sizeof(*p), GFP_KERNEL);
44431db9f7cSAlexander Block 	if (!p)
44531db9f7cSAlexander Block 		return NULL;
44631db9f7cSAlexander Block 	p->reversed = 0;
44731db9f7cSAlexander Block 	p->buf = p->inline_buf;
44831db9f7cSAlexander Block 	p->buf_len = FS_PATH_INLINE_SIZE;
44931db9f7cSAlexander Block 	fs_path_reset(p);
45031db9f7cSAlexander Block 	return p;
45131db9f7cSAlexander Block }
45231db9f7cSAlexander Block 
fs_path_alloc_reversed(void)453924794c9STsutomu Itoh static struct fs_path *fs_path_alloc_reversed(void)
45431db9f7cSAlexander Block {
45531db9f7cSAlexander Block 	struct fs_path *p;
45631db9f7cSAlexander Block 
457924794c9STsutomu Itoh 	p = fs_path_alloc();
45831db9f7cSAlexander Block 	if (!p)
45931db9f7cSAlexander Block 		return NULL;
46031db9f7cSAlexander Block 	p->reversed = 1;
46131db9f7cSAlexander Block 	fs_path_reset(p);
46231db9f7cSAlexander Block 	return p;
46331db9f7cSAlexander Block }
46431db9f7cSAlexander Block 
fs_path_free(struct fs_path * p)465924794c9STsutomu Itoh static void fs_path_free(struct fs_path *p)
46631db9f7cSAlexander Block {
46731db9f7cSAlexander Block 	if (!p)
46831db9f7cSAlexander Block 		return;
469ace01050SDavid Sterba 	if (p->buf != p->inline_buf)
47031db9f7cSAlexander Block 		kfree(p->buf);
47131db9f7cSAlexander Block 	kfree(p);
47231db9f7cSAlexander Block }
47331db9f7cSAlexander Block 
fs_path_len(struct fs_path * p)47431db9f7cSAlexander Block static int fs_path_len(struct fs_path *p)
47531db9f7cSAlexander Block {
47631db9f7cSAlexander Block 	return p->end - p->start;
47731db9f7cSAlexander Block }
47831db9f7cSAlexander Block 
fs_path_ensure_buf(struct fs_path * p,int len)47931db9f7cSAlexander Block static int fs_path_ensure_buf(struct fs_path *p, int len)
48031db9f7cSAlexander Block {
48131db9f7cSAlexander Block 	char *tmp_buf;
48231db9f7cSAlexander Block 	int path_len;
48331db9f7cSAlexander Block 	int old_buf_len;
48431db9f7cSAlexander Block 
48531db9f7cSAlexander Block 	len++;
48631db9f7cSAlexander Block 
48731db9f7cSAlexander Block 	if (p->buf_len >= len)
48831db9f7cSAlexander Block 		return 0;
48931db9f7cSAlexander Block 
490cfd4a535SChris Mason 	if (len > PATH_MAX) {
491cfd4a535SChris Mason 		WARN_ON(1);
492cfd4a535SChris Mason 		return -ENOMEM;
493cfd4a535SChris Mason 	}
494cfd4a535SChris Mason 
4951b2782c8SDavid Sterba 	path_len = p->end - p->start;
4961b2782c8SDavid Sterba 	old_buf_len = p->buf_len;
4971b2782c8SDavid Sterba 
498ace01050SDavid Sterba 	/*
499905889bcSKees Cook 	 * Allocate to the next largest kmalloc bucket size, to let
500905889bcSKees Cook 	 * the fast path happen most of the time.
501905889bcSKees Cook 	 */
502905889bcSKees Cook 	len = kmalloc_size_roundup(len);
503905889bcSKees Cook 	/*
504ace01050SDavid Sterba 	 * First time the inline_buf does not suffice
505ace01050SDavid Sterba 	 */
50601a9a8a9SFilipe Manana 	if (p->buf == p->inline_buf) {
507e780b0d1SDavid Sterba 		tmp_buf = kmalloc(len, GFP_KERNEL);
50801a9a8a9SFilipe Manana 		if (tmp_buf)
50901a9a8a9SFilipe Manana 			memcpy(tmp_buf, p->buf, old_buf_len);
51001a9a8a9SFilipe Manana 	} else {
511e780b0d1SDavid Sterba 		tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
51201a9a8a9SFilipe Manana 	}
5139c9ca00bSDavid Sterba 	if (!tmp_buf)
514ace01050SDavid Sterba 		return -ENOMEM;
5159c9ca00bSDavid Sterba 	p->buf = tmp_buf;
516905889bcSKees Cook 	p->buf_len = len;
517ace01050SDavid Sterba 
51831db9f7cSAlexander Block 	if (p->reversed) {
51931db9f7cSAlexander Block 		tmp_buf = p->buf + old_buf_len - path_len - 1;
52031db9f7cSAlexander Block 		p->end = p->buf + p->buf_len - 1;
52131db9f7cSAlexander Block 		p->start = p->end - path_len;
52231db9f7cSAlexander Block 		memmove(p->start, tmp_buf, path_len + 1);
52331db9f7cSAlexander Block 	} else {
52431db9f7cSAlexander Block 		p->start = p->buf;
52531db9f7cSAlexander Block 		p->end = p->start + path_len;
52631db9f7cSAlexander Block 	}
52731db9f7cSAlexander Block 	return 0;
52831db9f7cSAlexander Block }
52931db9f7cSAlexander Block 
fs_path_prepare_for_add(struct fs_path * p,int name_len,char ** prepared)530b23ab57dSDavid Sterba static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
531b23ab57dSDavid Sterba 				   char **prepared)
53231db9f7cSAlexander Block {
53331db9f7cSAlexander Block 	int ret;
53431db9f7cSAlexander Block 	int new_len;
53531db9f7cSAlexander Block 
53631db9f7cSAlexander Block 	new_len = p->end - p->start + name_len;
53731db9f7cSAlexander Block 	if (p->start != p->end)
53831db9f7cSAlexander Block 		new_len++;
53931db9f7cSAlexander Block 	ret = fs_path_ensure_buf(p, new_len);
54031db9f7cSAlexander Block 	if (ret < 0)
54131db9f7cSAlexander Block 		goto out;
54231db9f7cSAlexander Block 
54331db9f7cSAlexander Block 	if (p->reversed) {
54431db9f7cSAlexander Block 		if (p->start != p->end)
54531db9f7cSAlexander Block 			*--p->start = '/';
54631db9f7cSAlexander Block 		p->start -= name_len;
547b23ab57dSDavid Sterba 		*prepared = p->start;
54831db9f7cSAlexander Block 	} else {
54931db9f7cSAlexander Block 		if (p->start != p->end)
55031db9f7cSAlexander Block 			*p->end++ = '/';
551b23ab57dSDavid Sterba 		*prepared = p->end;
55231db9f7cSAlexander Block 		p->end += name_len;
55331db9f7cSAlexander Block 		*p->end = 0;
55431db9f7cSAlexander Block 	}
55531db9f7cSAlexander Block 
55631db9f7cSAlexander Block out:
55731db9f7cSAlexander Block 	return ret;
55831db9f7cSAlexander Block }
55931db9f7cSAlexander Block 
/*
 * Add the element @name, of @name_len bytes, to @p.
 *
 * Returns the result of fs_path_prepare_for_add(); nothing is copied when
 * that result is negative.
 */
static int fs_path_add(struct fs_path *p, const char *name, int name_len)
{
	char *dest;
	int err;

	err = fs_path_prepare_for_add(p, name_len, &dest);
	if (err >= 0)
		memcpy(dest, name, name_len);

	return err;
}
57331db9f7cSAlexander Block 
fs_path_add_path(struct fs_path * p,struct fs_path * p2)57431db9f7cSAlexander Block static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
57531db9f7cSAlexander Block {
57631db9f7cSAlexander Block 	int ret;
577b23ab57dSDavid Sterba 	char *prepared;
57831db9f7cSAlexander Block 
579b23ab57dSDavid Sterba 	ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
58031db9f7cSAlexander Block 	if (ret < 0)
58131db9f7cSAlexander Block 		goto out;
582b23ab57dSDavid Sterba 	memcpy(prepared, p2->start, p2->end - p2->start);
58331db9f7cSAlexander Block 
58431db9f7cSAlexander Block out:
58531db9f7cSAlexander Block 	return ret;
58631db9f7cSAlexander Block }
58731db9f7cSAlexander Block 
/*
 * Append a component to @p whose @len bytes are read from extent buffer @eb
 * starting at offset @off.
 *
 * Returns the result of fs_path_prepare_for_add(); the extent buffer is not
 * read when it reports an error (< 0).
 */
static int fs_path_add_from_extent_buffer(struct fs_path *p,
					  struct extent_buffer *eb,
					  unsigned long off, int len)
{
	char *dest;
	int ret = fs_path_prepare_for_add(p, len, &dest);

	if (ret >= 0)
		read_extent_buffer(eb, dest, off, len);

	return ret;
}
60431db9f7cSAlexander Block 
fs_path_copy(struct fs_path * p,struct fs_path * from)60531db9f7cSAlexander Block static int fs_path_copy(struct fs_path *p, struct fs_path *from)
60631db9f7cSAlexander Block {
60731db9f7cSAlexander Block 	p->reversed = from->reversed;
60831db9f7cSAlexander Block 	fs_path_reset(p);
60931db9f7cSAlexander Block 
6100292ecf1SMinghao Chi 	return fs_path_add_path(p, from);
61131db9f7cSAlexander Block }
61231db9f7cSAlexander Block 
fs_path_unreverse(struct fs_path * p)61331db9f7cSAlexander Block static void fs_path_unreverse(struct fs_path *p)
61431db9f7cSAlexander Block {
61531db9f7cSAlexander Block 	char *tmp;
61631db9f7cSAlexander Block 	int len;
61731db9f7cSAlexander Block 
61831db9f7cSAlexander Block 	if (!p->reversed)
61931db9f7cSAlexander Block 		return;
62031db9f7cSAlexander Block 
62131db9f7cSAlexander Block 	tmp = p->start;
62231db9f7cSAlexander Block 	len = p->end - p->start;
62331db9f7cSAlexander Block 	p->start = p->buf;
62431db9f7cSAlexander Block 	p->end = p->start + len;
62531db9f7cSAlexander Block 	memmove(p->start, tmp, len + 1);
62631db9f7cSAlexander Block 	p->reversed = 0;
62731db9f7cSAlexander Block }
62831db9f7cSAlexander Block 
alloc_path_for_send(void)62931db9f7cSAlexander Block static struct btrfs_path *alloc_path_for_send(void)
63031db9f7cSAlexander Block {
63131db9f7cSAlexander Block 	struct btrfs_path *path;
63231db9f7cSAlexander Block 
63331db9f7cSAlexander Block 	path = btrfs_alloc_path();
63431db9f7cSAlexander Block 	if (!path)
63531db9f7cSAlexander Block 		return NULL;
63631db9f7cSAlexander Block 	path->search_commit_root = 1;
63731db9f7cSAlexander Block 	path->skip_locking = 1;
6383f8a18ccSJosef Bacik 	path->need_commit_sem = 1;
63931db9f7cSAlexander Block 	return path;
64031db9f7cSAlexander Block }
64131db9f7cSAlexander Block 
/*
 * Write all @len bytes of @buf to @filp at position *@off, calling
 * kernel_write() repeatedly until everything has been written.
 *
 * Returns 0 on success, the negative error reported by kernel_write(), or
 * -EIO when kernel_write() makes no progress (returns 0).
 */
static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
{
	u32 done;
	int ret;

	for (done = 0; done < len; done += ret) {
		ret = kernel_write(filp, buf + done, len - done, off);
		if (ret < 0)
			return ret;
		if (ret == 0)
			return -EIO;
	}

	return 0;
}
65831db9f7cSAlexander Block 
/*
 * Append one attribute to the command being assembled in sctx->send_buf.
 *
 * The attribute is stored as a struct btrfs_tlv_header (tlv_type and tlv_len,
 * both written as little-endian 16-bit values) directly followed by @len
 * bytes copied from @data. sctx->send_size is advanced past the attribute.
 *
 * Returns 0 on success, -EINVAL if sctx->put_data is already set for this
 * command, or -EOVERFLOW if @len cannot be represented in the 16-bit length
 * field or the attribute does not fit in the remaining send buffer space.
 */
static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
{
	struct btrfs_tlv_header *hdr;
	int total_len;
	int left;

	if (WARN_ON_ONCE(sctx->put_data))
		return -EINVAL;

	/*
	 * tlv_len is only 16 bits wide. Reject lengths that would be silently
	 * truncated in the header (or, if negative, turned into a huge
	 * memcpy() size) instead of emitting a corrupt stream.
	 */
	if (unlikely(len < 0 || len > U16_MAX))
		return -EOVERFLOW;

	total_len = sizeof(*hdr) + len;
	left = sctx->send_max_size - sctx->send_size;
	if (unlikely(left < total_len))
		return -EOVERFLOW;

	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
	put_unaligned_le16(attr, &hdr->tlv_type);
	put_unaligned_le16(len, &hdr->tlv_len);
	/* The payload starts right after the header. */
	memcpy(hdr + 1, data, len);
	sctx->send_size += total_len;

	return 0;
}
67931db9f7cSAlexander Block 
68095bc79d5SDavid Sterba #define TLV_PUT_DEFINE_INT(bits) \
68195bc79d5SDavid Sterba 	static int tlv_put_u##bits(struct send_ctx *sctx,	 	\
68295bc79d5SDavid Sterba 			u##bits attr, u##bits value)			\
68395bc79d5SDavid Sterba 	{								\
68495bc79d5SDavid Sterba 		__le##bits __tmp = cpu_to_le##bits(value);		\
68595bc79d5SDavid Sterba 		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));	\
68631db9f7cSAlexander Block 	}
68731db9f7cSAlexander Block 
68838622010SBoris Burkov TLV_PUT_DEFINE_INT(8)
6893ea4dc5bSOmar Sandoval TLV_PUT_DEFINE_INT(32)
69095bc79d5SDavid Sterba TLV_PUT_DEFINE_INT(64)
69131db9f7cSAlexander Block 
/*
 * Put the string @str into an attribute of type @attr. Passing -1 as @len
 * means "use strlen(@str)"; any other value is used as the length as is.
 * No terminating NUL is added by this helper.
 */
static int tlv_put_string(struct send_ctx *sctx, u16 attr,
			  const char *str, int len)
{
	int nbytes = len;

	if (nbytes == -1)
		nbytes = strlen(str);

	return tlv_put(sctx, attr, str, nbytes);
}
69931db9f7cSAlexander Block 
/*
 * Put the BTRFS_UUID_SIZE bytes found at @uuid into an attribute of type
 * @attr.
 */
static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
			const u8 *uuid)
{
	const int uuid_len = BTRFS_UUID_SIZE;

	return tlv_put(sctx, attr, uuid, uuid_len);
}
70531db9f7cSAlexander Block 
/*
 * Put a struct btrfs_timespec into an attribute of type @attr.
 *
 * @ts is not dereferenced: its value is used as the offset inside @eb from
 * which the timespec is read into a local copy, which is then emitted as is.
 */
static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
				  struct extent_buffer *eb,
				  struct btrfs_timespec *ts)
{
	struct btrfs_timespec raw;

	read_extent_buffer(eb, &raw, (unsigned long)ts, sizeof(raw));

	return tlv_put(sctx, attr, &raw, sizeof(raw));
}
71431db9f7cSAlexander Block 
71531db9f7cSAlexander Block 
/*
 * Convenience wrappers around the tlv_put*() helpers.
 *
 * Every macro below stores the helper's result in a local variable named
 * 'ret' and jumps to a label named 'tlv_put_failure' when the result is
 * negative, so the function using them must provide both.
 */
#define TLV_PUT(sctx, attrtype, data, attrlen) \
	do { \
		ret = tlv_put(sctx, attrtype, data, attrlen); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_INT(sctx, attrtype, bits, value) \
	do { \
		ret = tlv_put_u##bits(sctx, attrtype, value); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

/*
 * NOTE(review): TLV_PUT_U16 expands to tlv_put_u16(), and no
 * TLV_PUT_DEFINE_INT(16) instantiation is visible next to the 8/32/64 ones -
 * confirm before using it.
 */
#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
#define TLV_PUT_STRING(sctx, attrtype, str, len) \
	do { \
		ret = tlv_put_string(sctx, attrtype, str, len); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
/* @p is parenthesized so that any pointer expression can be passed. */
#define TLV_PUT_PATH(sctx, attrtype, p) \
	do { \
		ret = tlv_put_string(sctx, attrtype, (p)->start, \
			(p)->end - (p)->start); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_UUID(sctx, attrtype, uuid) \
	do { \
		ret = tlv_put_uuid(sctx, attrtype, uuid); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
	do { \
		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
75931db9f7cSAlexander Block 
send_header(struct send_ctx * sctx)76031db9f7cSAlexander Block static int send_header(struct send_ctx *sctx)
76131db9f7cSAlexander Block {
76231db9f7cSAlexander Block 	struct btrfs_stream_header hdr;
76331db9f7cSAlexander Block 
76431db9f7cSAlexander Block 	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
765d6815592SOmar Sandoval 	hdr.version = cpu_to_le32(sctx->proto);
7661bcea355SAnand Jain 	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
7671bcea355SAnand Jain 					&sctx->send_off);
76831db9f7cSAlexander Block }
76931db9f7cSAlexander Block 
77031db9f7cSAlexander Block /*
77131db9f7cSAlexander Block  * For each command/item we want to send to userspace, we call this function.
77231db9f7cSAlexander Block  */
begin_cmd(struct send_ctx * sctx,int cmd)77331db9f7cSAlexander Block static int begin_cmd(struct send_ctx *sctx, int cmd)
77431db9f7cSAlexander Block {
77531db9f7cSAlexander Block 	struct btrfs_cmd_header *hdr;
77631db9f7cSAlexander Block 
777fae7f21cSDulshani Gunawardhana 	if (WARN_ON(!sctx->send_buf))
77831db9f7cSAlexander Block 		return -EINVAL;
77931db9f7cSAlexander Block 
7804eb8be94SDavid Sterba 	if (unlikely(sctx->send_size != 0)) {
7814eb8be94SDavid Sterba 		btrfs_err(sctx->send_root->fs_info,
7824eb8be94SDavid Sterba 			  "send: command header buffer not empty cmd %d offset %llu",
7834eb8be94SDavid Sterba 			  cmd, sctx->send_off);
7844eb8be94SDavid Sterba 		return -EINVAL;
7854eb8be94SDavid Sterba 	}
78631db9f7cSAlexander Block 
78731db9f7cSAlexander Block 	sctx->send_size += sizeof(*hdr);
78831db9f7cSAlexander Block 	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
789e2f896b3SDavid Sterba 	put_unaligned_le16(cmd, &hdr->cmd);
79031db9f7cSAlexander Block 
79131db9f7cSAlexander Block 	return 0;
79231db9f7cSAlexander Block }
79331db9f7cSAlexander Block 
send_cmd(struct send_ctx * sctx)79431db9f7cSAlexander Block static int send_cmd(struct send_ctx *sctx)
79531db9f7cSAlexander Block {
79631db9f7cSAlexander Block 	int ret;
79731db9f7cSAlexander Block 	struct btrfs_cmd_header *hdr;
79831db9f7cSAlexander Block 	u32 crc;
79931db9f7cSAlexander Block 
80031db9f7cSAlexander Block 	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
801e2f896b3SDavid Sterba 	put_unaligned_le32(sctx->send_size - sizeof(*hdr), &hdr->len);
802e2f896b3SDavid Sterba 	put_unaligned_le32(0, &hdr->crc);
80331db9f7cSAlexander Block 
80465019df8SJohannes Thumshirn 	crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
805e2f896b3SDavid Sterba 	put_unaligned_le32(crc, &hdr->crc);
80631db9f7cSAlexander Block 
8071bcea355SAnand Jain 	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
8081bcea355SAnand Jain 					&sctx->send_off);
80931db9f7cSAlexander Block 
81031db9f7cSAlexander Block 	sctx->send_size = 0;
811356bbbb6SOmar Sandoval 	sctx->put_data = false;
81231db9f7cSAlexander Block 
81331db9f7cSAlexander Block 	return ret;
81431db9f7cSAlexander Block }
81531db9f7cSAlexander Block 
81631db9f7cSAlexander Block /*
81731db9f7cSAlexander Block  * Sends a move instruction to user space
81831db9f7cSAlexander Block  */
send_rename(struct send_ctx * sctx,struct fs_path * from,struct fs_path * to)81931db9f7cSAlexander Block static int send_rename(struct send_ctx *sctx,
82031db9f7cSAlexander Block 		     struct fs_path *from, struct fs_path *to)
82131db9f7cSAlexander Block {
82204ab956eSJeff Mahoney 	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
82331db9f7cSAlexander Block 	int ret;
82431db9f7cSAlexander Block 
82504ab956eSJeff Mahoney 	btrfs_debug(fs_info, "send_rename %s -> %s", from->start, to->start);
82631db9f7cSAlexander Block 
82731db9f7cSAlexander Block 	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
82831db9f7cSAlexander Block 	if (ret < 0)
82931db9f7cSAlexander Block 		goto out;
83031db9f7cSAlexander Block 
83131db9f7cSAlexander Block 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
83231db9f7cSAlexander Block 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
83331db9f7cSAlexander Block 
83431db9f7cSAlexander Block 	ret = send_cmd(sctx);
83531db9f7cSAlexander Block 
83631db9f7cSAlexander Block tlv_put_failure:
83731db9f7cSAlexander Block out:
83831db9f7cSAlexander Block 	return ret;
83931db9f7cSAlexander Block }
84031db9f7cSAlexander Block 
84131db9f7cSAlexander Block /*
84231db9f7cSAlexander Block  * Sends a link instruction to user space
84331db9f7cSAlexander Block  */
send_link(struct send_ctx * sctx,struct fs_path * path,struct fs_path * lnk)84431db9f7cSAlexander Block static int send_link(struct send_ctx *sctx,
84531db9f7cSAlexander Block 		     struct fs_path *path, struct fs_path *lnk)
84631db9f7cSAlexander Block {
84704ab956eSJeff Mahoney 	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
84831db9f7cSAlexander Block 	int ret;
84931db9f7cSAlexander Block 
85004ab956eSJeff Mahoney 	btrfs_debug(fs_info, "send_link %s -> %s", path->start, lnk->start);
85131db9f7cSAlexander Block 
85231db9f7cSAlexander Block 	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
85331db9f7cSAlexander Block 	if (ret < 0)
85431db9f7cSAlexander Block 		goto out;
85531db9f7cSAlexander Block 
85631db9f7cSAlexander Block 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
85731db9f7cSAlexander Block 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
85831db9f7cSAlexander Block 
85931db9f7cSAlexander Block 	ret = send_cmd(sctx);
86031db9f7cSAlexander Block 
86131db9f7cSAlexander Block tlv_put_failure:
86231db9f7cSAlexander Block out:
86331db9f7cSAlexander Block 	return ret;
86431db9f7cSAlexander Block }
86531db9f7cSAlexander Block 
86631db9f7cSAlexander Block /*
86731db9f7cSAlexander Block  * Sends an unlink instruction to user space
86831db9f7cSAlexander Block  */
send_unlink(struct send_ctx * sctx,struct fs_path * path)86931db9f7cSAlexander Block static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
87031db9f7cSAlexander Block {
87104ab956eSJeff Mahoney 	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
87231db9f7cSAlexander Block 	int ret;
87331db9f7cSAlexander Block 
87404ab956eSJeff Mahoney 	btrfs_debug(fs_info, "send_unlink %s", path->start);
87531db9f7cSAlexander Block 
87631db9f7cSAlexander Block 	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
87731db9f7cSAlexander Block 	if (ret < 0)
87831db9f7cSAlexander Block 		goto out;
87931db9f7cSAlexander Block 
88031db9f7cSAlexander Block 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
88131db9f7cSAlexander Block 
88231db9f7cSAlexander Block 	ret = send_cmd(sctx);
88331db9f7cSAlexander Block 
88431db9f7cSAlexander Block tlv_put_failure:
88531db9f7cSAlexander Block out:
88631db9f7cSAlexander Block 	return ret;
88731db9f7cSAlexander Block }
88831db9f7cSAlexander Block 
88931db9f7cSAlexander Block /*
89031db9f7cSAlexander Block  * Sends a rmdir instruction to user space
89131db9f7cSAlexander Block  */
send_rmdir(struct send_ctx * sctx,struct fs_path * path)89231db9f7cSAlexander Block static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
89331db9f7cSAlexander Block {
89404ab956eSJeff Mahoney 	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
89531db9f7cSAlexander Block 	int ret;
89631db9f7cSAlexander Block 
89704ab956eSJeff Mahoney 	btrfs_debug(fs_info, "send_rmdir %s", path->start);
89831db9f7cSAlexander Block 
89931db9f7cSAlexander Block 	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
90031db9f7cSAlexander Block 	if (ret < 0)
90131db9f7cSAlexander Block 		goto out;
90231db9f7cSAlexander Block 
90331db9f7cSAlexander Block 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
90431db9f7cSAlexander Block 
90531db9f7cSAlexander Block 	ret = send_cmd(sctx);
90631db9f7cSAlexander Block 
90731db9f7cSAlexander Block tlv_put_failure:
90831db9f7cSAlexander Block out:
90931db9f7cSAlexander Block 	return ret;
91031db9f7cSAlexander Block }
91131db9f7cSAlexander Block 
9127e93f6dcSBingJing Chang struct btrfs_inode_info {
9137e93f6dcSBingJing Chang 	u64 size;
9147e93f6dcSBingJing Chang 	u64 gen;
9157e93f6dcSBingJing Chang 	u64 mode;
9167e93f6dcSBingJing Chang 	u64 uid;
9177e93f6dcSBingJing Chang 	u64 gid;
9187e93f6dcSBingJing Chang 	u64 rdev;
9197e93f6dcSBingJing Chang 	u64 fileattr;
9209ed0a72eSBingJing Chang 	u64 nlink;
9217e93f6dcSBingJing Chang };
9227e93f6dcSBingJing Chang 
92331db9f7cSAlexander Block /*
92431db9f7cSAlexander Block  * Helper function to retrieve some fields from an inode item.
92531db9f7cSAlexander Block  */
get_inode_info(struct btrfs_root * root,u64 ino,struct btrfs_inode_info * info)9267e93f6dcSBingJing Chang static int get_inode_info(struct btrfs_root *root, u64 ino,
9277e93f6dcSBingJing Chang 			  struct btrfs_inode_info *info)
92831db9f7cSAlexander Block {
92931db9f7cSAlexander Block 	int ret;
9307e93f6dcSBingJing Chang 	struct btrfs_path *path;
93131db9f7cSAlexander Block 	struct btrfs_inode_item *ii;
93231db9f7cSAlexander Block 	struct btrfs_key key;
93331db9f7cSAlexander Block 
9347e93f6dcSBingJing Chang 	path = alloc_path_for_send();
9357e93f6dcSBingJing Chang 	if (!path)
9367e93f6dcSBingJing Chang 		return -ENOMEM;
9377e93f6dcSBingJing Chang 
93831db9f7cSAlexander Block 	key.objectid = ino;
93931db9f7cSAlexander Block 	key.type = BTRFS_INODE_ITEM_KEY;
94031db9f7cSAlexander Block 	key.offset = 0;
94131db9f7cSAlexander Block 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
94231db9f7cSAlexander Block 	if (ret) {
9433f8a18ccSJosef Bacik 		if (ret > 0)
94431db9f7cSAlexander Block 			ret = -ENOENT;
9457e93f6dcSBingJing Chang 		goto out;
94631db9f7cSAlexander Block 	}
94731db9f7cSAlexander Block 
9487e93f6dcSBingJing Chang 	if (!info)
9497e93f6dcSBingJing Chang 		goto out;
9507e93f6dcSBingJing Chang 
95131db9f7cSAlexander Block 	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
95231db9f7cSAlexander Block 			struct btrfs_inode_item);
9537e93f6dcSBingJing Chang 	info->size = btrfs_inode_size(path->nodes[0], ii);
9547e93f6dcSBingJing Chang 	info->gen = btrfs_inode_generation(path->nodes[0], ii);
9557e93f6dcSBingJing Chang 	info->mode = btrfs_inode_mode(path->nodes[0], ii);
9567e93f6dcSBingJing Chang 	info->uid = btrfs_inode_uid(path->nodes[0], ii);
9577e93f6dcSBingJing Chang 	info->gid = btrfs_inode_gid(path->nodes[0], ii);
9587e93f6dcSBingJing Chang 	info->rdev = btrfs_inode_rdev(path->nodes[0], ii);
9599ed0a72eSBingJing Chang 	info->nlink = btrfs_inode_nlink(path->nodes[0], ii);
96048247359SDavid Sterba 	/*
96148247359SDavid Sterba 	 * Transfer the unchanged u64 value of btrfs_inode_item::flags, that's
96248247359SDavid Sterba 	 * otherwise logically split to 32/32 parts.
96348247359SDavid Sterba 	 */
9647e93f6dcSBingJing Chang 	info->fileattr = btrfs_inode_flags(path->nodes[0], ii);
96531db9f7cSAlexander Block 
9667e93f6dcSBingJing Chang out:
9677e93f6dcSBingJing Chang 	btrfs_free_path(path);
9683f8a18ccSJosef Bacik 	return ret;
9693f8a18ccSJosef Bacik }
9703f8a18ccSJosef Bacik 
/*
 * Fetch only the generation of inode @ino in @root.
 *
 * @gen must not be NULL and is always written: get_inode_info() leaves the
 * zero-initialized info untouched on failure, so *@gen is 0 in that case.
 *
 * Returns the result of get_inode_info().
 */
static int get_inode_gen(struct btrfs_root *root, u64 ino, u64 *gen)
{
	struct btrfs_inode_info info = { 0 };
	int ret;

	ASSERT(gen);

	ret = get_inode_info(root, ino, &info);
	*gen = info.gen;

	return ret;
}
98231db9f7cSAlexander Block 
98331db9f7cSAlexander Block typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
98431db9f7cSAlexander Block 				   struct fs_path *p,
98531db9f7cSAlexander Block 				   void *ctx);
98631db9f7cSAlexander Block 
98731db9f7cSAlexander Block /*
98896b5bd77SJan Schmidt  * Helper function to iterate the entries in ONE btrfs_inode_ref or
98996b5bd77SJan Schmidt  * btrfs_inode_extref.
99031db9f7cSAlexander Block  * The iterate callback may return a non zero value to stop iteration. This can
99131db9f7cSAlexander Block  * be a negative value for error codes or 1 to simply stop it.
99231db9f7cSAlexander Block  *
99396b5bd77SJan Schmidt  * path must point to the INODE_REF or INODE_EXTREF when called.
99431db9f7cSAlexander Block  */
iterate_inode_ref(struct btrfs_root * root,struct btrfs_path * path,struct btrfs_key * found_key,int resolve,iterate_inode_ref_t iterate,void * ctx)995924794c9STsutomu Itoh static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
99631db9f7cSAlexander Block 			     struct btrfs_key *found_key, int resolve,
99731db9f7cSAlexander Block 			     iterate_inode_ref_t iterate, void *ctx)
99831db9f7cSAlexander Block {
99996b5bd77SJan Schmidt 	struct extent_buffer *eb = path->nodes[0];
100031db9f7cSAlexander Block 	struct btrfs_inode_ref *iref;
100196b5bd77SJan Schmidt 	struct btrfs_inode_extref *extref;
100231db9f7cSAlexander Block 	struct btrfs_path *tmp_path;
100331db9f7cSAlexander Block 	struct fs_path *p;
100496b5bd77SJan Schmidt 	u32 cur = 0;
100531db9f7cSAlexander Block 	u32 total;
100696b5bd77SJan Schmidt 	int slot = path->slots[0];
100731db9f7cSAlexander Block 	u32 name_len;
100831db9f7cSAlexander Block 	char *start;
100931db9f7cSAlexander Block 	int ret = 0;
101096b5bd77SJan Schmidt 	int num = 0;
101131db9f7cSAlexander Block 	int index;
101296b5bd77SJan Schmidt 	u64 dir;
101396b5bd77SJan Schmidt 	unsigned long name_off;
101496b5bd77SJan Schmidt 	unsigned long elem_size;
101596b5bd77SJan Schmidt 	unsigned long ptr;
101631db9f7cSAlexander Block 
1017924794c9STsutomu Itoh 	p = fs_path_alloc_reversed();
101831db9f7cSAlexander Block 	if (!p)
101931db9f7cSAlexander Block 		return -ENOMEM;
102031db9f7cSAlexander Block 
102131db9f7cSAlexander Block 	tmp_path = alloc_path_for_send();
102231db9f7cSAlexander Block 	if (!tmp_path) {
1023924794c9STsutomu Itoh 		fs_path_free(p);
102431db9f7cSAlexander Block 		return -ENOMEM;
102531db9f7cSAlexander Block 	}
102631db9f7cSAlexander Block 
102731db9f7cSAlexander Block 
102896b5bd77SJan Schmidt 	if (found_key->type == BTRFS_INODE_REF_KEY) {
102996b5bd77SJan Schmidt 		ptr = (unsigned long)btrfs_item_ptr(eb, slot,
103096b5bd77SJan Schmidt 						    struct btrfs_inode_ref);
10313212fa14SJosef Bacik 		total = btrfs_item_size(eb, slot);
103296b5bd77SJan Schmidt 		elem_size = sizeof(*iref);
103396b5bd77SJan Schmidt 	} else {
103496b5bd77SJan Schmidt 		ptr = btrfs_item_ptr_offset(eb, slot);
10353212fa14SJosef Bacik 		total = btrfs_item_size(eb, slot);
103696b5bd77SJan Schmidt 		elem_size = sizeof(*extref);
103796b5bd77SJan Schmidt 	}
103896b5bd77SJan Schmidt 
103931db9f7cSAlexander Block 	while (cur < total) {
104031db9f7cSAlexander Block 		fs_path_reset(p);
104131db9f7cSAlexander Block 
104296b5bd77SJan Schmidt 		if (found_key->type == BTRFS_INODE_REF_KEY) {
104396b5bd77SJan Schmidt 			iref = (struct btrfs_inode_ref *)(ptr + cur);
104431db9f7cSAlexander Block 			name_len = btrfs_inode_ref_name_len(eb, iref);
104596b5bd77SJan Schmidt 			name_off = (unsigned long)(iref + 1);
104631db9f7cSAlexander Block 			index = btrfs_inode_ref_index(eb, iref);
104796b5bd77SJan Schmidt 			dir = found_key->offset;
104896b5bd77SJan Schmidt 		} else {
104996b5bd77SJan Schmidt 			extref = (struct btrfs_inode_extref *)(ptr + cur);
105096b5bd77SJan Schmidt 			name_len = btrfs_inode_extref_name_len(eb, extref);
105196b5bd77SJan Schmidt 			name_off = (unsigned long)&extref->name;
105296b5bd77SJan Schmidt 			index = btrfs_inode_extref_index(eb, extref);
105396b5bd77SJan Schmidt 			dir = btrfs_inode_extref_parent(eb, extref);
105496b5bd77SJan Schmidt 		}
105596b5bd77SJan Schmidt 
105631db9f7cSAlexander Block 		if (resolve) {
105796b5bd77SJan Schmidt 			start = btrfs_ref_to_path(root, tmp_path, name_len,
105896b5bd77SJan Schmidt 						  name_off, eb, dir,
105996b5bd77SJan Schmidt 						  p->buf, p->buf_len);
106031db9f7cSAlexander Block 			if (IS_ERR(start)) {
106131db9f7cSAlexander Block 				ret = PTR_ERR(start);
106231db9f7cSAlexander Block 				goto out;
106331db9f7cSAlexander Block 			}
106431db9f7cSAlexander Block 			if (start < p->buf) {
106531db9f7cSAlexander Block 				/* overflow , try again with larger buffer */
106631db9f7cSAlexander Block 				ret = fs_path_ensure_buf(p,
106731db9f7cSAlexander Block 						p->buf_len + p->buf - start);
106831db9f7cSAlexander Block 				if (ret < 0)
106931db9f7cSAlexander Block 					goto out;
107096b5bd77SJan Schmidt 				start = btrfs_ref_to_path(root, tmp_path,
107196b5bd77SJan Schmidt 							  name_len, name_off,
107296b5bd77SJan Schmidt 							  eb, dir,
107396b5bd77SJan Schmidt 							  p->buf, p->buf_len);
107431db9f7cSAlexander Block 				if (IS_ERR(start)) {
107531db9f7cSAlexander Block 					ret = PTR_ERR(start);
107631db9f7cSAlexander Block 					goto out;
107731db9f7cSAlexander Block 				}
1078c1363ed8SDavid Sterba 				if (unlikely(start < p->buf)) {
1079c1363ed8SDavid Sterba 					btrfs_err(root->fs_info,
1080c1363ed8SDavid Sterba 			"send: path ref buffer underflow for key (%llu %u %llu)",
1081c1363ed8SDavid Sterba 						  found_key->objectid,
1082c1363ed8SDavid Sterba 						  found_key->type,
1083c1363ed8SDavid Sterba 						  found_key->offset);
1084c1363ed8SDavid Sterba 					ret = -EINVAL;
1085c1363ed8SDavid Sterba 					goto out;
1086c1363ed8SDavid Sterba 				}
108731db9f7cSAlexander Block 			}
108831db9f7cSAlexander Block 			p->start = start;
108931db9f7cSAlexander Block 		} else {
109096b5bd77SJan Schmidt 			ret = fs_path_add_from_extent_buffer(p, eb, name_off,
109196b5bd77SJan Schmidt 							     name_len);
109231db9f7cSAlexander Block 			if (ret < 0)
109331db9f7cSAlexander Block 				goto out;
109431db9f7cSAlexander Block 		}
109531db9f7cSAlexander Block 
109696b5bd77SJan Schmidt 		cur += elem_size + name_len;
109796b5bd77SJan Schmidt 		ret = iterate(num, dir, index, p, ctx);
109831db9f7cSAlexander Block 		if (ret)
109931db9f7cSAlexander Block 			goto out;
110031db9f7cSAlexander Block 		num++;
110131db9f7cSAlexander Block 	}
110231db9f7cSAlexander Block 
110331db9f7cSAlexander Block out:
110431db9f7cSAlexander Block 	btrfs_free_path(tmp_path);
1105924794c9STsutomu Itoh 	fs_path_free(p);
110631db9f7cSAlexander Block 	return ret;
110731db9f7cSAlexander Block }
110831db9f7cSAlexander Block 
110931db9f7cSAlexander Block typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
111031db9f7cSAlexander Block 				  const char *name, int name_len,
111131db9f7cSAlexander Block 				  const char *data, int data_len,
1112b1dea4e7SOmar Sandoval 				  void *ctx);
111331db9f7cSAlexander Block 
111431db9f7cSAlexander Block /*
111531db9f7cSAlexander Block  * Helper function to iterate the entries in ONE btrfs_dir_item.
111631db9f7cSAlexander Block  * The iterate callback may return a non zero value to stop iteration. This can
111731db9f7cSAlexander Block  * be a negative value for error codes or 1 to simply stop it.
111831db9f7cSAlexander Block  *
111931db9f7cSAlexander Block  * path must point to the dir item when called.
112031db9f7cSAlexander Block  */
iterate_dir_item(struct btrfs_root * root,struct btrfs_path * path,iterate_dir_item_t iterate,void * ctx)1121924794c9STsutomu Itoh static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
112231db9f7cSAlexander Block 			    iterate_dir_item_t iterate, void *ctx)
112331db9f7cSAlexander Block {
112431db9f7cSAlexander Block 	int ret = 0;
112531db9f7cSAlexander Block 	struct extent_buffer *eb;
112631db9f7cSAlexander Block 	struct btrfs_dir_item *di;
112731db9f7cSAlexander Block 	struct btrfs_key di_key;
112831db9f7cSAlexander Block 	char *buf = NULL;
11297e3ae33eSFilipe Manana 	int buf_len;
113031db9f7cSAlexander Block 	u32 name_len;
113131db9f7cSAlexander Block 	u32 data_len;
113231db9f7cSAlexander Block 	u32 cur;
113331db9f7cSAlexander Block 	u32 len;
113431db9f7cSAlexander Block 	u32 total;
113531db9f7cSAlexander Block 	int slot;
113631db9f7cSAlexander Block 	int num;
113731db9f7cSAlexander Block 
11384395e0c4SFilipe Manana 	/*
11394395e0c4SFilipe Manana 	 * Start with a small buffer (1 page). If later we end up needing more
11404395e0c4SFilipe Manana 	 * space, which can happen for xattrs on a fs with a leaf size greater
11414395e0c4SFilipe Manana 	 * then the page size, attempt to increase the buffer. Typically xattr
11424395e0c4SFilipe Manana 	 * values are small.
11434395e0c4SFilipe Manana 	 */
11447e3ae33eSFilipe Manana 	buf_len = PATH_MAX;
1145e780b0d1SDavid Sterba 	buf = kmalloc(buf_len, GFP_KERNEL);
114631db9f7cSAlexander Block 	if (!buf) {
114731db9f7cSAlexander Block 		ret = -ENOMEM;
114831db9f7cSAlexander Block 		goto out;
114931db9f7cSAlexander Block 	}
115031db9f7cSAlexander Block 
115131db9f7cSAlexander Block 	eb = path->nodes[0];
115231db9f7cSAlexander Block 	slot = path->slots[0];
115331db9f7cSAlexander Block 	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
115431db9f7cSAlexander Block 	cur = 0;
115531db9f7cSAlexander Block 	len = 0;
11563212fa14SJosef Bacik 	total = btrfs_item_size(eb, slot);
115731db9f7cSAlexander Block 
115831db9f7cSAlexander Block 	num = 0;
115931db9f7cSAlexander Block 	while (cur < total) {
116031db9f7cSAlexander Block 		name_len = btrfs_dir_name_len(eb, di);
116131db9f7cSAlexander Block 		data_len = btrfs_dir_data_len(eb, di);
116231db9f7cSAlexander Block 		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
116331db9f7cSAlexander Block 
116494a48aefSOmar Sandoval 		if (btrfs_dir_ftype(eb, di) == BTRFS_FT_XATTR) {
11657e3ae33eSFilipe Manana 			if (name_len > XATTR_NAME_MAX) {
11667e3ae33eSFilipe Manana 				ret = -ENAMETOOLONG;
11677e3ae33eSFilipe Manana 				goto out;
11687e3ae33eSFilipe Manana 			}
1169da17066cSJeff Mahoney 			if (name_len + data_len >
1170da17066cSJeff Mahoney 					BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
11717e3ae33eSFilipe Manana 				ret = -E2BIG;
11727e3ae33eSFilipe Manana 				goto out;
11737e3ae33eSFilipe Manana 			}
11747e3ae33eSFilipe Manana 		} else {
1175ace01050SDavid Sterba 			/*
1176ace01050SDavid Sterba 			 * Path too long
1177ace01050SDavid Sterba 			 */
11784395e0c4SFilipe Manana 			if (name_len + data_len > PATH_MAX) {
1179ace01050SDavid Sterba 				ret = -ENAMETOOLONG;
118031db9f7cSAlexander Block 				goto out;
118131db9f7cSAlexander Block 			}
11827e3ae33eSFilipe Manana 		}
118331db9f7cSAlexander Block 
11844395e0c4SFilipe Manana 		if (name_len + data_len > buf_len) {
11854395e0c4SFilipe Manana 			buf_len = name_len + data_len;
11864395e0c4SFilipe Manana 			if (is_vmalloc_addr(buf)) {
11874395e0c4SFilipe Manana 				vfree(buf);
11884395e0c4SFilipe Manana 				buf = NULL;
11894395e0c4SFilipe Manana 			} else {
11904395e0c4SFilipe Manana 				char *tmp = krealloc(buf, buf_len,
1191e780b0d1SDavid Sterba 						GFP_KERNEL | __GFP_NOWARN);
11924395e0c4SFilipe Manana 
11934395e0c4SFilipe Manana 				if (!tmp)
11944395e0c4SFilipe Manana 					kfree(buf);
11954395e0c4SFilipe Manana 				buf = tmp;
11964395e0c4SFilipe Manana 			}
11974395e0c4SFilipe Manana 			if (!buf) {
1198f11f7441SDavid Sterba 				buf = kvmalloc(buf_len, GFP_KERNEL);
11994395e0c4SFilipe Manana 				if (!buf) {
12004395e0c4SFilipe Manana 					ret = -ENOMEM;
12014395e0c4SFilipe Manana 					goto out;
12024395e0c4SFilipe Manana 				}
12034395e0c4SFilipe Manana 			}
12044395e0c4SFilipe Manana 		}
12054395e0c4SFilipe Manana 
120631db9f7cSAlexander Block 		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
120731db9f7cSAlexander Block 				name_len + data_len);
120831db9f7cSAlexander Block 
120931db9f7cSAlexander Block 		len = sizeof(*di) + name_len + data_len;
121031db9f7cSAlexander Block 		di = (struct btrfs_dir_item *)((char *)di + len);
121131db9f7cSAlexander Block 		cur += len;
121231db9f7cSAlexander Block 
121331db9f7cSAlexander Block 		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
1214b1dea4e7SOmar Sandoval 			      data_len, ctx);
121531db9f7cSAlexander Block 		if (ret < 0)
121631db9f7cSAlexander Block 			goto out;
121731db9f7cSAlexander Block 		if (ret) {
121831db9f7cSAlexander Block 			ret = 0;
121931db9f7cSAlexander Block 			goto out;
122031db9f7cSAlexander Block 		}
122131db9f7cSAlexander Block 
122231db9f7cSAlexander Block 		num++;
122331db9f7cSAlexander Block 	}
122431db9f7cSAlexander Block 
122531db9f7cSAlexander Block out:
12264395e0c4SFilipe Manana 	kvfree(buf);
122731db9f7cSAlexander Block 	return ret;
122831db9f7cSAlexander Block }
122931db9f7cSAlexander Block 
/*
 * Inode ref iteration callback (see get_inode_path()): copy the path of the
 * reference being visited into the fs_path handed over through @ctx.
 *
 * Returns a negative errno if the copy fails, otherwise 1 because only the
 * first reference is wanted.
 */
static int __copy_first_ref(int num, u64 dir, int index,
			    struct fs_path *p, void *ctx)
{
	struct fs_path *dest = ctx;
	int err = fs_path_copy(dest, p);

	/* A positive return tells the iterator that the first ref was enough. */
	return err < 0 ? err : 1;
}
124331db9f7cSAlexander Block 
124431db9f7cSAlexander Block /*
124531db9f7cSAlexander Block  * Retrieve the first path of an inode. If an inode has more then one
124631db9f7cSAlexander Block  * ref/hardlink, this is ignored.
124731db9f7cSAlexander Block  */
get_inode_path(struct btrfs_root * root,u64 ino,struct fs_path * path)1248924794c9STsutomu Itoh static int get_inode_path(struct btrfs_root *root,
124931db9f7cSAlexander Block 			  u64 ino, struct fs_path *path)
125031db9f7cSAlexander Block {
125131db9f7cSAlexander Block 	int ret;
125231db9f7cSAlexander Block 	struct btrfs_key key, found_key;
125331db9f7cSAlexander Block 	struct btrfs_path *p;
125431db9f7cSAlexander Block 
125531db9f7cSAlexander Block 	p = alloc_path_for_send();
125631db9f7cSAlexander Block 	if (!p)
125731db9f7cSAlexander Block 		return -ENOMEM;
125831db9f7cSAlexander Block 
125931db9f7cSAlexander Block 	fs_path_reset(path);
126031db9f7cSAlexander Block 
126131db9f7cSAlexander Block 	key.objectid = ino;
126231db9f7cSAlexander Block 	key.type = BTRFS_INODE_REF_KEY;
126331db9f7cSAlexander Block 	key.offset = 0;
126431db9f7cSAlexander Block 
126531db9f7cSAlexander Block 	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
126631db9f7cSAlexander Block 	if (ret < 0)
126731db9f7cSAlexander Block 		goto out;
126831db9f7cSAlexander Block 	if (ret) {
126931db9f7cSAlexander Block 		ret = 1;
127031db9f7cSAlexander Block 		goto out;
127131db9f7cSAlexander Block 	}
127231db9f7cSAlexander Block 	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
127331db9f7cSAlexander Block 	if (found_key.objectid != ino ||
127496b5bd77SJan Schmidt 	    (found_key.type != BTRFS_INODE_REF_KEY &&
127596b5bd77SJan Schmidt 	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
127631db9f7cSAlexander Block 		ret = -ENOENT;
127731db9f7cSAlexander Block 		goto out;
127831db9f7cSAlexander Block 	}
127931db9f7cSAlexander Block 
1280924794c9STsutomu Itoh 	ret = iterate_inode_ref(root, p, &found_key, 1,
128131db9f7cSAlexander Block 				__copy_first_ref, path);
128231db9f7cSAlexander Block 	if (ret < 0)
128331db9f7cSAlexander Block 		goto out;
128431db9f7cSAlexander Block 	ret = 0;
128531db9f7cSAlexander Block 
128631db9f7cSAlexander Block out:
128731db9f7cSAlexander Block 	btrfs_free_path(p);
128831db9f7cSAlexander Block 	return ret;
128931db9f7cSAlexander Block }
129031db9f7cSAlexander Block 
129131db9f7cSAlexander Block struct backref_ctx {
129231db9f7cSAlexander Block 	struct send_ctx *sctx;
129331db9f7cSAlexander Block 
129431db9f7cSAlexander Block 	/* number of total found references */
129531db9f7cSAlexander Block 	u64 found;
129631db9f7cSAlexander Block 
129731db9f7cSAlexander Block 	/*
129831db9f7cSAlexander Block 	 * used for clones found in send_root. clones found behind cur_objectid
129931db9f7cSAlexander Block 	 * and cur_offset are not considered as allowed clones.
130031db9f7cSAlexander Block 	 */
130131db9f7cSAlexander Block 	u64 cur_objectid;
130231db9f7cSAlexander Block 	u64 cur_offset;
130331db9f7cSAlexander Block 
130431db9f7cSAlexander Block 	/* may be truncated in case it's the last extent in a file */
130531db9f7cSAlexander Block 	u64 extent_len;
1306f73853c7SFilipe Manana 
1307f73853c7SFilipe Manana 	/* The bytenr the file extent item we are processing refers to. */
1308f73853c7SFilipe Manana 	u64 bytenr;
1309adf02418SFilipe Manana 	/* The owner (root id) of the data backref for the current extent. */
1310adf02418SFilipe Manana 	u64 backref_owner;
1311adf02418SFilipe Manana 	/* The offset of the data backref for the current extent. */
1312adf02418SFilipe Manana 	u64 backref_offset;
131331db9f7cSAlexander Block };
131431db9f7cSAlexander Block 
__clone_root_cmp_bsearch(const void * key,const void * elt)131531db9f7cSAlexander Block static int __clone_root_cmp_bsearch(const void *key, const void *elt)
131631db9f7cSAlexander Block {
1317995e01b7SJan Schmidt 	u64 root = (u64)(uintptr_t)key;
1318214cc184SDavid Sterba 	const struct clone_root *cr = elt;
131931db9f7cSAlexander Block 
13204fd786e6SMisono Tomohiro 	if (root < cr->root->root_key.objectid)
132131db9f7cSAlexander Block 		return -1;
13224fd786e6SMisono Tomohiro 	if (root > cr->root->root_key.objectid)
132331db9f7cSAlexander Block 		return 1;
132431db9f7cSAlexander Block 	return 0;
132531db9f7cSAlexander Block }
132631db9f7cSAlexander Block 
__clone_root_cmp_sort(const void * e1,const void * e2)132731db9f7cSAlexander Block static int __clone_root_cmp_sort(const void *e1, const void *e2)
132831db9f7cSAlexander Block {
1329214cc184SDavid Sterba 	const struct clone_root *cr1 = e1;
1330214cc184SDavid Sterba 	const struct clone_root *cr2 = e2;
133131db9f7cSAlexander Block 
13324fd786e6SMisono Tomohiro 	if (cr1->root->root_key.objectid < cr2->root->root_key.objectid)
133331db9f7cSAlexander Block 		return -1;
13344fd786e6SMisono Tomohiro 	if (cr1->root->root_key.objectid > cr2->root->root_key.objectid)
133531db9f7cSAlexander Block 		return 1;
133631db9f7cSAlexander Block 	return 0;
133731db9f7cSAlexander Block }
133831db9f7cSAlexander Block 
133931db9f7cSAlexander Block /*
134031db9f7cSAlexander Block  * Called for every backref that is found for the current extent.
134188ffb665SFilipe Manana  * Results are collected in sctx->clone_roots->ino/offset.
134231db9f7cSAlexander Block  */
iterate_backrefs(u64 ino,u64 offset,u64 num_bytes,u64 root_id,void * ctx_)134388ffb665SFilipe Manana static int iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root_id,
1344c7499a64SFilipe Manana 			    void *ctx_)
134531db9f7cSAlexander Block {
134631db9f7cSAlexander Block 	struct backref_ctx *bctx = ctx_;
134788ffb665SFilipe Manana 	struct clone_root *clone_root;
134831db9f7cSAlexander Block 
134931db9f7cSAlexander Block 	/* First check if the root is in the list of accepted clone sources */
135088ffb665SFilipe Manana 	clone_root = bsearch((void *)(uintptr_t)root_id, bctx->sctx->clone_roots,
135131db9f7cSAlexander Block 			     bctx->sctx->clone_roots_cnt,
135231db9f7cSAlexander Block 			     sizeof(struct clone_root),
135331db9f7cSAlexander Block 			     __clone_root_cmp_bsearch);
135488ffb665SFilipe Manana 	if (!clone_root)
135531db9f7cSAlexander Block 		return 0;
135631db9f7cSAlexander Block 
135788ffb665SFilipe Manana 	/* This is our own reference, bail out as we can't clone from it. */
135888ffb665SFilipe Manana 	if (clone_root->root == bctx->sctx->send_root &&
135931db9f7cSAlexander Block 	    ino == bctx->cur_objectid &&
136088ffb665SFilipe Manana 	    offset == bctx->cur_offset)
136188ffb665SFilipe Manana 		return 0;
136231db9f7cSAlexander Block 
136331db9f7cSAlexander Block 	/*
136431db9f7cSAlexander Block 	 * Make sure we don't consider clones from send_root that are
136531db9f7cSAlexander Block 	 * behind the current inode/offset.
136631db9f7cSAlexander Block 	 */
136788ffb665SFilipe Manana 	if (clone_root->root == bctx->sctx->send_root) {
136831db9f7cSAlexander Block 		/*
136911f2069cSFilipe Manana 		 * If the source inode was not yet processed we can't issue a
137011f2069cSFilipe Manana 		 * clone operation, as the source extent does not exist yet at
137111f2069cSFilipe Manana 		 * the destination of the stream.
137231db9f7cSAlexander Block 		 */
137311f2069cSFilipe Manana 		if (ino > bctx->cur_objectid)
137411f2069cSFilipe Manana 			return 0;
137511f2069cSFilipe Manana 		/*
137611f2069cSFilipe Manana 		 * We clone from the inode currently being sent as long as the
137711f2069cSFilipe Manana 		 * source extent is already processed, otherwise we could try
137811f2069cSFilipe Manana 		 * to clone from an extent that does not exist yet at the
137911f2069cSFilipe Manana 		 * destination of the stream.
138011f2069cSFilipe Manana 		 */
138111f2069cSFilipe Manana 		if (ino == bctx->cur_objectid &&
13829722b101SFilipe Manana 		    offset + bctx->extent_len >
13839722b101SFilipe Manana 		    bctx->sctx->cur_inode_next_write_offset)
138431db9f7cSAlexander Block 			return 0;
138531db9f7cSAlexander Block 	}
138631db9f7cSAlexander Block 
138731db9f7cSAlexander Block 	bctx->found++;
138888ffb665SFilipe Manana 	clone_root->found_ref = true;
1389c7499a64SFilipe Manana 
1390c7499a64SFilipe Manana 	/*
1391c7499a64SFilipe Manana 	 * If the given backref refers to a file extent item with a larger
1392c7499a64SFilipe Manana 	 * number of bytes than what we found before, use the new one so that
1393c7499a64SFilipe Manana 	 * we clone more optimally and end up doing less writes and getting
1394c7499a64SFilipe Manana 	 * less exclusive, non-shared extents at the destination.
1395c7499a64SFilipe Manana 	 */
139688ffb665SFilipe Manana 	if (num_bytes > clone_root->num_bytes) {
139788ffb665SFilipe Manana 		clone_root->ino = ino;
139888ffb665SFilipe Manana 		clone_root->offset = offset;
139988ffb665SFilipe Manana 		clone_root->num_bytes = num_bytes;
140088ffb665SFilipe Manana 
140188ffb665SFilipe Manana 		/*
140288ffb665SFilipe Manana 		 * Found a perfect candidate, so there's no need to continue
140388ffb665SFilipe Manana 		 * backref walking.
140488ffb665SFilipe Manana 		 */
140588ffb665SFilipe Manana 		if (num_bytes >= bctx->extent_len)
140688ffb665SFilipe Manana 			return BTRFS_ITERATE_EXTENT_INODES_STOP;
140731db9f7cSAlexander Block 	}
140831db9f7cSAlexander Block 
140931db9f7cSAlexander Block 	return 0;
141031db9f7cSAlexander Block }
141131db9f7cSAlexander Block 
/*
 * Backref cache lookup callback for the backref walking code.
 *
 * On a hit, stores the cached root ids array and its length in @root_ids_ret
 * and @root_count_ret and returns true. Returns false when the cache is empty,
 * had to be dropped because of relocation, or holds no entry for @leaf_bytenr.
 */
static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx,
				 const u64 **root_ids_ret, int *root_count_ret)
{
	struct backref_ctx *bctx = ctx;
	struct send_ctx *sctx = bctx->sctx;
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	struct btrfs_lru_cache_entry *lru_entry;
	struct backref_cache_entry *hit;
	u64 cache_key;

	if (btrfs_lru_cache_size(&sctx->backref_cache) == 0)
		return false;

	/*
	 * If relocation happened since we first filled the cache, the cache
	 * must be emptied and not used: even though we operate on read-only
	 * roots, their leaves and nodes may have been reallocated and now be
	 * used for different nodes/leaves of the same tree or some other tree.
	 *
	 * No locking is needed, we are called from iterate_extent_inodes()
	 * while either holding a transaction handle or holding
	 * fs_info->commit_root_sem.
	 */
	if (fs_info->last_reloc_trans > sctx->backref_cache_last_reloc_trans) {
		btrfs_lru_cache_clear(&sctx->backref_cache);
		return false;
	}

	/* Entries are keyed by the leaf's bytenr in units of sectors. */
	cache_key = leaf_bytenr >> fs_info->sectorsize_bits;
	lru_entry = btrfs_lru_cache_lookup(&sctx->backref_cache, cache_key, 0);
	if (!lru_entry)
		return false;

	hit = container_of(lru_entry, struct backref_cache_entry, entry);
	*root_count_ret = hit->num_roots;
	*root_ids_ret = hit->root_ids;

	return true;
}
145166d04209SFilipe Manana 
/*
 * Backref cache store callback for the backref walking code.
 *
 * Records which of the roots in @root_ids are valid clone sources for the leaf
 * at @leaf_bytenr. Caching is best effort: on allocation failure or when there
 * are more matching roots than SEND_MAX_BACKREF_CACHE_ROOTS, nothing is stored.
 */
static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids,
				void *ctx)
{
	struct backref_ctx *bctx = ctx;
	struct send_ctx *sctx = bctx->sctx;
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	struct backref_cache_entry *cache_entry;
	struct ulist_iterator iter;
	struct ulist_node *unode;
	int err;

	/*
	 * NOFS allocation: we're called while holding a transaction handle or
	 * while holding fs_info->commit_root_sem (at iterate_extent_inodes()).
	 */
	cache_entry = kmalloc(sizeof(*cache_entry), GFP_NOFS);
	if (!cache_entry)
		return;	/* No worries, cache is optional. */

	cache_entry->entry.key = leaf_bytenr >> fs_info->sectorsize_bits;
	cache_entry->entry.gen = 0;
	cache_entry->num_roots = 0;

	ULIST_ITER_INIT(&iter);
	for (unode = ulist_next(root_ids, &iter); unode != NULL;
	     unode = ulist_next(root_ids, &iter)) {
		const u64 root_id = unode->val;

		/* Roots that are not clone sources are not worth caching. */
		if (!bsearch((void *)(uintptr_t)root_id, sctx->clone_roots,
			     sctx->clone_roots_cnt, sizeof(struct clone_root),
			     __clone_root_cmp_bsearch))
			continue;

		/* Too many roots, just exit, no worries as caching is optional. */
		if (cache_entry->num_roots >= SEND_MAX_BACKREF_CACHE_ROOTS) {
			kfree(cache_entry);
			return;
		}

		cache_entry->root_ids[cache_entry->num_roots++] = root_id;
	}

	/*
	 * The entry may have no roots at all, meaning none of the roots is in
	 * the list of roots we are allowed to clone from. It is cached anyway,
	 * as it still avoids backref walking to determine which roots have a
	 * path to the leaf.
	 *
	 * GFP_NOFS for the same reason as the allocation above.
	 */
	err = btrfs_lru_cache_store(&sctx->backref_cache, &cache_entry->entry,
				    GFP_NOFS);
	ASSERT(err == 0 || err == -ENOMEM);
	if (err) {
		/* Caching is optional, no worries. */
		kfree(cache_entry);
		return;
	}

	/*
	 * First entry of a (re)filled cache: remember the relocation state it
	 * was built against. No locking needed, we are called from
	 * iterate_extent_inodes() while either holding a transaction handle or
	 * holding fs_info->commit_root_sem.
	 */
	if (btrfs_lru_cache_size(&sctx->backref_cache) == 1)
		sctx->backref_cache_last_reloc_trans = fs_info->last_reloc_trans;
}
152366d04209SFilipe Manana 
/*
 * Extent item check callback for the backref walking code: decides whether
 * walking the backrefs of the extent item @ei (found in @leaf) is worthwhile.
 *
 * Returns 0 to proceed, -ENOENT to give up on cloning for this extent, or
 * -EUCLEAN if the extent we are processing is flagged as a tree block.
 */
static int check_extent_item(u64 bytenr, const struct btrfs_extent_item *ei,
			     const struct extent_buffer *leaf, void *ctx)
{
	const struct backref_ctx *bctx = ctx;
	const struct send_ctx *sctx = bctx->sctx;
	const u64 ref_count = btrfs_extent_refs(leaf, ei);

	if (bytenr == bctx->bytenr) {
		const u64 extent_flags = btrfs_extent_flags(leaf, ei);

		/* A file extent item must never point at a tree block. */
		if (WARN_ON(extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
			return -EUCLEAN;

		/*
		 * A single reference with only the send root as a clone source
		 * (no clone roots were given in the struct
		 * btrfs_ioctl_send_args passed to the send ioctl) means it is
		 * our own reference, so skip the expensive backref walking.
		 */
		if (ref_count == 1 && sctx->clone_roots_cnt == 1)
			return -ENOENT;
	}

	/*
	 * Backreference walking is currently too expensive, both in time and
	 * in memory, for extents with a large number of references. For those
	 * just fall back to write operations instead of clone operations.
	 */
	if (ref_count > SEND_MAX_EXTENT_REFS)
		return -ENOENT;

	return 0;
}
1560f73853c7SFilipe Manana 
/*
 * Data ref filter for the backref walking code: returns true when the data
 * backref (@root, @ino, @offset) is the one recorded in the backref_ctx for the
 * extent currently being processed, so that it can be skipped.
 */
static bool skip_self_data_ref(u64 root, u64 ino, u64 offset, void *ctx)
{
	const struct backref_ctx *bctx = ctx;

	return ino == bctx->cur_objectid &&
	       root == bctx->backref_owner &&
	       offset == bctx->backref_offset;
}
1572adf02418SFilipe Manana 
157331db9f7cSAlexander Block /*
1574766702efSAlexander Block  * Given an inode, offset and extent item, it finds a good clone for a clone
1575766702efSAlexander Block  * instruction. Returns -ENOENT when none could be found. The function makes
1576766702efSAlexander Block  * sure that the returned clone is usable at the point where sending is at the
1577766702efSAlexander Block  * moment. This means, that no clones are accepted which lie behind the current
1578766702efSAlexander Block  * inode+offset.
1579766702efSAlexander Block  *
158031db9f7cSAlexander Block  * path must point to the extent item when called.
158131db9f7cSAlexander Block  */
find_extent_clone(struct send_ctx * sctx,struct btrfs_path * path,u64 ino,u64 data_offset,u64 ino_size,struct clone_root ** found)158231db9f7cSAlexander Block static int find_extent_clone(struct send_ctx *sctx,
158331db9f7cSAlexander Block 			     struct btrfs_path *path,
158431db9f7cSAlexander Block 			     u64 ino, u64 data_offset,
158531db9f7cSAlexander Block 			     u64 ino_size,
158631db9f7cSAlexander Block 			     struct clone_root **found)
158731db9f7cSAlexander Block {
158804ab956eSJeff Mahoney 	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
158931db9f7cSAlexander Block 	int ret;
159031db9f7cSAlexander Block 	int extent_type;
159131db9f7cSAlexander Block 	u64 logical;
159274dd17fbSChris Mason 	u64 disk_byte;
159331db9f7cSAlexander Block 	u64 num_bytes;
159431db9f7cSAlexander Block 	struct btrfs_file_extent_item *fi;
159531db9f7cSAlexander Block 	struct extent_buffer *eb = path->nodes[0];
1596dce28150SGoldwyn Rodrigues 	struct backref_ctx backref_ctx = { 0 };
1597a2c8d27eSFilipe Manana 	struct btrfs_backref_walk_ctx backref_walk_ctx = { 0 };
159831db9f7cSAlexander Block 	struct clone_root *cur_clone_root;
159974dd17fbSChris Mason 	int compressed;
160031db9f7cSAlexander Block 	u32 i;
160131db9f7cSAlexander Block 
160231db9f7cSAlexander Block 	/*
1603d3f41317SFilipe Manana 	 * With fallocate we can get prealloc extents beyond the inode's i_size,
1604d3f41317SFilipe Manana 	 * so we don't do anything here because clone operations can not clone
1605d3f41317SFilipe Manana 	 * to a range beyond i_size without increasing the i_size of the
1606d3f41317SFilipe Manana 	 * destination inode.
160731db9f7cSAlexander Block 	 */
1608d3f41317SFilipe Manana 	if (data_offset >= ino_size)
160961ce908aSFilipe Manana 		return 0;
161031db9f7cSAlexander Block 
161161ce908aSFilipe Manana 	fi = btrfs_item_ptr(eb, path->slots[0], struct btrfs_file_extent_item);
161231db9f7cSAlexander Block 	extent_type = btrfs_file_extent_type(eb, fi);
161361ce908aSFilipe Manana 	if (extent_type == BTRFS_FILE_EXTENT_INLINE)
161461ce908aSFilipe Manana 		return -ENOENT;
161531db9f7cSAlexander Block 
161674dd17fbSChris Mason 	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
161761ce908aSFilipe Manana 	if (disk_byte == 0)
161861ce908aSFilipe Manana 		return -ENOENT;
161961ce908aSFilipe Manana 
162061ce908aSFilipe Manana 	compressed = btrfs_file_extent_compression(eb, fi);
162161ce908aSFilipe Manana 	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
162274dd17fbSChris Mason 	logical = disk_byte + btrfs_file_extent_offset(eb, fi);
162331db9f7cSAlexander Block 
162431db9f7cSAlexander Block 	/*
162531db9f7cSAlexander Block 	 * Setup the clone roots.
162631db9f7cSAlexander Block 	 */
162731db9f7cSAlexander Block 	for (i = 0; i < sctx->clone_roots_cnt; i++) {
162831db9f7cSAlexander Block 		cur_clone_root = sctx->clone_roots + i;
162931db9f7cSAlexander Block 		cur_clone_root->ino = (u64)-1;
163031db9f7cSAlexander Block 		cur_clone_root->offset = 0;
1631c7499a64SFilipe Manana 		cur_clone_root->num_bytes = 0;
163288ffb665SFilipe Manana 		cur_clone_root->found_ref = false;
163331db9f7cSAlexander Block 	}
163431db9f7cSAlexander Block 
1635dce28150SGoldwyn Rodrigues 	backref_ctx.sctx = sctx;
1636dce28150SGoldwyn Rodrigues 	backref_ctx.cur_objectid = ino;
1637dce28150SGoldwyn Rodrigues 	backref_ctx.cur_offset = data_offset;
1638f73853c7SFilipe Manana 	backref_ctx.bytenr = disk_byte;
1639adf02418SFilipe Manana 	/*
1640adf02418SFilipe Manana 	 * Use the header owner and not the send root's id, because in case of a
1641adf02418SFilipe Manana 	 * snapshot we can have shared subtrees.
1642adf02418SFilipe Manana 	 */
1643adf02418SFilipe Manana 	backref_ctx.backref_owner = btrfs_header_owner(eb);
1644adf02418SFilipe Manana 	backref_ctx.backref_offset = data_offset - btrfs_file_extent_offset(eb, fi);
164531db9f7cSAlexander Block 
164631db9f7cSAlexander Block 	/*
164731db9f7cSAlexander Block 	 * The last extent of a file may be too large due to page alignment.
164831db9f7cSAlexander Block 	 * We need to adjust extent_len in this case so that the checks in
164988ffb665SFilipe Manana 	 * iterate_backrefs() work.
165031db9f7cSAlexander Block 	 */
165131db9f7cSAlexander Block 	if (data_offset + num_bytes >= ino_size)
1652dce28150SGoldwyn Rodrigues 		backref_ctx.extent_len = ino_size - data_offset;
1653344174a1SFilipe Manana 	else
1654344174a1SFilipe Manana 		backref_ctx.extent_len = num_bytes;
165531db9f7cSAlexander Block 
165631db9f7cSAlexander Block 	/*
165731db9f7cSAlexander Block 	 * Now collect all backrefs.
165831db9f7cSAlexander Block 	 */
1659f73853c7SFilipe Manana 	backref_walk_ctx.bytenr = disk_byte;
166074dd17fbSChris Mason 	if (compressed == BTRFS_COMPRESS_NONE)
1661f73853c7SFilipe Manana 		backref_walk_ctx.extent_item_pos = btrfs_file_extent_offset(eb, fi);
1662a2c8d27eSFilipe Manana 	backref_walk_ctx.fs_info = fs_info;
166366d04209SFilipe Manana 	backref_walk_ctx.cache_lookup = lookup_backref_cache;
166466d04209SFilipe Manana 	backref_walk_ctx.cache_store = store_backref_cache;
166588ffb665SFilipe Manana 	backref_walk_ctx.indirect_ref_iterator = iterate_backrefs;
1666f73853c7SFilipe Manana 	backref_walk_ctx.check_extent_item = check_extent_item;
166788ffb665SFilipe Manana 	backref_walk_ctx.user_ctx = &backref_ctx;
166874dd17fbSChris Mason 
1669adf02418SFilipe Manana 	/*
1670adf02418SFilipe Manana 	 * If have a single clone root, then it's the send root and we can tell
1671adf02418SFilipe Manana 	 * the backref walking code to skip our own backref and not resolve it,
1672adf02418SFilipe Manana 	 * since we can not use it for cloning - the source and destination
1673adf02418SFilipe Manana 	 * ranges can't overlap and in case the leaf is shared through a subtree
1674adf02418SFilipe Manana 	 * due to snapshots, we can't use those other roots since they are not
1675adf02418SFilipe Manana 	 * in the list of clone roots.
1676adf02418SFilipe Manana 	 */
1677adf02418SFilipe Manana 	if (sctx->clone_roots_cnt == 1)
1678adf02418SFilipe Manana 		backref_walk_ctx.skip_data_ref = skip_self_data_ref;
1679adf02418SFilipe Manana 
168088ffb665SFilipe Manana 	ret = iterate_extent_inodes(&backref_walk_ctx, true, iterate_backrefs,
1681a2c8d27eSFilipe Manana 				    &backref_ctx);
168231db9f7cSAlexander Block 	if (ret < 0)
1683f73853c7SFilipe Manana 		return ret;
168431db9f7cSAlexander Block 
1685d96b3424SFilipe Manana 	down_read(&fs_info->commit_root_sem);
1686d96b3424SFilipe Manana 	if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
1687d96b3424SFilipe Manana 		/*
1688d96b3424SFilipe Manana 		 * A transaction commit for a transaction in which block group
1689d96b3424SFilipe Manana 		 * relocation was done just happened.
1690d96b3424SFilipe Manana 		 * The disk_bytenr of the file extent item we processed is
1691d96b3424SFilipe Manana 		 * possibly stale, referring to the extent's location before
1692d96b3424SFilipe Manana 		 * relocation. So act as if we haven't found any clone sources
1693d96b3424SFilipe Manana 		 * and fallback to write commands, which will read the correct
1694d96b3424SFilipe Manana 		 * data from the new extent location. Otherwise we will fail
1695d96b3424SFilipe Manana 		 * below because we haven't found our own back reference or we
1696d96b3424SFilipe Manana 		 * could be getting incorrect sources in case the old extent
1697d96b3424SFilipe Manana 		 * was already reallocated after the relocation.
1698d96b3424SFilipe Manana 		 */
1699d96b3424SFilipe Manana 		up_read(&fs_info->commit_root_sem);
1700f73853c7SFilipe Manana 		return -ENOENT;
1701d96b3424SFilipe Manana 	}
1702d96b3424SFilipe Manana 	up_read(&fs_info->commit_root_sem);
1703d96b3424SFilipe Manana 
170404ab956eSJeff Mahoney 	btrfs_debug(fs_info,
170504ab956eSJeff Mahoney 		    "find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
170631db9f7cSAlexander Block 		    data_offset, ino, num_bytes, logical);
170731db9f7cSAlexander Block 
170888ffb665SFilipe Manana 	if (!backref_ctx.found) {
170904ab956eSJeff Mahoney 		btrfs_debug(fs_info, "no clones found");
1710f73853c7SFilipe Manana 		return -ENOENT;
171188ffb665SFilipe Manana 	}
171231db9f7cSAlexander Block 
171331db9f7cSAlexander Block 	cur_clone_root = NULL;
171431db9f7cSAlexander Block 	for (i = 0; i < sctx->clone_roots_cnt; i++) {
1715c7499a64SFilipe Manana 		struct clone_root *clone_root = &sctx->clone_roots[i];
171631db9f7cSAlexander Block 
171788ffb665SFilipe Manana 		if (!clone_root->found_ref)
1718c7499a64SFilipe Manana 			continue;
1719c7499a64SFilipe Manana 
1720c7499a64SFilipe Manana 		/*
1721c7499a64SFilipe Manana 		 * Choose the root from which we can clone more bytes, to
1722c7499a64SFilipe Manana 		 * minimize write operations and therefore have more extent
1723c7499a64SFilipe Manana 		 * sharing at the destination (the same as in the source).
1724c7499a64SFilipe Manana 		 */
1725c7499a64SFilipe Manana 		if (!cur_clone_root ||
1726c7499a64SFilipe Manana 		    clone_root->num_bytes > cur_clone_root->num_bytes) {
1727c7499a64SFilipe Manana 			cur_clone_root = clone_root;
1728c7499a64SFilipe Manana 
1729c7499a64SFilipe Manana 			/*
1730c7499a64SFilipe Manana 			 * We found an optimal clone candidate (any inode from
1731c7499a64SFilipe Manana 			 * any root is fine), so we're done.
1732c7499a64SFilipe Manana 			 */
1733c7499a64SFilipe Manana 			if (clone_root->num_bytes >= backref_ctx.extent_len)
1734c7499a64SFilipe Manana 				break;
1735c7499a64SFilipe Manana 		}
173631db9f7cSAlexander Block 	}
173731db9f7cSAlexander Block 
173831db9f7cSAlexander Block 	if (cur_clone_root) {
173931db9f7cSAlexander Block 		*found = cur_clone_root;
174031db9f7cSAlexander Block 		ret = 0;
174131db9f7cSAlexander Block 	} else {
174231db9f7cSAlexander Block 		ret = -ENOENT;
174331db9f7cSAlexander Block 	}
174431db9f7cSAlexander Block 
174531db9f7cSAlexander Block 	return ret;
174631db9f7cSAlexander Block }
174731db9f7cSAlexander Block 
/*
 * Read the target of the symlink inode @ino in @root and append it to @dest.
 *
 * The target is looked up as the file extent item at file offset 0, which
 * must be an inline, uncompressed extent.
 *
 * Returns -ENOMEM if no path could be allocated, -EIO if the inode has no
 * file extent item at offset 0, -EUCLEAN if the extent is not inline or is
 * compressed, a negative errno from the tree search, or otherwise the result
 * of fs_path_add_from_extent_buffer().
 */
static int read_symlink(struct btrfs_root *root,
			u64 ino,
			struct fs_path *dest)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_file_extent_item *ei;
	u8 type;
	u8 compression;
	unsigned long off;
	int len;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		/*
		 * An empty symlink inode. Can happen in rare error paths when
		 * creating a symlink (transaction committed before the inode
		 * eviction handler removed the symlink inode items and a crash
		 * happened in between or the subvol was snapshoted in between).
		 * Print an informative message to dmesg/syslog so that the user
		 * can delete the symlink.
		 */
		btrfs_err(root->fs_info,
			  "Found empty symlink inode %llu at root %llu",
			  ino, btrfs_root_id(root));
		ret = -EIO;
		goto out;
	}

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			struct btrfs_file_extent_item);
	type = btrfs_file_extent_type(path->nodes[0], ei);
	if (unlikely(type != BTRFS_FILE_EXTENT_INLINE)) {
		ret = -EUCLEAN;
		btrfs_crit(root->fs_info,
"send: found symlink extent that is not inline, ino %llu root %llu extent type %d",
			   ino, btrfs_root_id(root), type);
		goto out;
	}
	compression = btrfs_file_extent_compression(path->nodes[0], ei);
	if (unlikely(compression != BTRFS_COMPRESS_NONE)) {
		ret = -EUCLEAN;
		btrfs_crit(root->fs_info,
"send: found symlink extent with compression, ino %llu root %llu compression type %d",
			   ino, btrfs_root_id(root), compression);
		goto out;
	}

	/* The link target is the inline data that follows the item header. */
	off = btrfs_file_extent_inline_start(ei);
	len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);

	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);

out:
	btrfs_free_path(path);
	return ret;
}
181531db9f7cSAlexander Block 
181631db9f7cSAlexander Block /*
181731db9f7cSAlexander Block  * Helper function to generate a file name that is unique in the root of
181831db9f7cSAlexander Block  * send_root and parent_root. This is used to generate names for orphan inodes.
181931db9f7cSAlexander Block  */
gen_unique_name(struct send_ctx * sctx,u64 ino,u64 gen,struct fs_path * dest)182031db9f7cSAlexander Block static int gen_unique_name(struct send_ctx *sctx,
182131db9f7cSAlexander Block 			   u64 ino, u64 gen,
182231db9f7cSAlexander Block 			   struct fs_path *dest)
182331db9f7cSAlexander Block {
182431db9f7cSAlexander Block 	int ret = 0;
182531db9f7cSAlexander Block 	struct btrfs_path *path;
182631db9f7cSAlexander Block 	struct btrfs_dir_item *di;
182731db9f7cSAlexander Block 	char tmp[64];
182831db9f7cSAlexander Block 	int len;
182931db9f7cSAlexander Block 	u64 idx = 0;
183031db9f7cSAlexander Block 
183131db9f7cSAlexander Block 	path = alloc_path_for_send();
183231db9f7cSAlexander Block 	if (!path)
183331db9f7cSAlexander Block 		return -ENOMEM;
183431db9f7cSAlexander Block 
183531db9f7cSAlexander Block 	while (1) {
18366db75318SSweet Tea Dorminy 		struct fscrypt_str tmp_name;
1837e43eec81SSweet Tea Dorminy 
1838f74b86d8SFilipe David Borba Manana 		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
183931db9f7cSAlexander Block 				ino, gen, idx);
184064792f25SDavid Sterba 		ASSERT(len < sizeof(tmp));
1841e43eec81SSweet Tea Dorminy 		tmp_name.name = tmp;
1842e43eec81SSweet Tea Dorminy 		tmp_name.len = strlen(tmp);
184331db9f7cSAlexander Block 
184431db9f7cSAlexander Block 		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
184531db9f7cSAlexander Block 				path, BTRFS_FIRST_FREE_OBJECTID,
1846e43eec81SSweet Tea Dorminy 				&tmp_name, 0);
184731db9f7cSAlexander Block 		btrfs_release_path(path);
184831db9f7cSAlexander Block 		if (IS_ERR(di)) {
184931db9f7cSAlexander Block 			ret = PTR_ERR(di);
185031db9f7cSAlexander Block 			goto out;
185131db9f7cSAlexander Block 		}
185231db9f7cSAlexander Block 		if (di) {
185331db9f7cSAlexander Block 			/* not unique, try again */
185431db9f7cSAlexander Block 			idx++;
185531db9f7cSAlexander Block 			continue;
185631db9f7cSAlexander Block 		}
185731db9f7cSAlexander Block 
185831db9f7cSAlexander Block 		if (!sctx->parent_root) {
185931db9f7cSAlexander Block 			/* unique */
186031db9f7cSAlexander Block 			ret = 0;
186131db9f7cSAlexander Block 			break;
186231db9f7cSAlexander Block 		}
186331db9f7cSAlexander Block 
186431db9f7cSAlexander Block 		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
186531db9f7cSAlexander Block 				path, BTRFS_FIRST_FREE_OBJECTID,
1866e43eec81SSweet Tea Dorminy 				&tmp_name, 0);
186731db9f7cSAlexander Block 		btrfs_release_path(path);
186831db9f7cSAlexander Block 		if (IS_ERR(di)) {
186931db9f7cSAlexander Block 			ret = PTR_ERR(di);
187031db9f7cSAlexander Block 			goto out;
187131db9f7cSAlexander Block 		}
187231db9f7cSAlexander Block 		if (di) {
187331db9f7cSAlexander Block 			/* not unique, try again */
187431db9f7cSAlexander Block 			idx++;
187531db9f7cSAlexander Block 			continue;
187631db9f7cSAlexander Block 		}
187731db9f7cSAlexander Block 		/* unique */
187831db9f7cSAlexander Block 		break;
187931db9f7cSAlexander Block 	}
188031db9f7cSAlexander Block 
188131db9f7cSAlexander Block 	ret = fs_path_add(dest, tmp, strlen(tmp));
188231db9f7cSAlexander Block 
188331db9f7cSAlexander Block out:
188431db9f7cSAlexander Block 	btrfs_free_path(path);
188531db9f7cSAlexander Block 	return ret;
188631db9f7cSAlexander Block }
188731db9f7cSAlexander Block 
/*
 * States reported by get_cur_inode_state() for an inode number/generation
 * pair, relative to the send root, the parent root and the send progress.
 */
enum inode_state {
	/* The generation matches in both the send root and the parent root. */
	inode_state_no_change = 0,
	/* Matches only in the send root; inode number not below send_progress. */
	inode_state_will_create,
	/* Matches only in the send root; inode number below send_progress. */
	inode_state_did_create,
	/* Matches only in the parent root; inode number not below send_progress. */
	inode_state_will_delete,
	/* Matches only in the parent root; inode number below send_progress. */
	inode_state_did_delete,
};
189531db9f7cSAlexander Block 
/*
 * Determine the state of inode @ino with generation @gen by looking it up in
 * the send root and, if there is one, in the parent root.
 *
 * An inode counts as existing in a root only if get_inode_info() succeeds and
 * its link count is not zero.
 *
 * @send_gen:   optional; receives the inode's generation in the send root, or
 *              0 if it does not exist there.
 * @parent_gen: optional; receives the inode's generation in the parent root,
 *              or 0 if it does not exist there or there is no parent root.
 *
 * Returns one of the inode_state values, -ENOENT if the inode does not exist
 * with generation @gen in either root, or another negative errno coming from
 * get_inode_info(). The output generations are not guaranteed to be written
 * when such an errno is returned.
 */
static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen,
			       u64 *send_gen, u64 *parent_gen)
{
	int ret;
	int left_ret;
	int right_ret;
	u64 left_gen;
	u64 right_gen = 0;
	bool in_send;
	bool in_parent;
	bool processed;
	/*
	 * Zeroed so that the nlink/gen reads below are well defined even if
	 * get_inode_info() returned -ENOENT without filling the structure.
	 */
	struct btrfs_inode_info info = { 0 };

	ret = get_inode_info(sctx->send_root, ino, &info);
	if (ret < 0 && ret != -ENOENT)
		return ret;
	left_ret = (info.nlink == 0) ? -ENOENT : ret;
	left_gen = info.gen;
	if (send_gen)
		*send_gen = ((left_ret == -ENOENT) ? 0 : info.gen);

	if (!sctx->parent_root) {
		right_ret = -ENOENT;
		if (parent_gen)
			*parent_gen = 0;
	} else {
		/* Do not let send root data leak into the parent root result. */
		memset(&info, 0, sizeof(info));
		ret = get_inode_info(sctx->parent_root, ino, &info);
		if (ret < 0 && ret != -ENOENT)
			return ret;
		right_ret = (info.nlink == 0) ? -ENOENT : ret;
		right_gen = info.gen;
		if (parent_gen)
			*parent_gen = ((right_ret == -ENOENT) ? 0 : info.gen);
	}

	in_send = (left_ret == 0 && left_gen == gen);
	in_parent = (right_ret == 0 && right_gen == gen);
	/* Inodes below send_progress are reported as "did", others as "will". */
	processed = (ino < sctx->send_progress);

	if (in_send && in_parent)
		return inode_state_no_change;
	if (in_send)
		return processed ? inode_state_did_create :
				   inode_state_will_create;
	if (in_parent)
		return processed ? inode_state_did_delete :
				   inode_state_will_delete;

	return -ENOENT;
}
196731db9f7cSAlexander Block 
/*
 * Tell whether inode @ino with generation @gen currently exists, based on the
 * state reported by get_cur_inode_state().
 *
 * BTRFS_FIRST_FREE_OBJECTID is always reported as existing, without any
 * lookup; @send_gen and @parent_gen are not written in that case.
 *
 * Returns 1 for the states no_change, did_create and will_delete, 0 for any
 * other state, and a negative errno if get_cur_inode_state() failed.
 */
static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen,
			     u64 *send_gen, u64 *parent_gen)
{
	int state;

	if (ino == BTRFS_FIRST_FREE_OBJECTID)
		return 1;

	state = get_cur_inode_state(sctx, ino, gen, send_gen, parent_gen);
	if (state < 0)
		return state;

	switch (state) {
	case inode_state_no_change:
	case inode_state_did_create:
	case inode_state_will_delete:
		return 1;
	default:
		return 0;
	}
}
199031db9f7cSAlexander Block 
199131db9f7cSAlexander Block /*
199231db9f7cSAlexander Block  * Helper function to lookup a dir item in a dir.
199331db9f7cSAlexander Block  */
lookup_dir_item_inode(struct btrfs_root * root,u64 dir,const char * name,int name_len,u64 * found_inode)199431db9f7cSAlexander Block static int lookup_dir_item_inode(struct btrfs_root *root,
199531db9f7cSAlexander Block 				 u64 dir, const char *name, int name_len,
1996eab67c06SOmar Sandoval 				 u64 *found_inode)
199731db9f7cSAlexander Block {
199831db9f7cSAlexander Block 	int ret = 0;
199931db9f7cSAlexander Block 	struct btrfs_dir_item *di;
200031db9f7cSAlexander Block 	struct btrfs_key key;
200131db9f7cSAlexander Block 	struct btrfs_path *path;
20026db75318SSweet Tea Dorminy 	struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len);
200331db9f7cSAlexander Block 
200431db9f7cSAlexander Block 	path = alloc_path_for_send();
200531db9f7cSAlexander Block 	if (!path)
200631db9f7cSAlexander Block 		return -ENOMEM;
200731db9f7cSAlexander Block 
2008e43eec81SSweet Tea Dorminy 	di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0);
20093cf5068fSLiu Bo 	if (IS_ERR_OR_NULL(di)) {
20103cf5068fSLiu Bo 		ret = di ? PTR_ERR(di) : -ENOENT;
201131db9f7cSAlexander Block 		goto out;
201231db9f7cSAlexander Block 	}
201331db9f7cSAlexander Block 	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
20141af56070SFilipe Manana 	if (key.type == BTRFS_ROOT_ITEM_KEY) {
20151af56070SFilipe Manana 		ret = -ENOENT;
20161af56070SFilipe Manana 		goto out;
20171af56070SFilipe Manana 	}
201831db9f7cSAlexander Block 	*found_inode = key.objectid;
201931db9f7cSAlexander Block 
202031db9f7cSAlexander Block out:
202131db9f7cSAlexander Block 	btrfs_free_path(path);
202231db9f7cSAlexander Block 	return ret;
202331db9f7cSAlexander Block }
202431db9f7cSAlexander Block 
2025766702efSAlexander Block /*
2026766702efSAlexander Block  * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
2027766702efSAlexander Block  * generation of the parent dir and the name of the dir entry.
2028766702efSAlexander Block  */
get_first_ref(struct btrfs_root * root,u64 ino,u64 * dir,u64 * dir_gen,struct fs_path * name)2029924794c9STsutomu Itoh static int get_first_ref(struct btrfs_root *root, u64 ino,
203031db9f7cSAlexander Block 			 u64 *dir, u64 *dir_gen, struct fs_path *name)
203131db9f7cSAlexander Block {
203231db9f7cSAlexander Block 	int ret;
203331db9f7cSAlexander Block 	struct btrfs_key key;
203431db9f7cSAlexander Block 	struct btrfs_key found_key;
203531db9f7cSAlexander Block 	struct btrfs_path *path;
203631db9f7cSAlexander Block 	int len;
203796b5bd77SJan Schmidt 	u64 parent_dir;
203831db9f7cSAlexander Block 
203931db9f7cSAlexander Block 	path = alloc_path_for_send();
204031db9f7cSAlexander Block 	if (!path)
204131db9f7cSAlexander Block 		return -ENOMEM;
204231db9f7cSAlexander Block 
204331db9f7cSAlexander Block 	key.objectid = ino;
204431db9f7cSAlexander Block 	key.type = BTRFS_INODE_REF_KEY;
204531db9f7cSAlexander Block 	key.offset = 0;
204631db9f7cSAlexander Block 
204731db9f7cSAlexander Block 	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
204831db9f7cSAlexander Block 	if (ret < 0)
204931db9f7cSAlexander Block 		goto out;
205031db9f7cSAlexander Block 	if (!ret)
205131db9f7cSAlexander Block 		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
205231db9f7cSAlexander Block 				path->slots[0]);
205396b5bd77SJan Schmidt 	if (ret || found_key.objectid != ino ||
205496b5bd77SJan Schmidt 	    (found_key.type != BTRFS_INODE_REF_KEY &&
205596b5bd77SJan Schmidt 	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
205631db9f7cSAlexander Block 		ret = -ENOENT;
205731db9f7cSAlexander Block 		goto out;
205831db9f7cSAlexander Block 	}
205931db9f7cSAlexander Block 
206051a60253SFilipe Manana 	if (found_key.type == BTRFS_INODE_REF_KEY) {
206196b5bd77SJan Schmidt 		struct btrfs_inode_ref *iref;
206231db9f7cSAlexander Block 		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
206331db9f7cSAlexander Block 				      struct btrfs_inode_ref);
206431db9f7cSAlexander Block 		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
206531db9f7cSAlexander Block 		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
206696b5bd77SJan Schmidt 						     (unsigned long)(iref + 1),
206796b5bd77SJan Schmidt 						     len);
206896b5bd77SJan Schmidt 		parent_dir = found_key.offset;
206996b5bd77SJan Schmidt 	} else {
207096b5bd77SJan Schmidt 		struct btrfs_inode_extref *extref;
207196b5bd77SJan Schmidt 		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
207296b5bd77SJan Schmidt 					struct btrfs_inode_extref);
207396b5bd77SJan Schmidt 		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
207496b5bd77SJan Schmidt 		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
207596b5bd77SJan Schmidt 					(unsigned long)&extref->name, len);
207696b5bd77SJan Schmidt 		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
207796b5bd77SJan Schmidt 	}
207831db9f7cSAlexander Block 	if (ret < 0)
207931db9f7cSAlexander Block 		goto out;
208031db9f7cSAlexander Block 	btrfs_release_path(path);
208131db9f7cSAlexander Block 
2082b46ab97bSFilipe Manana 	if (dir_gen) {
20837e93f6dcSBingJing Chang 		ret = get_inode_gen(root, parent_dir, dir_gen);
208431db9f7cSAlexander Block 		if (ret < 0)
208531db9f7cSAlexander Block 			goto out;
2086b46ab97bSFilipe Manana 	}
208731db9f7cSAlexander Block 
208896b5bd77SJan Schmidt 	*dir = parent_dir;
208931db9f7cSAlexander Block 
209031db9f7cSAlexander Block out:
209131db9f7cSAlexander Block 	btrfs_free_path(path);
209231db9f7cSAlexander Block 	return ret;
209331db9f7cSAlexander Block }
209431db9f7cSAlexander Block 
/*
 * Check whether the ref (@dir, @name) is the first ref of inode @ino in @root,
 * as reported by get_first_ref().
 *
 * Returns 1 if the parent directory, the name length and the name bytes all
 * match, 0 if they do not, -ENOMEM if the temporary path could not be
 * allocated, or the negative errno returned by get_first_ref().
 */
static int is_first_ref(struct btrfs_root *root,
			u64 ino, u64 dir,
			const char *name, int name_len)
{
	struct fs_path *first_name;
	u64 first_dir;
	int ret;

	first_name = fs_path_alloc();
	if (!first_name)
		return -ENOMEM;

	ret = get_first_ref(root, ino, &first_dir, NULL, first_name);
	if (ret >= 0) {
		/* Only compare the bytes once directory and length agree. */
		if (first_dir == dir && fs_path_len(first_name) == name_len)
			ret = !memcmp(first_name->start, name, name_len);
		else
			ret = 0;
	}

	fs_path_free(first_name);
	return ret;
}
212231db9f7cSAlexander Block 
2123766702efSAlexander Block /*
2124766702efSAlexander Block  * Used by process_recorded_refs to determine if a new ref would overwrite an
2125766702efSAlexander Block  * already existing ref. In case it detects an overwrite, it returns the
2126766702efSAlexander Block  * inode/gen in who_ino/who_gen.
2127766702efSAlexander Block  * When an overwrite is detected, process_recorded_refs does proper orphanizing
2128766702efSAlexander Block  * to make sure later references to the overwritten inode are possible.
2129766702efSAlexander Block  * Orphanizing is however only required for the first ref of an inode.
2130766702efSAlexander Block  * process_recorded_refs does an additional is_first_ref check to see if
2131766702efSAlexander Block  * orphanizing is really required.
2132766702efSAlexander Block  */
will_overwrite_ref(struct send_ctx * sctx,u64 dir,u64 dir_gen,const char * name,int name_len,u64 * who_ino,u64 * who_gen,u64 * who_mode)213331db9f7cSAlexander Block static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
213431db9f7cSAlexander Block 			      const char *name, int name_len,
2135f5962781SFilipe Manana 			      u64 *who_ino, u64 *who_gen, u64 *who_mode)
213631db9f7cSAlexander Block {
2137b3047a42SFilipe Manana 	int ret;
2138498581f3SFilipe Manana 	u64 parent_root_dir_gen;
213931db9f7cSAlexander Block 	u64 other_inode = 0;
21407e93f6dcSBingJing Chang 	struct btrfs_inode_info info;
214131db9f7cSAlexander Block 
214231db9f7cSAlexander Block 	if (!sctx->parent_root)
2143b3047a42SFilipe Manana 		return 0;
214431db9f7cSAlexander Block 
2145498581f3SFilipe Manana 	ret = is_inode_existent(sctx, dir, dir_gen, NULL, &parent_root_dir_gen);
214631db9f7cSAlexander Block 	if (ret <= 0)
2147b3047a42SFilipe Manana 		return 0;
214831db9f7cSAlexander Block 
2149ebdad913SJosef Bacik 	/*
2150ebdad913SJosef Bacik 	 * If we have a parent root we need to verify that the parent dir was
215101327610SNicholas D Steeves 	 * not deleted and then re-created, if it was then we have no overwrite
2152ebdad913SJosef Bacik 	 * and we can just unlink this entry.
2153498581f3SFilipe Manana 	 *
2154498581f3SFilipe Manana 	 * @parent_root_dir_gen was set to 0 if the inode does not exist in the
2155498581f3SFilipe Manana 	 * parent root.
2156ebdad913SJosef Bacik 	 */
2157498581f3SFilipe Manana 	if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID &&
2158498581f3SFilipe Manana 	    parent_root_dir_gen != dir_gen)
2159b3047a42SFilipe Manana 		return 0;
2160ebdad913SJosef Bacik 
216131db9f7cSAlexander Block 	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
2162eab67c06SOmar Sandoval 				    &other_inode);
2163b3047a42SFilipe Manana 	if (ret == -ENOENT)
2164b3047a42SFilipe Manana 		return 0;
2165b3047a42SFilipe Manana 	else if (ret < 0)
2166b3047a42SFilipe Manana 		return ret;
216731db9f7cSAlexander Block 
2168766702efSAlexander Block 	/*
2169766702efSAlexander Block 	 * Check if the overwritten ref was already processed. If yes, the ref
2170766702efSAlexander Block 	 * was already unlinked/moved, so we can safely assume that we will not
2171766702efSAlexander Block 	 * overwrite anything at this point in time.
2172766702efSAlexander Block 	 */
2173801bec36SRobbie Ko 	if (other_inode > sctx->send_progress ||
2174801bec36SRobbie Ko 	    is_waiting_for_move(sctx, other_inode)) {
21757e93f6dcSBingJing Chang 		ret = get_inode_info(sctx->parent_root, other_inode, &info);
217631db9f7cSAlexander Block 		if (ret < 0)
2177b3047a42SFilipe Manana 			return ret;
217831db9f7cSAlexander Block 
217931db9f7cSAlexander Block 		*who_ino = other_inode;
21807e93f6dcSBingJing Chang 		*who_gen = info.gen;
21817e93f6dcSBingJing Chang 		*who_mode = info.mode;
2182b3047a42SFilipe Manana 		return 1;
218331db9f7cSAlexander Block 	}
218431db9f7cSAlexander Block 
2185b3047a42SFilipe Manana 	return 0;
218631db9f7cSAlexander Block }
218731db9f7cSAlexander Block 
2188766702efSAlexander Block /*
2189766702efSAlexander Block  * Checks if the ref was overwritten by an already processed inode. This is
2190766702efSAlexander Block  * used by __get_cur_name_and_parent to find out if the ref was orphanized and
2191766702efSAlexander Block  * thus the orphan name needs be used.
2192766702efSAlexander Block  * process_recorded_refs also uses it to avoid unlinking of refs that were
2193766702efSAlexander Block  * overwritten.
2194766702efSAlexander Block  */
did_overwrite_ref(struct send_ctx * sctx,u64 dir,u64 dir_gen,u64 ino,u64 ino_gen,const char * name,int name_len)219531db9f7cSAlexander Block static int did_overwrite_ref(struct send_ctx *sctx,
219631db9f7cSAlexander Block 			    u64 dir, u64 dir_gen,
219731db9f7cSAlexander Block 			    u64 ino, u64 ino_gen,
219831db9f7cSAlexander Block 			    const char *name, int name_len)
219931db9f7cSAlexander Block {
2200e739ba30SFilipe Manana 	int ret;
220131db9f7cSAlexander Block 	u64 ow_inode;
2202cb689481SFilipe Manana 	u64 ow_gen = 0;
2203498581f3SFilipe Manana 	u64 send_root_dir_gen;
220431db9f7cSAlexander Block 
220531db9f7cSAlexander Block 	if (!sctx->parent_root)
2206e739ba30SFilipe Manana 		return 0;
220731db9f7cSAlexander Block 
2208498581f3SFilipe Manana 	ret = is_inode_existent(sctx, dir, dir_gen, &send_root_dir_gen, NULL);
220931db9f7cSAlexander Block 	if (ret <= 0)
2210e739ba30SFilipe Manana 		return ret;
221131db9f7cSAlexander Block 
2212498581f3SFilipe Manana 	/*
2213498581f3SFilipe Manana 	 * @send_root_dir_gen was set to 0 if the inode does not exist in the
2214498581f3SFilipe Manana 	 * send root.
2215498581f3SFilipe Manana 	 */
2216498581f3SFilipe Manana 	if (dir != BTRFS_FIRST_FREE_OBJECTID && send_root_dir_gen != dir_gen)
2217e739ba30SFilipe Manana 		return 0;
221801914101SRobbie Ko 
221931db9f7cSAlexander Block 	/* check if the ref was overwritten by another ref */
222031db9f7cSAlexander Block 	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
2221eab67c06SOmar Sandoval 				    &ow_inode);
2222e739ba30SFilipe Manana 	if (ret == -ENOENT) {
222331db9f7cSAlexander Block 		/* was never and will never be overwritten */
2224e739ba30SFilipe Manana 		return 0;
2225e739ba30SFilipe Manana 	} else if (ret < 0) {
2226e739ba30SFilipe Manana 		return ret;
222731db9f7cSAlexander Block 	}
222831db9f7cSAlexander Block 
2229cb689481SFilipe Manana 	if (ow_inode == ino) {
2230cb689481SFilipe Manana 		ret = get_inode_gen(sctx->send_root, ow_inode, &ow_gen);
223131db9f7cSAlexander Block 		if (ret < 0)
2232e739ba30SFilipe Manana 			return ret;
223331db9f7cSAlexander Block 
2234cb689481SFilipe Manana 		/* It's the same inode, so no overwrite happened. */
2235cb689481SFilipe Manana 		if (ow_gen == ino_gen)
2236e739ba30SFilipe Manana 			return 0;
2237cb689481SFilipe Manana 	}
223831db9f7cSAlexander Block 
22398b191a68SFilipe Manana 	/*
22408b191a68SFilipe Manana 	 * We know that it is or will be overwritten. Check this now.
22418b191a68SFilipe Manana 	 * The current inode being processed might have been the one that caused
2242b786f16aSFilipe Manana 	 * inode 'ino' to be orphanized, therefore check if ow_inode matches
2243b786f16aSFilipe Manana 	 * the current inode being processed.
22448b191a68SFilipe Manana 	 */
2245cb689481SFilipe Manana 	if (ow_inode < sctx->send_progress)
2246e739ba30SFilipe Manana 		return 1;
224731db9f7cSAlexander Block 
2248cb689481SFilipe Manana 	if (ino != sctx->cur_ino && ow_inode == sctx->cur_ino) {
2249cb689481SFilipe Manana 		if (ow_gen == 0) {
2250cb689481SFilipe Manana 			ret = get_inode_gen(sctx->send_root, ow_inode, &ow_gen);
2251cb689481SFilipe Manana 			if (ret < 0)
2252cb689481SFilipe Manana 				return ret;
2253cb689481SFilipe Manana 		}
2254cb689481SFilipe Manana 		if (ow_gen == sctx->cur_inode_gen)
2255cb689481SFilipe Manana 			return 1;
2256cb689481SFilipe Manana 	}
2257cb689481SFilipe Manana 
2258e739ba30SFilipe Manana 	return 0;
225931db9f7cSAlexander Block }
226031db9f7cSAlexander Block 
2261766702efSAlexander Block /*
2262766702efSAlexander Block  * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
2263766702efSAlexander Block  * that got overwritten. This is used by process_recorded_refs to determine
2264766702efSAlexander Block  * if it has to use the path as returned by get_cur_path or the orphan name.
2265766702efSAlexander Block  */
did_overwrite_first_ref(struct send_ctx * sctx,u64 ino,u64 gen)226631db9f7cSAlexander Block static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
226731db9f7cSAlexander Block {
226831db9f7cSAlexander Block 	int ret = 0;
226931db9f7cSAlexander Block 	struct fs_path *name = NULL;
227031db9f7cSAlexander Block 	u64 dir;
227131db9f7cSAlexander Block 	u64 dir_gen;
227231db9f7cSAlexander Block 
227331db9f7cSAlexander Block 	if (!sctx->parent_root)
227431db9f7cSAlexander Block 		goto out;
227531db9f7cSAlexander Block 
2276924794c9STsutomu Itoh 	name = fs_path_alloc();
227731db9f7cSAlexander Block 	if (!name)
227831db9f7cSAlexander Block 		return -ENOMEM;
227931db9f7cSAlexander Block 
2280924794c9STsutomu Itoh 	ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
228131db9f7cSAlexander Block 	if (ret < 0)
228231db9f7cSAlexander Block 		goto out;
228331db9f7cSAlexander Block 
228431db9f7cSAlexander Block 	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
228531db9f7cSAlexander Block 			name->start, fs_path_len(name));
228631db9f7cSAlexander Block 
228731db9f7cSAlexander Block out:
2288924794c9STsutomu Itoh 	fs_path_free(name);
228931db9f7cSAlexander Block 	return ret;
229031db9f7cSAlexander Block }
229131db9f7cSAlexander Block 
/*
 * Look up the name cache entry for @ino/@gen in sctx->name_cache.
 *
 * Returns the name_cache_entry embedding the LRU cache entry that was found,
 * or NULL if the lookup found nothing.
 */
static inline struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
							 u64 ino, u64 gen)
{
	struct btrfs_lru_cache_entry *found =
		btrfs_lru_cache_lookup(&sctx->name_cache, ino, gen);

	return found ? container_of(found, struct name_cache_entry, entry) :
		       NULL;
}
230331db9f7cSAlexander Block 
2304766702efSAlexander Block /*
2305766702efSAlexander Block  * Used by get_cur_path for each ref up to the root.
2306766702efSAlexander Block  * Returns 0 if it succeeded.
2307766702efSAlexander Block  * Returns 1 if the inode is not existent or got overwritten. In that case, the
2308766702efSAlexander Block  * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
2309766702efSAlexander Block  * is returned, parent_ino/parent_gen are not guaranteed to be valid.
2310766702efSAlexander Block  * Returns <0 in case of error.
2311766702efSAlexander Block  */
static int __get_cur_name_and_parent(struct send_ctx *sctx,
				     u64 ino, u64 gen,
				     u64 *parent_ino,
				     u64 *parent_gen,
				     struct fs_path *dest)
{
	int ret;
	int nce_ret;
	struct name_cache_entry *nce;

	/*
	 * First check if we already did a call to this function with the same
	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
	 * return the cached result.
	 */
	nce = name_cache_search(sctx, ino, gen);
	if (nce) {
		if (ino < sctx->send_progress && nce->need_later_update) {
			/*
			 * The entry was recorded before this inode was
			 * processed and is now stale: drop it and redo the
			 * lookup below.
			 */
			btrfs_lru_cache_remove(&sctx->name_cache, &nce->entry);
			nce = NULL;
		} else {
			*parent_ino = nce->parent_ino;
			*parent_gen = nce->parent_gen;
			ret = fs_path_add(dest, nce->name, nce->name_len);
			if (ret < 0)
				goto out;
			ret = nce->ret;
			goto out;
		}
	}

	/*
	 * If the inode is not existent yet, add the orphan name and return 1.
	 * This should only happen for the parent dir that we determine in
	 * record_new_ref_if_needed().
	 */
	ret = is_inode_existent(sctx, ino, gen, NULL, NULL);
	if (ret < 0)
		goto out;

	if (!ret) {
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
		goto out_cache;
	}

	/*
	 * Depending on whether the inode was already processed or not, use
	 * send_root or parent_root for ref lookup.
	 */
	if (ino < sctx->send_progress)
		ret = get_first_ref(sctx->send_root, ino,
				    parent_ino, parent_gen, dest);
	else
		ret = get_first_ref(sctx->parent_root, ino,
				    parent_ino, parent_gen, dest);
	if (ret < 0)
		goto out;

	/*
	 * Check if the ref was overwritten by an inode's ref that was processed
	 * earlier. If yes, treat as orphan and return 1.
	 */
	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
			dest->start, dest->end - dest->start);
	if (ret < 0)
		goto out;
	if (ret) {
		fs_path_reset(dest);
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
	}

out_cache:
	/*
	 * Store the result of the lookup in the name cache. The entry is
	 * allocated with room for the name plus a terminating NUL.
	 */
	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
	if (!nce) {
		ret = -ENOMEM;
		goto out;
	}

	nce->entry.key = ino;
	nce->entry.gen = gen;
	nce->parent_ino = *parent_ino;
	nce->parent_gen = *parent_gen;
	nce->name_len = fs_path_len(dest);
	nce->ret = ret;
	/*
	 * Copy exactly name_len bytes and terminate explicitly instead of
	 * using strcpy(): the length is already known, and this guarantees
	 * that all name_len bytes later handed to fs_path_add() on a cache hit
	 * are initialized, independent of the contents of dest.
	 */
	memcpy(nce->name, dest->start, nce->name_len);
	nce->name[nce->name_len] = '\0';

	/*
	 * An entry recorded before the inode got processed must be looked up
	 * again once send_progress has moved past it (see the check at the top
	 * of this function).
	 */
	if (ino < sctx->send_progress)
		nce->need_later_update = 0;
	else
		nce->need_later_update = 1;

	nce_ret = btrfs_lru_cache_store(&sctx->name_cache, &nce->entry, GFP_KERNEL);
	if (nce_ret < 0) {
		kfree(nce);
		ret = nce_ret;
	}

out:
	return ret;
}
242131db9f7cSAlexander Block 
/*
 * Magic happens here. This function returns the first ref to an inode as it
 * would look like while receiving the stream at this point in time.
 * We walk the path up to the root. For every inode in between, we check if it
 * was already processed/sent. If yes, we continue with the parent as found
 * in send_root. If not, we continue with the parent as found in parent_root.
 * If we encounter an inode that was deleted at this point in time, we use the
 * inode's "orphan" name instead of the real name and stop. Same with new inodes
 * that were not created yet and overwritten inodes/refs.
 *
 * When do we have orphan inodes:
 * 1. When an inode is freshly created and thus no valid refs are available yet
 * 2. When a directory lost all its refs (deleted) but still has dir items
 *    inside which were not processed yet (pending for move/delete). If anyone
 *    tried to get the path to the dir items, it would get a path inside that
 *    orphan directory.
 * 3. When an inode is moved around or gets new links, it may overwrite the ref
 *    of an unprocessed inode. If in that case the first ref would be
 *    overwritten, the overwritten inode gets "orphanized". Later when we
 *    process this overwritten inode, it is restored at a new place by moving
 *    the orphan inode.
 *
 * sctx->send_progress tells this function at which point in time receiving
 * would be.
 */
static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
			struct fs_path *dest)
{
	struct fs_path *comp;
	u64 up_ino = 0;
	u64 up_gen = 0;
	int done = 0;
	int ret = 0;

	comp = fs_path_alloc();
	if (!comp) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Components are collected walking upwards from the inode, so dest is
	 * filled in reversed mode and unreversed at the end on success.
	 */
	dest->reversed = 1;
	fs_path_reset(dest);

	while (ino != BTRFS_FIRST_FREE_OBJECTID && !done) {
		struct waiting_dir_move *wdm;

		fs_path_reset(comp);

		/* Pending rmdir: use the unique (orphan) name and stop here. */
		if (is_waiting_for_rm(sctx, ino, gen)) {
			ret = gen_unique_name(sctx, ino, gen, comp);
			if (ret >= 0)
				ret = fs_path_add_path(dest, comp);
			goto out;
		}

		wdm = get_waiting_dir_move(sctx, ino);
		if (!wdm) {
			ret = __get_cur_name_and_parent(sctx, ino, gen,
							&up_ino, &up_gen,
							comp);
			/* Non-zero: either an error or an orphan name. */
			if (ret)
				done = 1;
		} else if (wdm->orphanized) {
			ret = gen_unique_name(sctx, ino, gen, comp);
			done = 1;
		} else {
			/* Move still pending: name comes from parent_root. */
			ret = get_first_ref(sctx->parent_root, ino,
					    &up_ino, &up_gen, comp);
		}

		if (ret < 0)
			goto out;

		ret = fs_path_add_path(dest, comp);
		if (ret < 0)
			goto out;

		/* Continue with the parent directory. */
		ino = up_ino;
		gen = up_gen;
	}

out:
	fs_path_free(comp);
	if (!ret)
		fs_path_unreverse(dest);
	return ret;
}
251031db9f7cSAlexander Block 
251131db9f7cSAlexander Block /*
251231db9f7cSAlexander Block  * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
251331db9f7cSAlexander Block  */
send_subvol_begin(struct send_ctx * sctx)251431db9f7cSAlexander Block static int send_subvol_begin(struct send_ctx *sctx)
251531db9f7cSAlexander Block {
251631db9f7cSAlexander Block 	int ret;
251731db9f7cSAlexander Block 	struct btrfs_root *send_root = sctx->send_root;
251831db9f7cSAlexander Block 	struct btrfs_root *parent_root = sctx->parent_root;
251931db9f7cSAlexander Block 	struct btrfs_path *path;
252031db9f7cSAlexander Block 	struct btrfs_key key;
252131db9f7cSAlexander Block 	struct btrfs_root_ref *ref;
252231db9f7cSAlexander Block 	struct extent_buffer *leaf;
252331db9f7cSAlexander Block 	char *name = NULL;
252431db9f7cSAlexander Block 	int namelen;
252531db9f7cSAlexander Block 
2526ffcfaf81SWang Shilong 	path = btrfs_alloc_path();
252731db9f7cSAlexander Block 	if (!path)
252831db9f7cSAlexander Block 		return -ENOMEM;
252931db9f7cSAlexander Block 
2530e780b0d1SDavid Sterba 	name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
253131db9f7cSAlexander Block 	if (!name) {
253231db9f7cSAlexander Block 		btrfs_free_path(path);
253331db9f7cSAlexander Block 		return -ENOMEM;
253431db9f7cSAlexander Block 	}
253531db9f7cSAlexander Block 
25364fd786e6SMisono Tomohiro 	key.objectid = send_root->root_key.objectid;
253731db9f7cSAlexander Block 	key.type = BTRFS_ROOT_BACKREF_KEY;
253831db9f7cSAlexander Block 	key.offset = 0;
253931db9f7cSAlexander Block 
254031db9f7cSAlexander Block 	ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
254131db9f7cSAlexander Block 				&key, path, 1, 0);
254231db9f7cSAlexander Block 	if (ret < 0)
254331db9f7cSAlexander Block 		goto out;
254431db9f7cSAlexander Block 	if (ret) {
254531db9f7cSAlexander Block 		ret = -ENOENT;
254631db9f7cSAlexander Block 		goto out;
254731db9f7cSAlexander Block 	}
254831db9f7cSAlexander Block 
254931db9f7cSAlexander Block 	leaf = path->nodes[0];
255031db9f7cSAlexander Block 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
255131db9f7cSAlexander Block 	if (key.type != BTRFS_ROOT_BACKREF_KEY ||
25524fd786e6SMisono Tomohiro 	    key.objectid != send_root->root_key.objectid) {
255331db9f7cSAlexander Block 		ret = -ENOENT;
255431db9f7cSAlexander Block 		goto out;
255531db9f7cSAlexander Block 	}
255631db9f7cSAlexander Block 	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
255731db9f7cSAlexander Block 	namelen = btrfs_root_ref_name_len(leaf, ref);
255831db9f7cSAlexander Block 	read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
255931db9f7cSAlexander Block 	btrfs_release_path(path);
256031db9f7cSAlexander Block 
256131db9f7cSAlexander Block 	if (parent_root) {
256231db9f7cSAlexander Block 		ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
256331db9f7cSAlexander Block 		if (ret < 0)
256431db9f7cSAlexander Block 			goto out;
256531db9f7cSAlexander Block 	} else {
256631db9f7cSAlexander Block 		ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
256731db9f7cSAlexander Block 		if (ret < 0)
256831db9f7cSAlexander Block 			goto out;
256931db9f7cSAlexander Block 	}
257031db9f7cSAlexander Block 
257131db9f7cSAlexander Block 	TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
2572b96b1db0SRobin Ruede 
2573b96b1db0SRobin Ruede 	if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
2574b96b1db0SRobin Ruede 		TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
2575b96b1db0SRobin Ruede 			    sctx->send_root->root_item.received_uuid);
2576b96b1db0SRobin Ruede 	else
257731db9f7cSAlexander Block 		TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
257831db9f7cSAlexander Block 			    sctx->send_root->root_item.uuid);
2579b96b1db0SRobin Ruede 
258031db9f7cSAlexander Block 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
258109e3a288SDavid Sterba 		    btrfs_root_ctransid(&sctx->send_root->root_item));
258231db9f7cSAlexander Block 	if (parent_root) {
258337b8d27dSJosef Bacik 		if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
258431db9f7cSAlexander Block 			TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
258537b8d27dSJosef Bacik 				     parent_root->root_item.received_uuid);
258637b8d27dSJosef Bacik 		else
258737b8d27dSJosef Bacik 			TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
258837b8d27dSJosef Bacik 				     parent_root->root_item.uuid);
258931db9f7cSAlexander Block 		TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
259009e3a288SDavid Sterba 			    btrfs_root_ctransid(&sctx->parent_root->root_item));
259131db9f7cSAlexander Block 	}
259231db9f7cSAlexander Block 
259331db9f7cSAlexander Block 	ret = send_cmd(sctx);
259431db9f7cSAlexander Block 
259531db9f7cSAlexander Block tlv_put_failure:
259631db9f7cSAlexander Block out:
259731db9f7cSAlexander Block 	btrfs_free_path(path);
259831db9f7cSAlexander Block 	kfree(name);
259931db9f7cSAlexander Block 	return ret;
260031db9f7cSAlexander Block }
260131db9f7cSAlexander Block 
/*
 * Sends a BTRFS_SEND_C_TRUNCATE command carrying the current path of inode
 * ino/gen and the new size.
 *
 * Returns the result of send_cmd(), or a negative errno from an earlier step.
 */
static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	struct fs_path *cur_path;
	int ret;

	btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size);

	cur_path = fs_path_alloc();
	if (!cur_path)
		return -ENOMEM;

	ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
	if (ret >= 0)
		ret = get_cur_path(sctx, ino, gen, cur_path);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, cur_path);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);

	ret = send_cmd(sctx);

	/* Label kept for the TLV_PUT_* macros, which presumably jump here. */
tlv_put_failure:
out:
	fs_path_free(cur_path);
	return ret;
}
263131db9f7cSAlexander Block 
/*
 * Sends a BTRFS_SEND_C_CHMOD command carrying the current path of inode
 * ino/gen and the low 12 bits (07777) of mode.
 *
 * Returns the result of send_cmd(), or a negative errno from an earlier step.
 */
static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	struct fs_path *cur_path;
	int ret;

	btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode);

	cur_path = fs_path_alloc();
	if (!cur_path)
		return -ENOMEM;

	ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
	if (ret >= 0)
		ret = get_cur_path(sctx, ino, gen, cur_path);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, cur_path);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);

	ret = send_cmd(sctx);

	/* Label kept for the TLV_PUT_* macros, which presumably jump here. */
tlv_put_failure:
out:
	fs_path_free(cur_path);
	return ret;
}
266131db9f7cSAlexander Block 
/*
 * Sends a BTRFS_SEND_C_FILEATTR command carrying the current path of inode
 * ino/gen and the fileattr value. Does nothing and returns 0 when the stream
 * protocol version (sctx->proto) is below 2.
 *
 * Returns the result of send_cmd(), or a negative errno from an earlier step.
 */
static int send_fileattr(struct send_ctx *sctx, u64 ino, u64 gen, u64 fileattr)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	struct fs_path *cur_path;
	int ret;

	if (sctx->proto < 2)
		return 0;

	btrfs_debug(fs_info, "send_fileattr %llu fileattr=%llu", ino, fileattr);

	cur_path = fs_path_alloc();
	if (!cur_path)
		return -ENOMEM;

	ret = begin_cmd(sctx, BTRFS_SEND_C_FILEATTR);
	if (ret >= 0)
		ret = get_cur_path(sctx, ino, gen, cur_path);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, cur_path);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILEATTR, fileattr);

	ret = send_cmd(sctx);

	/* Label kept for the TLV_PUT_* macros, which presumably jump here. */
tlv_put_failure:
out:
	fs_path_free(cur_path);
	return ret;
}
269448247359SDavid Sterba 
/*
 * Sends a BTRFS_SEND_C_CHOWN command carrying the current path of inode
 * ino/gen together with the uid and gid.
 *
 * Returns the result of send_cmd(), or a negative errno from an earlier step.
 */
static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	struct fs_path *cur_path;
	int ret;

	btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu",
		    ino, uid, gid);

	cur_path = fs_path_alloc();
	if (!cur_path)
		return -ENOMEM;

	ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
	if (ret >= 0)
		ret = get_cur_path(sctx, ino, gen, cur_path);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, cur_path);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);

	ret = send_cmd(sctx);

	/* Label kept for the TLV_PUT_* macros, which presumably jump here. */
tlv_put_failure:
out:
	fs_path_free(cur_path);
	return ret;
}
272631db9f7cSAlexander Block 
/*
 * Sends a BTRFS_SEND_C_UTIMES command for inode ino/gen. The timestamps are
 * read from the inode item found in the send root: atime, mtime and ctime are
 * always sent, otime only when the stream protocol version is at least 2.
 *
 * Returns -ENOENT if the inode item is not found, otherwise the result of
 * send_cmd() or a negative errno from an earlier step.
 */
static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	struct btrfs_inode_item *ii;
	struct btrfs_path *path = NULL;
	struct extent_buffer *eb;
	struct fs_path *cur_path;
	struct btrfs_key key;
	int ret;

	btrfs_debug(fs_info, "send_utimes %llu", ino);

	cur_path = fs_path_alloc();
	if (!cur_path)
		return -ENOMEM;

	path = alloc_path_for_send();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	key.objectid = ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
	if (ret) {
		/* A positive result means there is no exact match. */
		if (ret > 0)
			ret = -ENOENT;
		goto out;
	}

	eb = path->nodes[0];
	ii = btrfs_item_ptr(eb, path->slots[0], struct btrfs_inode_item);

	ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
	if (ret >= 0)
		ret = get_cur_path(sctx, ino, gen, cur_path);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, cur_path);
	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime);
	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime);
	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime);
	if (sctx->proto >= 2)
		TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_OTIME, eb, &ii->otime);

	ret = send_cmd(sctx);

	/* Label kept for the TLV_PUT_* macros, which presumably jump here. */
tlv_put_failure:
out:
	fs_path_free(cur_path);
	btrfs_free_path(path);
	return ret;
}
278531db9f7cSAlexander Block 
/*
 * If the cache is full, we can't remove entries from it and do a call to
 * send_utimes() for each respective inode, because we might be finishing
 * processing an inode that is a directory and it just got renamed, and existing
 * entries in the cache may refer to inodes that have the directory in their
 * full path - in which case we would generate outdated paths (pre-rename)
 * for the inodes that the cache entries point to. Instead of pruning the
 * cache when inserting, do it after we finish processing each inode at
 * finish_inode_if_needed().
 */
static int cache_dir_utimes(struct send_ctx *sctx, u64 dir, u64 gen)
{
	struct btrfs_lru_cache_entry *entry;
	int ret;

	/* Already recorded, nothing to do. */
	if (btrfs_lru_cache_lookup(&sctx->dir_utimes_cache, dir, gen) != NULL)
		return 0;

	/* Caching is optional, don't fail if we can't allocate memory. */
	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (entry) {
		entry->key = dir;
		entry->gen = gen;

		ret = btrfs_lru_cache_store(&sctx->dir_utimes_cache, entry,
					    GFP_KERNEL);
		/* The lookup above just failed, so a duplicate is unexpected. */
		ASSERT(ret != -EEXIST);
		if (!ret)
			return 0;

		kfree(entry);
	}

	/* Could not cache the directory: send its utimes right away. */
	return send_utimes(sctx, dir, gen);
}
28223e49363bSFilipe Manana 
trim_dir_utimes_cache(struct send_ctx * sctx)28233e49363bSFilipe Manana static int trim_dir_utimes_cache(struct send_ctx *sctx)
28243e49363bSFilipe Manana {
28253e49363bSFilipe Manana 	while (btrfs_lru_cache_size(&sctx->dir_utimes_cache) >
28263e49363bSFilipe Manana 	       SEND_MAX_DIR_UTIMES_CACHE_SIZE) {
28273e49363bSFilipe Manana 		struct btrfs_lru_cache_entry *lru;
28283e49363bSFilipe Manana 		int ret;
28293e49363bSFilipe Manana 
28303e49363bSFilipe Manana 		lru = btrfs_lru_cache_lru_entry(&sctx->dir_utimes_cache);
28313e49363bSFilipe Manana 		ASSERT(lru != NULL);
28323e49363bSFilipe Manana 
28333e49363bSFilipe Manana 		ret = send_utimes(sctx, lru->key, lru->gen);
28343e49363bSFilipe Manana 		if (ret)
28353e49363bSFilipe Manana 			return ret;
28363e49363bSFilipe Manana 
28373e49363bSFilipe Manana 		btrfs_lru_cache_remove(&sctx->dir_utimes_cache, lru);
28383e49363bSFilipe Manana 	}
28393e49363bSFilipe Manana 
28403e49363bSFilipe Manana 	return 0;
28413e49363bSFilipe Manana }
28423e49363bSFilipe Manana 
28433e49363bSFilipe Manana /*
284431db9f7cSAlexander Block  * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
284531db9f7cSAlexander Block  * a valid path yet because we did not process the refs yet. So, the inode
284631db9f7cSAlexander Block  * is created as orphan.
284731db9f7cSAlexander Block  */
static int send_create_inode(struct send_ctx *sctx, u64 ino)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	struct btrfs_inode_info info;
	struct fs_path *p;
	u64 gen;
	u64 mode;
	u64 rdev;
	int cmd;
	int ret = 0;

	btrfs_debug(fs_info, "send_create_inode %llu", ino);

	p = fs_path_alloc();
	if (!p)
		return -ENOMEM;

	/*
	 * For the inode currently being processed the values are already
	 * available in sctx; any other inode is looked up in the send root.
	 */
	if (ino == sctx->cur_ino) {
		gen = sctx->cur_inode_gen;
		mode = sctx->cur_inode_mode;
		rdev = sctx->cur_inode_rdev;
	} else {
		ret = get_inode_info(sctx->send_root, ino, &info);
		if (ret < 0)
			goto out;
		gen = info.gen;
		mode = info.mode;
		rdev = info.rdev;
	}

	/* Pick the creation command matching the inode type. */
	switch (mode & S_IFMT) {
	case S_IFREG:
		cmd = BTRFS_SEND_C_MKFILE;
		break;
	case S_IFDIR:
		cmd = BTRFS_SEND_C_MKDIR;
		break;
	case S_IFLNK:
		cmd = BTRFS_SEND_C_SYMLINK;
		break;
	case S_IFCHR:
	case S_IFBLK:
		cmd = BTRFS_SEND_C_MKNOD;
		break;
	case S_IFIFO:
		cmd = BTRFS_SEND_C_MKFIFO;
		break;
	case S_IFSOCK:
		cmd = BTRFS_SEND_C_MKSOCK;
		break;
	default:
		btrfs_warn(fs_info, "unexpected inode type %o",
			   (int)(mode & S_IFMT));
		ret = -EOPNOTSUPP;
		goto out;
	}

	ret = begin_cmd(sctx, cmd);
	if (ret < 0)
		goto out;

	/* No valid path yet: the inode is created under its unique name. */
	ret = gen_unique_name(sctx, ino, gen, p);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);

	if (S_ISLNK(mode)) {
		/* Reuse p to carry the symlink target. */
		fs_path_reset(p);
		ret = read_symlink(sctx->send_root, ino, p);
		if (ret < 0)
			goto out;
		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
	} else if (S_ISCHR(mode) || S_ISBLK(mode) ||
		   S_ISFIFO(mode) || S_ISSOCK(mode)) {
		TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
		TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
	}

	ret = send_cmd(sctx);

	/* Label kept for the TLV_PUT_* macros, which presumably jump here. */
tlv_put_failure:
out:
	fs_path_free(p);
	return ret;
}
293031db9f7cSAlexander Block 
/*
 * Records directory inode number dir in the dir created cache, always with
 * generation 0. Caching is optional, so allocation and store failures are
 * silently ignored.
 */
static void cache_dir_created(struct send_ctx *sctx, u64 dir)
{
	struct btrfs_lru_cache_entry *entry;

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return;

	entry->key = dir;
	entry->gen = 0;

	/* On failure the entry was not stored, so free it here. */
	if (btrfs_lru_cache_store(&sctx->dir_created_cache, entry,
				  GFP_KERNEL) < 0)
		kfree(entry);
}
2947e8a7f49dSFilipe Manana 
29481f4692daSAlexander Block /*
29491f4692daSAlexander Block  * We need some special handling for inodes that get processed before the parent
29501f4692daSAlexander Block  * directory got created. See process_recorded_refs for details.
29511f4692daSAlexander Block  * This function does the check if we already created the dir out of order.
29521f4692daSAlexander Block  */
did_create_dir(struct send_ctx * sctx,u64 dir)29531f4692daSAlexander Block static int did_create_dir(struct send_ctx *sctx, u64 dir)
29541f4692daSAlexander Block {
29551f4692daSAlexander Block 	int ret = 0;
29566dcee260SGabriel Niebler 	int iter_ret = 0;
29571f4692daSAlexander Block 	struct btrfs_path *path = NULL;
29581f4692daSAlexander Block 	struct btrfs_key key;
29591f4692daSAlexander Block 	struct btrfs_key found_key;
29601f4692daSAlexander Block 	struct btrfs_key di_key;
29611f4692daSAlexander Block 	struct btrfs_dir_item *di;
29621f4692daSAlexander Block 
29630da0c560SFilipe Manana 	if (btrfs_lru_cache_lookup(&sctx->dir_created_cache, dir, 0))
2964e8a7f49dSFilipe Manana 		return 1;
2965e8a7f49dSFilipe Manana 
29661f4692daSAlexander Block 	path = alloc_path_for_send();
29676dcee260SGabriel Niebler 	if (!path)
29686dcee260SGabriel Niebler 		return -ENOMEM;
29691f4692daSAlexander Block 
29701f4692daSAlexander Block 	key.objectid = dir;
29711f4692daSAlexander Block 	key.type = BTRFS_DIR_INDEX_KEY;
29721f4692daSAlexander Block 	key.offset = 0;
2973dff6d0adSFilipe David Borba Manana 
29746dcee260SGabriel Niebler 	btrfs_for_each_slot(sctx->send_root, &key, &found_key, path, iter_ret) {
29756dcee260SGabriel Niebler 		struct extent_buffer *eb = path->nodes[0];
2976dff6d0adSFilipe David Borba Manana 
2977dff6d0adSFilipe David Borba Manana 		if (found_key.objectid != key.objectid ||
29781f4692daSAlexander Block 		    found_key.type != key.type) {
29791f4692daSAlexander Block 			ret = 0;
29806dcee260SGabriel Niebler 			break;
29811f4692daSAlexander Block 		}
29821f4692daSAlexander Block 
29836dcee260SGabriel Niebler 		di = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dir_item);
29841f4692daSAlexander Block 		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
29851f4692daSAlexander Block 
2986a0525414SJosef Bacik 		if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
2987a0525414SJosef Bacik 		    di_key.objectid < sctx->send_progress) {
29881f4692daSAlexander Block 			ret = 1;
2989e8a7f49dSFilipe Manana 			cache_dir_created(sctx, dir);
29906dcee260SGabriel Niebler 			break;
29911f4692daSAlexander Block 		}
29921f4692daSAlexander Block 	}
29936dcee260SGabriel Niebler 	/* Catch error found during iteration */
29946dcee260SGabriel Niebler 	if (iter_ret < 0)
29956dcee260SGabriel Niebler 		ret = iter_ret;
29961f4692daSAlexander Block 
29971f4692daSAlexander Block 	btrfs_free_path(path);
29981f4692daSAlexander Block 	return ret;
29991f4692daSAlexander Block }
30001f4692daSAlexander Block 
30011f4692daSAlexander Block /*
30021f4692daSAlexander Block  * Only creates the inode if it is:
30031f4692daSAlexander Block  * 1. Not a directory
30041f4692daSAlexander Block  * 2. Or a directory which was not created already due to out of order
30051f4692daSAlexander Block  *    directories. See did_create_dir and process_recorded_refs for details.
30061f4692daSAlexander Block  */
send_create_inode_if_needed(struct send_ctx * sctx)30071f4692daSAlexander Block static int send_create_inode_if_needed(struct send_ctx *sctx)
30081f4692daSAlexander Block {
30091f4692daSAlexander Block 	int ret;
30101f4692daSAlexander Block 
30111f4692daSAlexander Block 	if (S_ISDIR(sctx->cur_inode_mode)) {
30121f4692daSAlexander Block 		ret = did_create_dir(sctx, sctx->cur_ino);
30131f4692daSAlexander Block 		if (ret < 0)
30141f4692daSAlexander Block 			return ret;
30150e3dd5bcSMarcos Paulo de Souza 		else if (ret > 0)
30160e3dd5bcSMarcos Paulo de Souza 			return 0;
30170e3dd5bcSMarcos Paulo de Souza 	}
30180e3dd5bcSMarcos Paulo de Souza 
3019e8a7f49dSFilipe Manana 	ret = send_create_inode(sctx, sctx->cur_ino);
3020e8a7f49dSFilipe Manana 
3021e8a7f49dSFilipe Manana 	if (ret == 0 && S_ISDIR(sctx->cur_inode_mode))
3022e8a7f49dSFilipe Manana 		cache_dir_created(sctx, sctx->cur_ino);
3023e8a7f49dSFilipe Manana 
3024e8a7f49dSFilipe Manana 	return ret;
30251f4692daSAlexander Block }
30261f4692daSAlexander Block 
302731db9f7cSAlexander Block struct recorded_ref {
302831db9f7cSAlexander Block 	struct list_head list;
302931db9f7cSAlexander Block 	char *name;
303031db9f7cSAlexander Block 	struct fs_path *full_path;
303131db9f7cSAlexander Block 	u64 dir;
303231db9f7cSAlexander Block 	u64 dir_gen;
303331db9f7cSAlexander Block 	int name_len;
30343aa5bd36SBingJing Chang 	struct rb_node node;
30353aa5bd36SBingJing Chang 	struct rb_root *root;
303631db9f7cSAlexander Block };
303731db9f7cSAlexander Block 
recorded_ref_alloc(void)303871ecfc13SBingJing Chang static struct recorded_ref *recorded_ref_alloc(void)
303971ecfc13SBingJing Chang {
304071ecfc13SBingJing Chang 	struct recorded_ref *ref;
304171ecfc13SBingJing Chang 
304271ecfc13SBingJing Chang 	ref = kzalloc(sizeof(*ref), GFP_KERNEL);
304371ecfc13SBingJing Chang 	if (!ref)
304471ecfc13SBingJing Chang 		return NULL;
30453aa5bd36SBingJing Chang 	RB_CLEAR_NODE(&ref->node);
304671ecfc13SBingJing Chang 	INIT_LIST_HEAD(&ref->list);
304771ecfc13SBingJing Chang 	return ref;
304871ecfc13SBingJing Chang }
304971ecfc13SBingJing Chang 
recorded_ref_free(struct recorded_ref * ref)305071ecfc13SBingJing Chang static void recorded_ref_free(struct recorded_ref *ref)
305171ecfc13SBingJing Chang {
305271ecfc13SBingJing Chang 	if (!ref)
305371ecfc13SBingJing Chang 		return;
30543aa5bd36SBingJing Chang 	if (!RB_EMPTY_NODE(&ref->node))
30553aa5bd36SBingJing Chang 		rb_erase(&ref->node, ref->root);
305671ecfc13SBingJing Chang 	list_del(&ref->list);
305771ecfc13SBingJing Chang 	fs_path_free(ref->full_path);
305871ecfc13SBingJing Chang 	kfree(ref);
305971ecfc13SBingJing Chang }
306071ecfc13SBingJing Chang 
set_ref_path(struct recorded_ref * ref,struct fs_path * path)3061fdb13889SFilipe Manana static void set_ref_path(struct recorded_ref *ref, struct fs_path *path)
3062fdb13889SFilipe Manana {
3063fdb13889SFilipe Manana 	ref->full_path = path;
3064fdb13889SFilipe Manana 	ref->name = (char *)kbasename(ref->full_path->start);
3065fdb13889SFilipe Manana 	ref->name_len = ref->full_path->end - ref->name;
3066fdb13889SFilipe Manana }
3067fdb13889SFilipe Manana 
dup_ref(struct recorded_ref * ref,struct list_head * list)3068ba5e8f2eSJosef Bacik static int dup_ref(struct recorded_ref *ref, struct list_head *list)
3069ba5e8f2eSJosef Bacik {
3070ba5e8f2eSJosef Bacik 	struct recorded_ref *new;
3071ba5e8f2eSJosef Bacik 
307271ecfc13SBingJing Chang 	new = recorded_ref_alloc();
3073ba5e8f2eSJosef Bacik 	if (!new)
3074ba5e8f2eSJosef Bacik 		return -ENOMEM;
3075ba5e8f2eSJosef Bacik 
3076ba5e8f2eSJosef Bacik 	new->dir = ref->dir;
3077ba5e8f2eSJosef Bacik 	new->dir_gen = ref->dir_gen;
3078ba5e8f2eSJosef Bacik 	list_add_tail(&new->list, list);
3079ba5e8f2eSJosef Bacik 	return 0;
3080ba5e8f2eSJosef Bacik }
3081ba5e8f2eSJosef Bacik 
__free_recorded_refs(struct list_head * head)3082924794c9STsutomu Itoh static void __free_recorded_refs(struct list_head *head)
308331db9f7cSAlexander Block {
308431db9f7cSAlexander Block 	struct recorded_ref *cur;
308531db9f7cSAlexander Block 
3086e938c8adSAlexander Block 	while (!list_empty(head)) {
3087e938c8adSAlexander Block 		cur = list_entry(head->next, struct recorded_ref, list);
308871ecfc13SBingJing Chang 		recorded_ref_free(cur);
308931db9f7cSAlexander Block 	}
309031db9f7cSAlexander Block }
309131db9f7cSAlexander Block 
free_recorded_refs(struct send_ctx * sctx)309231db9f7cSAlexander Block static void free_recorded_refs(struct send_ctx *sctx)
309331db9f7cSAlexander Block {
3094924794c9STsutomu Itoh 	__free_recorded_refs(&sctx->new_refs);
3095924794c9STsutomu Itoh 	__free_recorded_refs(&sctx->deleted_refs);
309631db9f7cSAlexander Block }
309731db9f7cSAlexander Block 
309831db9f7cSAlexander Block /*
3099766702efSAlexander Block  * Renames/moves a file/dir to its orphan name. Used when the first
310031db9f7cSAlexander Block  * ref of an unprocessed inode gets overwritten and for all non empty
310131db9f7cSAlexander Block  * directories.
310231db9f7cSAlexander Block  */
orphanize_inode(struct send_ctx * sctx,u64 ino,u64 gen,struct fs_path * path)310331db9f7cSAlexander Block static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
310431db9f7cSAlexander Block 			  struct fs_path *path)
310531db9f7cSAlexander Block {
310631db9f7cSAlexander Block 	int ret;
310731db9f7cSAlexander Block 	struct fs_path *orphan;
310831db9f7cSAlexander Block 
3109924794c9STsutomu Itoh 	orphan = fs_path_alloc();
311031db9f7cSAlexander Block 	if (!orphan)
311131db9f7cSAlexander Block 		return -ENOMEM;
311231db9f7cSAlexander Block 
311331db9f7cSAlexander Block 	ret = gen_unique_name(sctx, ino, gen, orphan);
311431db9f7cSAlexander Block 	if (ret < 0)
311531db9f7cSAlexander Block 		goto out;
311631db9f7cSAlexander Block 
311731db9f7cSAlexander Block 	ret = send_rename(sctx, path, orphan);
311831db9f7cSAlexander Block 
311931db9f7cSAlexander Block out:
3120924794c9STsutomu Itoh 	fs_path_free(orphan);
312131db9f7cSAlexander Block 	return ret;
312231db9f7cSAlexander Block }
312331db9f7cSAlexander Block 
/*
 * Look up the orphan dir info for (@dir_ino, @dir_gen) in sctx->orphan_dirs,
 * inserting a new zero-initialized entry if there is none yet.
 *
 * The tree is ordered by inode number first and generation second.
 *
 * Returns the existing or newly inserted entry, or ERR_PTR(-ENOMEM).
 */
static struct orphan_dir_info *add_orphan_dir_info(struct send_ctx *sctx,
						   u64 dir_ino, u64 dir_gen)
{
	struct rb_node **link = &sctx->orphan_dirs.rb_node;
	struct rb_node *parent = NULL;
	struct orphan_dir_info *odi;

	while (*link) {
		struct orphan_dir_info *cur;

		parent = *link;
		cur = rb_entry(parent, struct orphan_dir_info, node);

		if (dir_ino != cur->ino)
			link = (dir_ino < cur->ino) ? &parent->rb_left :
						      &parent->rb_right;
		else if (dir_gen != cur->gen)
			link = (dir_gen < cur->gen) ? &parent->rb_left :
						      &parent->rb_right;
		else
			return cur;
	}

	odi = kmalloc(sizeof(*odi), GFP_KERNEL);
	if (!odi)
		return ERR_PTR(-ENOMEM);

	odi->ino = dir_ino;
	odi->gen = dir_gen;
	odi->last_dir_index_offset = 0;
	odi->dir_high_seq_ino = 0;

	/* Hook the new entry in at the position found by the search above. */
	rb_link_node(&odi->node, parent, link);
	rb_insert_color(&odi->node, &sctx->orphan_dirs);

	return odi;
}
31589dc44214SFilipe Manana 
/*
 * Search sctx->orphan_dirs for the entry matching both @dir_ino and @gen.
 * Returns the entry, or NULL when the tree holds no such entry.
 */
static struct orphan_dir_info *get_orphan_dir_info(struct send_ctx *sctx,
						   u64 dir_ino, u64 gen)
{
	struct rb_node *node = sctx->orphan_dirs.rb_node;

	while (node) {
		struct orphan_dir_info *cur;

		cur = rb_entry(node, struct orphan_dir_info, node);

		/* Inode number is the primary key, generation the secondary. */
		if (dir_ino != cur->ino)
			node = (dir_ino < cur->ino) ? node->rb_left :
						      node->rb_right;
		else if (gen != cur->gen)
			node = (gen < cur->gen) ? node->rb_left :
						  node->rb_right;
		else
			return cur;
	}

	return NULL;
}
31809dc44214SFilipe Manana 
/*
 * Returns 1 if sctx->orphan_dirs has an entry for (@dir_ino, @gen), otherwise
 * 0.
 */
static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen)
{
	return get_orphan_dir_info(sctx, dir_ino, gen) != NULL;
}
31879dc44214SFilipe Manana 
free_orphan_dir_info(struct send_ctx * sctx,struct orphan_dir_info * odi)31889dc44214SFilipe Manana static void free_orphan_dir_info(struct send_ctx *sctx,
31899dc44214SFilipe Manana 				 struct orphan_dir_info *odi)
31909dc44214SFilipe Manana {
31919dc44214SFilipe Manana 	if (!odi)
31929dc44214SFilipe Manana 		return;
31939dc44214SFilipe Manana 	rb_erase(&odi->node, &sctx->orphan_dirs);
31949dc44214SFilipe Manana 	kfree(odi);
31959dc44214SFilipe Manana }
31969dc44214SFilipe Manana 
319731db9f7cSAlexander Block /*
319831db9f7cSAlexander Block  * Returns 1 if a directory can be removed at this point in time.
319931db9f7cSAlexander Block  * We check this by iterating all dir items and checking if the inode behind
320031db9f7cSAlexander Block  * the dir item was already processed.
320131db9f7cSAlexander Block  */
can_rmdir(struct send_ctx * sctx,u64 dir,u64 dir_gen)320224970ccbSFilipe Manana static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen)
320331db9f7cSAlexander Block {
320431db9f7cSAlexander Block 	int ret = 0;
320518f80f1fSGabriel Niebler 	int iter_ret = 0;
320631db9f7cSAlexander Block 	struct btrfs_root *root = sctx->parent_root;
320731db9f7cSAlexander Block 	struct btrfs_path *path;
320831db9f7cSAlexander Block 	struct btrfs_key key;
320931db9f7cSAlexander Block 	struct btrfs_key found_key;
321031db9f7cSAlexander Block 	struct btrfs_key loc;
321131db9f7cSAlexander Block 	struct btrfs_dir_item *di;
32120f96f517SRobbie Ko 	struct orphan_dir_info *odi = NULL;
3213474e4761SFilipe Manana 	u64 dir_high_seq_ino = 0;
3214474e4761SFilipe Manana 	u64 last_dir_index_offset = 0;
321531db9f7cSAlexander Block 
32166d85ed05SAlexander Block 	/*
32176d85ed05SAlexander Block 	 * Don't try to rmdir the top/root subvolume dir.
32186d85ed05SAlexander Block 	 */
32196d85ed05SAlexander Block 	if (dir == BTRFS_FIRST_FREE_OBJECTID)
32206d85ed05SAlexander Block 		return 0;
32216d85ed05SAlexander Block 
3222474e4761SFilipe Manana 	odi = get_orphan_dir_info(sctx, dir, dir_gen);
3223474e4761SFilipe Manana 	if (odi && sctx->cur_ino < odi->dir_high_seq_ino)
3224474e4761SFilipe Manana 		return 0;
3225474e4761SFilipe Manana 
322631db9f7cSAlexander Block 	path = alloc_path_for_send();
322731db9f7cSAlexander Block 	if (!path)
322831db9f7cSAlexander Block 		return -ENOMEM;
322931db9f7cSAlexander Block 
3230474e4761SFilipe Manana 	if (!odi) {
3231474e4761SFilipe Manana 		/*
3232474e4761SFilipe Manana 		 * Find the inode number associated with the last dir index
3233474e4761SFilipe Manana 		 * entry. This is very likely the inode with the highest number
3234474e4761SFilipe Manana 		 * of all inodes that have an entry in the directory. We can
3235474e4761SFilipe Manana 		 * then use it to avoid future calls to can_rmdir(), when
3236474e4761SFilipe Manana 		 * processing inodes with a lower number, from having to search
3237474e4761SFilipe Manana 		 * the parent root b+tree for dir index keys.
3238474e4761SFilipe Manana 		 */
323931db9f7cSAlexander Block 		key.objectid = dir;
324031db9f7cSAlexander Block 		key.type = BTRFS_DIR_INDEX_KEY;
3241474e4761SFilipe Manana 		key.offset = (u64)-1;
32420f96f517SRobbie Ko 
3243474e4761SFilipe Manana 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3244474e4761SFilipe Manana 		if (ret < 0) {
3245474e4761SFilipe Manana 			goto out;
3246474e4761SFilipe Manana 		} else if (ret > 0) {
3247474e4761SFilipe Manana 			/* Can't happen, the root is never empty. */
3248474e4761SFilipe Manana 			ASSERT(path->slots[0] > 0);
3249474e4761SFilipe Manana 			if (WARN_ON(path->slots[0] == 0)) {
3250474e4761SFilipe Manana 				ret = -EUCLEAN;
3251474e4761SFilipe Manana 				goto out;
3252474e4761SFilipe Manana 			}
3253474e4761SFilipe Manana 			path->slots[0]--;
3254474e4761SFilipe Manana 		}
3255474e4761SFilipe Manana 
3256474e4761SFilipe Manana 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3257474e4761SFilipe Manana 		if (key.objectid != dir || key.type != BTRFS_DIR_INDEX_KEY) {
3258474e4761SFilipe Manana 			/* No index keys, dir can be removed. */
3259474e4761SFilipe Manana 			ret = 1;
3260474e4761SFilipe Manana 			goto out;
3261474e4761SFilipe Manana 		}
3262474e4761SFilipe Manana 
3263474e4761SFilipe Manana 		di = btrfs_item_ptr(path->nodes[0], path->slots[0],
3264474e4761SFilipe Manana 				    struct btrfs_dir_item);
3265474e4761SFilipe Manana 		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
3266474e4761SFilipe Manana 		dir_high_seq_ino = loc.objectid;
3267474e4761SFilipe Manana 		if (sctx->cur_ino < dir_high_seq_ino) {
3268474e4761SFilipe Manana 			ret = 0;
3269474e4761SFilipe Manana 			goto out;
3270474e4761SFilipe Manana 		}
3271474e4761SFilipe Manana 
3272474e4761SFilipe Manana 		btrfs_release_path(path);
3273474e4761SFilipe Manana 	}
3274474e4761SFilipe Manana 
3275474e4761SFilipe Manana 	key.objectid = dir;
3276474e4761SFilipe Manana 	key.type = BTRFS_DIR_INDEX_KEY;
3277474e4761SFilipe Manana 	key.offset = (odi ? odi->last_dir_index_offset : 0);
32780f96f517SRobbie Ko 
327918f80f1fSGabriel Niebler 	btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
32809dc44214SFilipe Manana 		struct waiting_dir_move *dm;
32819dc44214SFilipe Manana 
3282dff6d0adSFilipe David Borba Manana 		if (found_key.objectid != key.objectid ||
3283dff6d0adSFilipe David Borba Manana 		    found_key.type != key.type)
328431db9f7cSAlexander Block 			break;
328531db9f7cSAlexander Block 
328631db9f7cSAlexander Block 		di = btrfs_item_ptr(path->nodes[0], path->slots[0],
328731db9f7cSAlexander Block 				struct btrfs_dir_item);
328831db9f7cSAlexander Block 		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
328931db9f7cSAlexander Block 
3290474e4761SFilipe Manana 		dir_high_seq_ino = max(dir_high_seq_ino, loc.objectid);
3291474e4761SFilipe Manana 		last_dir_index_offset = found_key.offset;
3292474e4761SFilipe Manana 
32939dc44214SFilipe Manana 		dm = get_waiting_dir_move(sctx, loc.objectid);
32949dc44214SFilipe Manana 		if (dm) {
32959dc44214SFilipe Manana 			dm->rmdir_ino = dir;
32960b3f407eSFilipe Manana 			dm->rmdir_gen = dir_gen;
32979dc44214SFilipe Manana 			ret = 0;
32989dc44214SFilipe Manana 			goto out;
32999dc44214SFilipe Manana 		}
33009dc44214SFilipe Manana 
330124970ccbSFilipe Manana 		if (loc.objectid > sctx->cur_ino) {
330231db9f7cSAlexander Block 			ret = 0;
330331db9f7cSAlexander Block 			goto out;
330431db9f7cSAlexander Block 		}
330518f80f1fSGabriel Niebler 	}
330618f80f1fSGabriel Niebler 	if (iter_ret < 0) {
330718f80f1fSGabriel Niebler 		ret = iter_ret;
330818f80f1fSGabriel Niebler 		goto out;
330931db9f7cSAlexander Block 	}
33100f96f517SRobbie Ko 	free_orphan_dir_info(sctx, odi);
331131db9f7cSAlexander Block 
331231db9f7cSAlexander Block 	ret = 1;
331331db9f7cSAlexander Block 
331431db9f7cSAlexander Block out:
331531db9f7cSAlexander Block 	btrfs_free_path(path);
3316d921b9cfSFilipe Manana 
3317d921b9cfSFilipe Manana 	if (ret)
331831db9f7cSAlexander Block 		return ret;
3319d921b9cfSFilipe Manana 
332078cf1a95SFilipe Manana 	if (!odi) {
3321d921b9cfSFilipe Manana 		odi = add_orphan_dir_info(sctx, dir, dir_gen);
3322d921b9cfSFilipe Manana 		if (IS_ERR(odi))
3323d921b9cfSFilipe Manana 			return PTR_ERR(odi);
3324d921b9cfSFilipe Manana 
3325d921b9cfSFilipe Manana 		odi->gen = dir_gen;
332678cf1a95SFilipe Manana 	}
332778cf1a95SFilipe Manana 
3328474e4761SFilipe Manana 	odi->last_dir_index_offset = last_dir_index_offset;
3329474e4761SFilipe Manana 	odi->dir_high_seq_ino = max(odi->dir_high_seq_ino, dir_high_seq_ino);
3330d921b9cfSFilipe Manana 
3331d921b9cfSFilipe Manana 	return 0;
333231db9f7cSAlexander Block }
333331db9f7cSAlexander Block 
/* Returns 1 if sctx->waiting_dir_moves has an entry for @ino, otherwise 0. */
static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
{
	return get_waiting_dir_move(sctx, ino) != NULL;
}
33409f03740aSFilipe David Borba Manana 
/*
 * Insert a new entry for @ino into sctx->waiting_dir_moves, with no rmdir
 * target set and the given @orphanized flag.
 *
 * Returns 0 on success, -ENOMEM if the entry can not be allocated and -EEXIST
 * if the tree already has an entry for @ino.
 */
static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
{
	struct rb_node **link = &sctx->waiting_dir_moves.rb_node;
	struct rb_node *parent = NULL;
	struct waiting_dir_move *dm;

	dm = kmalloc(sizeof(*dm), GFP_KERNEL);
	if (!dm)
		return -ENOMEM;

	dm->ino = ino;
	dm->rmdir_ino = 0;
	dm->rmdir_gen = 0;
	dm->orphanized = orphanized;

	while (*link) {
		struct waiting_dir_move *cur;

		parent = *link;
		cur = rb_entry(parent, struct waiting_dir_move, node);

		if (ino == cur->ino) {
			/* Duplicate, discard the entry prepared above. */
			kfree(dm);
			return -EEXIST;
		}

		link = (ino < cur->ino) ? &parent->rb_left : &parent->rb_right;
	}

	rb_link_node(&dm->node, parent, link);
	rb_insert_color(&dm->node, &sctx->waiting_dir_moves);

	return 0;
}
33729f03740aSFilipe David Borba Manana 
33739dc44214SFilipe Manana static struct waiting_dir_move *
get_waiting_dir_move(struct send_ctx * sctx,u64 ino)33749dc44214SFilipe Manana get_waiting_dir_move(struct send_ctx *sctx, u64 ino)
33759f03740aSFilipe David Borba Manana {
33769f03740aSFilipe David Borba Manana 	struct rb_node *n = sctx->waiting_dir_moves.rb_node;
33779f03740aSFilipe David Borba Manana 	struct waiting_dir_move *entry;
33789f03740aSFilipe David Borba Manana 
33799f03740aSFilipe David Borba Manana 	while (n) {
33809f03740aSFilipe David Borba Manana 		entry = rb_entry(n, struct waiting_dir_move, node);
33819dc44214SFilipe Manana 		if (ino < entry->ino)
33829f03740aSFilipe David Borba Manana 			n = n->rb_left;
33839dc44214SFilipe Manana 		else if (ino > entry->ino)
33849f03740aSFilipe David Borba Manana 			n = n->rb_right;
33859dc44214SFilipe Manana 		else
33869dc44214SFilipe Manana 			return entry;
33879f03740aSFilipe David Borba Manana 	}
33889dc44214SFilipe Manana 	return NULL;
33899f03740aSFilipe David Borba Manana }
33909dc44214SFilipe Manana 
free_waiting_dir_move(struct send_ctx * sctx,struct waiting_dir_move * dm)33919dc44214SFilipe Manana static void free_waiting_dir_move(struct send_ctx *sctx,
33929dc44214SFilipe Manana 				  struct waiting_dir_move *dm)
33939dc44214SFilipe Manana {
33949dc44214SFilipe Manana 	if (!dm)
33959dc44214SFilipe Manana 		return;
33969dc44214SFilipe Manana 	rb_erase(&dm->node, &sctx->waiting_dir_moves);
33979dc44214SFilipe Manana 	kfree(dm);
33989f03740aSFilipe David Borba Manana }
33999f03740aSFilipe David Borba Manana 
/*
 * Queue a pending move for directory @ino (generation @ino_gen), keyed by
 * @parent_ino in sctx->pending_dir_moves.
 *
 * The dir/dir_gen of every ref in @deleted_refs and then @new_refs is
 * duplicated into the update_refs list of the pending move, and @ino is also
 * added to the waiting dir moves tree with @is_orphan as its orphanized flag.
 * If the tree already has a pending move for @parent_ino, the new one is
 * chained to that entry's list instead of being inserted into the tree.
 *
 * Returns 0 on success or a non-zero error, in which case nothing is queued.
 */
static int add_pending_dir_move(struct send_ctx *sctx,
				u64 ino,
				u64 ino_gen,
				u64 parent_ino,
				struct list_head *new_refs,
				struct list_head *deleted_refs,
				const bool is_orphan)
{
	struct rb_node **link = &sctx->pending_dir_moves.rb_node;
	struct rb_node *parent = NULL;
	struct pending_dir_move *existing = NULL;
	struct pending_dir_move *pm;
	struct recorded_ref *cur;
	int ret;

	pm = kmalloc(sizeof(*pm), GFP_KERNEL);
	if (!pm)
		return -ENOMEM;

	pm->parent_ino = parent_ino;
	pm->ino = ino;
	pm->gen = ino_gen;
	INIT_LIST_HEAD(&pm->list);
	INIT_LIST_HEAD(&pm->update_refs);
	RB_CLEAR_NODE(&pm->node);

	/* Find either an entry with the same parent or the insertion slot. */
	while (*link) {
		struct pending_dir_move *entry;

		parent = *link;
		entry = rb_entry(parent, struct pending_dir_move, node);

		if (parent_ino < entry->parent_ino) {
			link = &parent->rb_left;
		} else if (parent_ino > entry->parent_ino) {
			link = &parent->rb_right;
		} else {
			existing = entry;
			break;
		}
	}

	list_for_each_entry(cur, deleted_refs, list) {
		ret = dup_ref(cur, &pm->update_refs);
		if (ret < 0)
			goto out_free;
	}
	list_for_each_entry(cur, new_refs, list) {
		ret = dup_ref(cur, &pm->update_refs);
		if (ret < 0)
			goto out_free;
	}

	ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
	if (ret)
		goto out_free;

	if (existing) {
		list_add_tail(&pm->list, &existing->list);
	} else {
		rb_link_node(&pm->node, parent, link);
		rb_insert_color(&pm->node, &sctx->pending_dir_moves);
	}

	return 0;

out_free:
	/* Undo the partially built pending move. */
	__free_recorded_refs(&pm->update_refs);
	kfree(pm);
	return ret;
}
34679f03740aSFilipe David Borba Manana 
/*
 * Search sctx->pending_dir_moves for the entry keyed by @parent_ino.
 * Returns the entry, or NULL when there is none.
 */
static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
						      u64 parent_ino)
{
	struct rb_node *node = sctx->pending_dir_moves.rb_node;

	while (node) {
		struct pending_dir_move *cur;

		cur = rb_entry(node, struct pending_dir_move, node);
		if (parent_ino == cur->parent_ino)
			return cur;

		node = (parent_ino < cur->parent_ino) ? node->rb_left :
							node->rb_right;
	}

	return NULL;
}
34859f03740aSFilipe David Borba Manana 
/*
 * Walk from inode @ino (generation @gen) towards the subvolume root, one
 * parent at a time, and detect whether the walk leads back to @ino.
 *
 * @name is used as scratch space for the name lookups and is reset on every
 * step. *@ancestor_ino is set to 0 first and then to the first inode on the
 * walk that is waiting for a move or, if there was none, to the inode whose
 * parent turned out to be the starting inode.
 *
 * Returns 1 if a loop was found, 0 if not and a negative errno on failure.
 */
static int path_loop(struct send_ctx *sctx, struct fs_path *name,
		     u64 ino, u64 gen, u64 *ancestor_ino)
{
	const u64 start_ino = ino;
	u64 parent_inode = 0;
	u64 parent_gen = 0;
	int ret = 0;

	*ancestor_ino = 0;

	while (ino != BTRFS_FIRST_FREE_OBJECTID) {
		fs_path_reset(name);

		/* Stop at a directory that is waiting to be removed. */
		if (is_waiting_for_rm(sctx, ino, gen))
			return ret;

		if (is_waiting_for_move(sctx, ino)) {
			if (*ancestor_ino == 0)
				*ancestor_ino = ino;
			/* Not moved yet, use its parent in the parent root. */
			ret = get_first_ref(sctx->parent_root, ino,
					    &parent_inode, &parent_gen, name);
		} else {
			ret = __get_cur_name_and_parent(sctx, ino, gen,
							&parent_inode,
							&parent_gen, name);
			/* A positive result ends the walk without a loop. */
			if (ret > 0)
				return 0;
		}
		if (ret < 0)
			return ret;

		/* Arrived back at the starting inode: this is a loop. */
		if (parent_inode == start_ino) {
			if (*ancestor_ino == 0)
				*ancestor_ino = ino;
			return 1;
		}

		ino = parent_inode;
		gen = parent_gen;
	}

	return ret;
}
3527801bec36SRobbie Ko 
apply_dir_move(struct send_ctx * sctx,struct pending_dir_move * pm)35289f03740aSFilipe David Borba Manana static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
35299f03740aSFilipe David Borba Manana {
35309f03740aSFilipe David Borba Manana 	struct fs_path *from_path = NULL;
35319f03740aSFilipe David Borba Manana 	struct fs_path *to_path = NULL;
35322b863a13SFilipe Manana 	struct fs_path *name = NULL;
35339f03740aSFilipe David Borba Manana 	u64 orig_progress = sctx->send_progress;
35349f03740aSFilipe David Borba Manana 	struct recorded_ref *cur;
35352b863a13SFilipe Manana 	u64 parent_ino, parent_gen;
35369dc44214SFilipe Manana 	struct waiting_dir_move *dm = NULL;
35379dc44214SFilipe Manana 	u64 rmdir_ino = 0;
35380b3f407eSFilipe Manana 	u64 rmdir_gen;
3539801bec36SRobbie Ko 	u64 ancestor;
3540801bec36SRobbie Ko 	bool is_orphan;
35419f03740aSFilipe David Borba Manana 	int ret;
35429f03740aSFilipe David Borba Manana 
35432b863a13SFilipe Manana 	name = fs_path_alloc();
35449f03740aSFilipe David Borba Manana 	from_path = fs_path_alloc();
35452b863a13SFilipe Manana 	if (!name || !from_path) {
35462b863a13SFilipe Manana 		ret = -ENOMEM;
35472b863a13SFilipe Manana 		goto out;
35482b863a13SFilipe Manana 	}
35499f03740aSFilipe David Borba Manana 
35509dc44214SFilipe Manana 	dm = get_waiting_dir_move(sctx, pm->ino);
35519dc44214SFilipe Manana 	ASSERT(dm);
35529dc44214SFilipe Manana 	rmdir_ino = dm->rmdir_ino;
35530b3f407eSFilipe Manana 	rmdir_gen = dm->rmdir_gen;
3554801bec36SRobbie Ko 	is_orphan = dm->orphanized;
35559dc44214SFilipe Manana 	free_waiting_dir_move(sctx, dm);
35562b863a13SFilipe Manana 
3557801bec36SRobbie Ko 	if (is_orphan) {
355884471e24SFilipe Manana 		ret = gen_unique_name(sctx, pm->ino,
355984471e24SFilipe Manana 				      pm->gen, from_path);
356084471e24SFilipe Manana 	} else {
35612b863a13SFilipe Manana 		ret = get_first_ref(sctx->parent_root, pm->ino,
35622b863a13SFilipe Manana 				    &parent_ino, &parent_gen, name);
35632b863a13SFilipe Manana 		if (ret < 0)
35642b863a13SFilipe Manana 			goto out;
3565c992ec94SFilipe Manana 		ret = get_cur_path(sctx, parent_ino, parent_gen,
35662b863a13SFilipe Manana 				   from_path);
35672b863a13SFilipe Manana 		if (ret < 0)
35682b863a13SFilipe Manana 			goto out;
35692b863a13SFilipe Manana 		ret = fs_path_add_path(from_path, name);
357084471e24SFilipe Manana 	}
35712b863a13SFilipe Manana 	if (ret < 0)
35722b863a13SFilipe Manana 		goto out;
35732b863a13SFilipe Manana 
3574f959492fSFilipe Manana 	sctx->send_progress = sctx->cur_ino + 1;
3575801bec36SRobbie Ko 	ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
35767969e77aSFilipe Manana 	if (ret < 0)
35777969e77aSFilipe Manana 		goto out;
3578801bec36SRobbie Ko 	if (ret) {
3579801bec36SRobbie Ko 		LIST_HEAD(deleted_refs);
3580801bec36SRobbie Ko 		ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
3581801bec36SRobbie Ko 		ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
3582801bec36SRobbie Ko 					   &pm->update_refs, &deleted_refs,
3583801bec36SRobbie Ko 					   is_orphan);
3584801bec36SRobbie Ko 		if (ret < 0)
3585801bec36SRobbie Ko 			goto out;
3586801bec36SRobbie Ko 		if (rmdir_ino) {
3587801bec36SRobbie Ko 			dm = get_waiting_dir_move(sctx, pm->ino);
3588801bec36SRobbie Ko 			ASSERT(dm);
3589801bec36SRobbie Ko 			dm->rmdir_ino = rmdir_ino;
35900b3f407eSFilipe Manana 			dm->rmdir_gen = rmdir_gen;
3591801bec36SRobbie Ko 		}
3592801bec36SRobbie Ko 		goto out;
3593801bec36SRobbie Ko 	}
3594c992ec94SFilipe Manana 	fs_path_reset(name);
3595c992ec94SFilipe Manana 	to_path = name;
35962b863a13SFilipe Manana 	name = NULL;
35979f03740aSFilipe David Borba Manana 	ret = get_cur_path(sctx, pm->ino, pm->gen, to_path);
35989f03740aSFilipe David Borba Manana 	if (ret < 0)
35999f03740aSFilipe David Borba Manana 		goto out;
36009f03740aSFilipe David Borba Manana 
36019f03740aSFilipe David Borba Manana 	ret = send_rename(sctx, from_path, to_path);
36029f03740aSFilipe David Borba Manana 	if (ret < 0)
36039f03740aSFilipe David Borba Manana 		goto out;
36049f03740aSFilipe David Borba Manana 
36059dc44214SFilipe Manana 	if (rmdir_ino) {
36069dc44214SFilipe Manana 		struct orphan_dir_info *odi;
36070f96f517SRobbie Ko 		u64 gen;
36089dc44214SFilipe Manana 
36090b3f407eSFilipe Manana 		odi = get_orphan_dir_info(sctx, rmdir_ino, rmdir_gen);
36109dc44214SFilipe Manana 		if (!odi) {
36119dc44214SFilipe Manana 			/* already deleted */
36129dc44214SFilipe Manana 			goto finish;
36139dc44214SFilipe Manana 		}
36140f96f517SRobbie Ko 		gen = odi->gen;
36150f96f517SRobbie Ko 
361624970ccbSFilipe Manana 		ret = can_rmdir(sctx, rmdir_ino, gen);
36179dc44214SFilipe Manana 		if (ret < 0)
36189dc44214SFilipe Manana 			goto out;
36199dc44214SFilipe Manana 		if (!ret)
36209dc44214SFilipe Manana 			goto finish;
36219dc44214SFilipe Manana 
36229dc44214SFilipe Manana 		name = fs_path_alloc();
36239dc44214SFilipe Manana 		if (!name) {
36249dc44214SFilipe Manana 			ret = -ENOMEM;
36259dc44214SFilipe Manana 			goto out;
36269dc44214SFilipe Manana 		}
36270f96f517SRobbie Ko 		ret = get_cur_path(sctx, rmdir_ino, gen, name);
36289dc44214SFilipe Manana 		if (ret < 0)
36299dc44214SFilipe Manana 			goto out;
36309dc44214SFilipe Manana 		ret = send_rmdir(sctx, name);
36319dc44214SFilipe Manana 		if (ret < 0)
36329dc44214SFilipe Manana 			goto out;
36339dc44214SFilipe Manana 	}
36349dc44214SFilipe Manana 
36359dc44214SFilipe Manana finish:
36363e49363bSFilipe Manana 	ret = cache_dir_utimes(sctx, pm->ino, pm->gen);
36379f03740aSFilipe David Borba Manana 	if (ret < 0)
36389f03740aSFilipe David Borba Manana 		goto out;
36399f03740aSFilipe David Borba Manana 
36409f03740aSFilipe David Borba Manana 	/*
36419f03740aSFilipe David Borba Manana 	 * After rename/move, need to update the utimes of both new parent(s)
36429f03740aSFilipe David Borba Manana 	 * and old parent(s).
36439f03740aSFilipe David Borba Manana 	 */
36449f03740aSFilipe David Borba Manana 	list_for_each_entry(cur, &pm->update_refs, list) {
3645764433a1SRobbie Ko 		/*
3646764433a1SRobbie Ko 		 * The parent inode might have been deleted in the send snapshot
3647764433a1SRobbie Ko 		 */
36487e93f6dcSBingJing Chang 		ret = get_inode_info(sctx->send_root, cur->dir, NULL);
3649764433a1SRobbie Ko 		if (ret == -ENOENT) {
3650764433a1SRobbie Ko 			ret = 0;
36519dc44214SFilipe Manana 			continue;
3652764433a1SRobbie Ko 		}
3653764433a1SRobbie Ko 		if (ret < 0)
3654764433a1SRobbie Ko 			goto out;
3655764433a1SRobbie Ko 
36563e49363bSFilipe Manana 		ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen);
36579f03740aSFilipe David Borba Manana 		if (ret < 0)
36589f03740aSFilipe David Borba Manana 			goto out;
36599f03740aSFilipe David Borba Manana 	}
36609f03740aSFilipe David Borba Manana 
36619f03740aSFilipe David Borba Manana out:
36622b863a13SFilipe Manana 	fs_path_free(name);
36639f03740aSFilipe David Borba Manana 	fs_path_free(from_path);
36649f03740aSFilipe David Borba Manana 	fs_path_free(to_path);
36659f03740aSFilipe David Borba Manana 	sctx->send_progress = orig_progress;
36669f03740aSFilipe David Borba Manana 
36679f03740aSFilipe David Borba Manana 	return ret;
36689f03740aSFilipe David Borba Manana }
36699f03740aSFilipe David Borba Manana 
free_pending_move(struct send_ctx * sctx,struct pending_dir_move * m)36709f03740aSFilipe David Borba Manana static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
36719f03740aSFilipe David Borba Manana {
36729f03740aSFilipe David Borba Manana 	if (!list_empty(&m->list))
36739f03740aSFilipe David Borba Manana 		list_del(&m->list);
36749f03740aSFilipe David Borba Manana 	if (!RB_EMPTY_NODE(&m->node))
36759f03740aSFilipe David Borba Manana 		rb_erase(&m->node, &sctx->pending_dir_moves);
36769f03740aSFilipe David Borba Manana 	__free_recorded_refs(&m->update_refs);
36779f03740aSFilipe David Borba Manana 	kfree(m);
36789f03740aSFilipe David Borba Manana }
36799f03740aSFilipe David Borba Manana 
tail_append_pending_moves(struct send_ctx * sctx,struct pending_dir_move * moves,struct list_head * stack)3680a4390aeeSRobbie Ko static void tail_append_pending_moves(struct send_ctx *sctx,
3681a4390aeeSRobbie Ko 				      struct pending_dir_move *moves,
36829f03740aSFilipe David Borba Manana 				      struct list_head *stack)
36839f03740aSFilipe David Borba Manana {
36849f03740aSFilipe David Borba Manana 	if (list_empty(&moves->list)) {
36859f03740aSFilipe David Borba Manana 		list_add_tail(&moves->list, stack);
36869f03740aSFilipe David Borba Manana 	} else {
36879f03740aSFilipe David Borba Manana 		LIST_HEAD(list);
36889f03740aSFilipe David Borba Manana 		list_splice_init(&moves->list, &list);
36899f03740aSFilipe David Borba Manana 		list_add_tail(&moves->list, stack);
36909f03740aSFilipe David Borba Manana 		list_splice_tail(&list, stack);
36919f03740aSFilipe David Borba Manana 	}
3692a4390aeeSRobbie Ko 	if (!RB_EMPTY_NODE(&moves->node)) {
3693a4390aeeSRobbie Ko 		rb_erase(&moves->node, &sctx->pending_dir_moves);
3694a4390aeeSRobbie Ko 		RB_CLEAR_NODE(&moves->node);
3695a4390aeeSRobbie Ko 	}
36969f03740aSFilipe David Borba Manana }
36979f03740aSFilipe David Borba Manana 
apply_children_dir_moves(struct send_ctx * sctx)36989f03740aSFilipe David Borba Manana static int apply_children_dir_moves(struct send_ctx *sctx)
36999f03740aSFilipe David Borba Manana {
37009f03740aSFilipe David Borba Manana 	struct pending_dir_move *pm;
370184af994bSRuan Jinjie 	LIST_HEAD(stack);
37029f03740aSFilipe David Borba Manana 	u64 parent_ino = sctx->cur_ino;
37039f03740aSFilipe David Borba Manana 	int ret = 0;
37049f03740aSFilipe David Borba Manana 
37059f03740aSFilipe David Borba Manana 	pm = get_pending_dir_moves(sctx, parent_ino);
37069f03740aSFilipe David Borba Manana 	if (!pm)
37079f03740aSFilipe David Borba Manana 		return 0;
37089f03740aSFilipe David Borba Manana 
3709a4390aeeSRobbie Ko 	tail_append_pending_moves(sctx, pm, &stack);
37109f03740aSFilipe David Borba Manana 
37119f03740aSFilipe David Borba Manana 	while (!list_empty(&stack)) {
37129f03740aSFilipe David Borba Manana 		pm = list_first_entry(&stack, struct pending_dir_move, list);
37139f03740aSFilipe David Borba Manana 		parent_ino = pm->ino;
37149f03740aSFilipe David Borba Manana 		ret = apply_dir_move(sctx, pm);
37159f03740aSFilipe David Borba Manana 		free_pending_move(sctx, pm);
37169f03740aSFilipe David Borba Manana 		if (ret)
37179f03740aSFilipe David Borba Manana 			goto out;
37189f03740aSFilipe David Borba Manana 		pm = get_pending_dir_moves(sctx, parent_ino);
37199f03740aSFilipe David Borba Manana 		if (pm)
3720a4390aeeSRobbie Ko 			tail_append_pending_moves(sctx, pm, &stack);
37219f03740aSFilipe David Borba Manana 	}
37229f03740aSFilipe David Borba Manana 	return 0;
37239f03740aSFilipe David Borba Manana 
37249f03740aSFilipe David Borba Manana out:
37259f03740aSFilipe David Borba Manana 	while (!list_empty(&stack)) {
37269f03740aSFilipe David Borba Manana 		pm = list_first_entry(&stack, struct pending_dir_move, list);
37279f03740aSFilipe David Borba Manana 		free_pending_move(sctx, pm);
37289f03740aSFilipe David Borba Manana 	}
37299f03740aSFilipe David Borba Manana 	return ret;
37309f03740aSFilipe David Borba Manana }
37319f03740aSFilipe David Borba Manana 
373284471e24SFilipe Manana /*
373384471e24SFilipe Manana  * We might need to delay a directory rename even when no ancestor directory
373484471e24SFilipe Manana  * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
373584471e24SFilipe Manana  * renamed. This happens when we rename a directory to the old name (the name
373684471e24SFilipe Manana  * in the parent root) of some other unrelated directory that got its rename
373784471e24SFilipe Manana  * delayed due to some ancestor with higher number that got renamed.
373884471e24SFilipe Manana  *
373984471e24SFilipe Manana  * Example:
374084471e24SFilipe Manana  *
374184471e24SFilipe Manana  * Parent snapshot:
374284471e24SFilipe Manana  * .                                       (ino 256)
374384471e24SFilipe Manana  * |---- a/                                (ino 257)
374484471e24SFilipe Manana  * |     |---- file                        (ino 260)
374584471e24SFilipe Manana  * |
374684471e24SFilipe Manana  * |---- b/                                (ino 258)
374784471e24SFilipe Manana  * |---- c/                                (ino 259)
374884471e24SFilipe Manana  *
374984471e24SFilipe Manana  * Send snapshot:
375084471e24SFilipe Manana  * .                                       (ino 256)
375184471e24SFilipe Manana  * |---- a/                                (ino 258)
375284471e24SFilipe Manana  * |---- x/                                (ino 259)
375384471e24SFilipe Manana  *       |---- y/                          (ino 257)
375484471e24SFilipe Manana  *             |----- file                 (ino 260)
375584471e24SFilipe Manana  *
375684471e24SFilipe Manana  * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
375784471e24SFilipe Manana  * from 'a' to 'x/y' happening first, which in turn depends on the rename of
375884471e24SFilipe Manana  * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
375984471e24SFilipe Manana  * must issue is:
376084471e24SFilipe Manana  *
376184471e24SFilipe Manana  * 1 - rename 259 from 'c' to 'x'
376284471e24SFilipe Manana  * 2 - rename 257 from 'a' to 'x/y'
376384471e24SFilipe Manana  * 3 - rename 258 from 'b' to 'a'
376484471e24SFilipe Manana  *
376584471e24SFilipe Manana  * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
376684471e24SFilipe Manana  * be done right away and < 0 on error.
376784471e24SFilipe Manana  */
wait_for_dest_dir_move(struct send_ctx * sctx,struct recorded_ref * parent_ref,const bool is_orphan)376884471e24SFilipe Manana static int wait_for_dest_dir_move(struct send_ctx *sctx,
376984471e24SFilipe Manana 				  struct recorded_ref *parent_ref,
377084471e24SFilipe Manana 				  const bool is_orphan)
377184471e24SFilipe Manana {
37722ff7e61eSJeff Mahoney 	struct btrfs_fs_info *fs_info = sctx->parent_root->fs_info;
377384471e24SFilipe Manana 	struct btrfs_path *path;
377484471e24SFilipe Manana 	struct btrfs_key key;
377584471e24SFilipe Manana 	struct btrfs_key di_key;
377684471e24SFilipe Manana 	struct btrfs_dir_item *di;
377784471e24SFilipe Manana 	u64 left_gen;
377884471e24SFilipe Manana 	u64 right_gen;
377984471e24SFilipe Manana 	int ret = 0;
3780801bec36SRobbie Ko 	struct waiting_dir_move *wdm;
378184471e24SFilipe Manana 
378284471e24SFilipe Manana 	if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
378384471e24SFilipe Manana 		return 0;
378484471e24SFilipe Manana 
378584471e24SFilipe Manana 	path = alloc_path_for_send();
378684471e24SFilipe Manana 	if (!path)
378784471e24SFilipe Manana 		return -ENOMEM;
378884471e24SFilipe Manana 
378984471e24SFilipe Manana 	key.objectid = parent_ref->dir;
379084471e24SFilipe Manana 	key.type = BTRFS_DIR_ITEM_KEY;
379184471e24SFilipe Manana 	key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
379284471e24SFilipe Manana 
379384471e24SFilipe Manana 	ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
379484471e24SFilipe Manana 	if (ret < 0) {
379584471e24SFilipe Manana 		goto out;
379684471e24SFilipe Manana 	} else if (ret > 0) {
379784471e24SFilipe Manana 		ret = 0;
379884471e24SFilipe Manana 		goto out;
379984471e24SFilipe Manana 	}
380084471e24SFilipe Manana 
38012ff7e61eSJeff Mahoney 	di = btrfs_match_dir_item_name(fs_info, path, parent_ref->name,
38022ff7e61eSJeff Mahoney 				       parent_ref->name_len);
380384471e24SFilipe Manana 	if (!di) {
380484471e24SFilipe Manana 		ret = 0;
380584471e24SFilipe Manana 		goto out;
380684471e24SFilipe Manana 	}
380784471e24SFilipe Manana 	/*
380884471e24SFilipe Manana 	 * di_key.objectid has the number of the inode that has a dentry in the
380984471e24SFilipe Manana 	 * parent directory with the same name that sctx->cur_ino is being
381084471e24SFilipe Manana 	 * renamed to. We need to check if that inode is in the send root as
381184471e24SFilipe Manana 	 * well and if it is currently marked as an inode with a pending rename,
381284471e24SFilipe Manana 	 * if it is, we need to delay the rename of sctx->cur_ino as well, so
381384471e24SFilipe Manana 	 * that it happens after that other inode is renamed.
381484471e24SFilipe Manana 	 */
381584471e24SFilipe Manana 	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
381684471e24SFilipe Manana 	if (di_key.type != BTRFS_INODE_ITEM_KEY) {
381784471e24SFilipe Manana 		ret = 0;
381884471e24SFilipe Manana 		goto out;
381984471e24SFilipe Manana 	}
382084471e24SFilipe Manana 
38217e93f6dcSBingJing Chang 	ret = get_inode_gen(sctx->parent_root, di_key.objectid, &left_gen);
382284471e24SFilipe Manana 	if (ret < 0)
382384471e24SFilipe Manana 		goto out;
38247e93f6dcSBingJing Chang 	ret = get_inode_gen(sctx->send_root, di_key.objectid, &right_gen);
382584471e24SFilipe Manana 	if (ret < 0) {
382684471e24SFilipe Manana 		if (ret == -ENOENT)
382784471e24SFilipe Manana 			ret = 0;
382884471e24SFilipe Manana 		goto out;
382984471e24SFilipe Manana 	}
383084471e24SFilipe Manana 
383184471e24SFilipe Manana 	/* Different inode, no need to delay the rename of sctx->cur_ino */
383284471e24SFilipe Manana 	if (right_gen != left_gen) {
383384471e24SFilipe Manana 		ret = 0;
383484471e24SFilipe Manana 		goto out;
383584471e24SFilipe Manana 	}
383684471e24SFilipe Manana 
3837801bec36SRobbie Ko 	wdm = get_waiting_dir_move(sctx, di_key.objectid);
3838801bec36SRobbie Ko 	if (wdm && !wdm->orphanized) {
383984471e24SFilipe Manana 		ret = add_pending_dir_move(sctx,
384084471e24SFilipe Manana 					   sctx->cur_ino,
384184471e24SFilipe Manana 					   sctx->cur_inode_gen,
384284471e24SFilipe Manana 					   di_key.objectid,
384384471e24SFilipe Manana 					   &sctx->new_refs,
384484471e24SFilipe Manana 					   &sctx->deleted_refs,
384584471e24SFilipe Manana 					   is_orphan);
384684471e24SFilipe Manana 		if (!ret)
384784471e24SFilipe Manana 			ret = 1;
384884471e24SFilipe Manana 	}
384984471e24SFilipe Manana out:
385084471e24SFilipe Manana 	btrfs_free_path(path);
385184471e24SFilipe Manana 	return ret;
385284471e24SFilipe Manana }
385384471e24SFilipe Manana 
385480aa6027SFilipe Manana /*
3855ea37d599SFilipe Manana  * Check if inode ino2, or any of its ancestors, is inode ino1.
3856ea37d599SFilipe Manana  * Return 1 if true, 0 if false and < 0 on error.
3857ea37d599SFilipe Manana  */
check_ino_in_path(struct btrfs_root * root,const u64 ino1,const u64 ino1_gen,const u64 ino2,const u64 ino2_gen,struct fs_path * fs_path)3858ea37d599SFilipe Manana static int check_ino_in_path(struct btrfs_root *root,
3859ea37d599SFilipe Manana 			     const u64 ino1,
3860ea37d599SFilipe Manana 			     const u64 ino1_gen,
3861ea37d599SFilipe Manana 			     const u64 ino2,
3862ea37d599SFilipe Manana 			     const u64 ino2_gen,
3863ea37d599SFilipe Manana 			     struct fs_path *fs_path)
3864ea37d599SFilipe Manana {
3865ea37d599SFilipe Manana 	u64 ino = ino2;
3866ea37d599SFilipe Manana 
3867ea37d599SFilipe Manana 	if (ino1 == ino2)
3868ea37d599SFilipe Manana 		return ino1_gen == ino2_gen;
3869ea37d599SFilipe Manana 
3870ea37d599SFilipe Manana 	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
3871ea37d599SFilipe Manana 		u64 parent;
3872ea37d599SFilipe Manana 		u64 parent_gen;
3873ea37d599SFilipe Manana 		int ret;
3874ea37d599SFilipe Manana 
3875ea37d599SFilipe Manana 		fs_path_reset(fs_path);
3876ea37d599SFilipe Manana 		ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
3877ea37d599SFilipe Manana 		if (ret < 0)
3878ea37d599SFilipe Manana 			return ret;
3879ea37d599SFilipe Manana 		if (parent == ino1)
3880ea37d599SFilipe Manana 			return parent_gen == ino1_gen;
3881ea37d599SFilipe Manana 		ino = parent;
3882ea37d599SFilipe Manana 	}
3883ea37d599SFilipe Manana 	return 0;
3884ea37d599SFilipe Manana }
3885ea37d599SFilipe Manana 
3886ea37d599SFilipe Manana /*
388735a68080SGabriel Niebler  * Check if inode ino1 is an ancestor of inode ino2 in the given root for any
3888ea37d599SFilipe Manana  * possible path (in case ino2 is not a directory and has multiple hard links).
388980aa6027SFilipe Manana  * Return 1 if true, 0 if false and < 0 on error.
389080aa6027SFilipe Manana  */
is_ancestor(struct btrfs_root * root,const u64 ino1,const u64 ino1_gen,const u64 ino2,struct fs_path * fs_path)389180aa6027SFilipe Manana static int is_ancestor(struct btrfs_root *root,
389280aa6027SFilipe Manana 		       const u64 ino1,
389380aa6027SFilipe Manana 		       const u64 ino1_gen,
389480aa6027SFilipe Manana 		       const u64 ino2,
389580aa6027SFilipe Manana 		       struct fs_path *fs_path)
389680aa6027SFilipe Manana {
3897ea37d599SFilipe Manana 	bool free_fs_path = false;
389872c3668fSFilipe Manana 	int ret = 0;
389935a68080SGabriel Niebler 	int iter_ret = 0;
3900ea37d599SFilipe Manana 	struct btrfs_path *path = NULL;
3901ea37d599SFilipe Manana 	struct btrfs_key key;
390272c3668fSFilipe Manana 
390372c3668fSFilipe Manana 	if (!fs_path) {
390472c3668fSFilipe Manana 		fs_path = fs_path_alloc();
390572c3668fSFilipe Manana 		if (!fs_path)
390672c3668fSFilipe Manana 			return -ENOMEM;
3907ea37d599SFilipe Manana 		free_fs_path = true;
390872c3668fSFilipe Manana 	}
390980aa6027SFilipe Manana 
3910ea37d599SFilipe Manana 	path = alloc_path_for_send();
3911ea37d599SFilipe Manana 	if (!path) {
3912ea37d599SFilipe Manana 		ret = -ENOMEM;
3913ea37d599SFilipe Manana 		goto out;
3914ea37d599SFilipe Manana 	}
3915ea37d599SFilipe Manana 
3916ea37d599SFilipe Manana 	key.objectid = ino2;
3917ea37d599SFilipe Manana 	key.type = BTRFS_INODE_REF_KEY;
3918ea37d599SFilipe Manana 	key.offset = 0;
3919ea37d599SFilipe Manana 
392035a68080SGabriel Niebler 	btrfs_for_each_slot(root, &key, &key, path, iter_ret) {
3921ea37d599SFilipe Manana 		struct extent_buffer *leaf = path->nodes[0];
3922ea37d599SFilipe Manana 		int slot = path->slots[0];
3923ea37d599SFilipe Manana 		u32 cur_offset = 0;
3924ea37d599SFilipe Manana 		u32 item_size;
3925ea37d599SFilipe Manana 
3926ea37d599SFilipe Manana 		if (key.objectid != ino2)
3927ea37d599SFilipe Manana 			break;
3928ea37d599SFilipe Manana 		if (key.type != BTRFS_INODE_REF_KEY &&
3929ea37d599SFilipe Manana 		    key.type != BTRFS_INODE_EXTREF_KEY)
3930ea37d599SFilipe Manana 			break;
3931ea37d599SFilipe Manana 
39323212fa14SJosef Bacik 		item_size = btrfs_item_size(leaf, slot);
3933ea37d599SFilipe Manana 		while (cur_offset < item_size) {
393480aa6027SFilipe Manana 			u64 parent;
393580aa6027SFilipe Manana 			u64 parent_gen;
393680aa6027SFilipe Manana 
3937ea37d599SFilipe Manana 			if (key.type == BTRFS_INODE_EXTREF_KEY) {
3938ea37d599SFilipe Manana 				unsigned long ptr;
3939ea37d599SFilipe Manana 				struct btrfs_inode_extref *extref;
3940ea37d599SFilipe Manana 
3941ea37d599SFilipe Manana 				ptr = btrfs_item_ptr_offset(leaf, slot);
3942ea37d599SFilipe Manana 				extref = (struct btrfs_inode_extref *)
3943ea37d599SFilipe Manana 					(ptr + cur_offset);
3944ea37d599SFilipe Manana 				parent = btrfs_inode_extref_parent(leaf,
3945ea37d599SFilipe Manana 								   extref);
3946ea37d599SFilipe Manana 				cur_offset += sizeof(*extref);
3947ea37d599SFilipe Manana 				cur_offset += btrfs_inode_extref_name_len(leaf,
3948ea37d599SFilipe Manana 								  extref);
3949ea37d599SFilipe Manana 			} else {
3950ea37d599SFilipe Manana 				parent = key.offset;
3951ea37d599SFilipe Manana 				cur_offset = item_size;
3952ea37d599SFilipe Manana 			}
3953ea37d599SFilipe Manana 
39547e93f6dcSBingJing Chang 			ret = get_inode_gen(root, parent, &parent_gen);
3955ea37d599SFilipe Manana 			if (ret < 0)
3956ea37d599SFilipe Manana 				goto out;
3957ea37d599SFilipe Manana 			ret = check_ino_in_path(root, ino1, ino1_gen,
3958ea37d599SFilipe Manana 						parent, parent_gen, fs_path);
3959ea37d599SFilipe Manana 			if (ret)
3960ea37d599SFilipe Manana 				goto out;
3961ea37d599SFilipe Manana 		}
3962ea37d599SFilipe Manana 	}
396380aa6027SFilipe Manana 	ret = 0;
396435a68080SGabriel Niebler 	if (iter_ret < 0)
396535a68080SGabriel Niebler 		ret = iter_ret;
396635a68080SGabriel Niebler 
396772c3668fSFilipe Manana out:
3968ea37d599SFilipe Manana 	btrfs_free_path(path);
3969ea37d599SFilipe Manana 	if (free_fs_path)
397072c3668fSFilipe Manana 		fs_path_free(fs_path);
397172c3668fSFilipe Manana 	return ret;
397280aa6027SFilipe Manana }
397380aa6027SFilipe Manana 
wait_for_parent_move(struct send_ctx * sctx,struct recorded_ref * parent_ref,const bool is_orphan)39749f03740aSFilipe David Borba Manana static int wait_for_parent_move(struct send_ctx *sctx,
39758b191a68SFilipe Manana 				struct recorded_ref *parent_ref,
39768b191a68SFilipe Manana 				const bool is_orphan)
39779f03740aSFilipe David Borba Manana {
3978f959492fSFilipe Manana 	int ret = 0;
39799f03740aSFilipe David Borba Manana 	u64 ino = parent_ref->dir;
3980fe9c798dSFilipe Manana 	u64 ino_gen = parent_ref->dir_gen;
39819f03740aSFilipe David Borba Manana 	u64 parent_ino_before, parent_ino_after;
39829f03740aSFilipe David Borba Manana 	struct fs_path *path_before = NULL;
39839f03740aSFilipe David Borba Manana 	struct fs_path *path_after = NULL;
39849f03740aSFilipe David Borba Manana 	int len1, len2;
39859f03740aSFilipe David Borba Manana 
39869f03740aSFilipe David Borba Manana 	path_after = fs_path_alloc();
3987f959492fSFilipe Manana 	path_before = fs_path_alloc();
3988f959492fSFilipe Manana 	if (!path_after || !path_before) {
39899f03740aSFilipe David Borba Manana 		ret = -ENOMEM;
39909f03740aSFilipe David Borba Manana 		goto out;
39919f03740aSFilipe David Borba Manana 	}
39929f03740aSFilipe David Borba Manana 
3993bfa7e1f8SFilipe Manana 	/*
3994f959492fSFilipe Manana 	 * Our current directory inode may not yet be renamed/moved because some
3995f959492fSFilipe Manana 	 * ancestor (immediate or not) has to be renamed/moved first. So find if
3996f959492fSFilipe Manana 	 * such ancestor exists and make sure our own rename/move happens after
399780aa6027SFilipe Manana 	 * that ancestor is processed to avoid path build infinite loops (done
399880aa6027SFilipe Manana 	 * at get_cur_path()).
3999bfa7e1f8SFilipe Manana 	 */
4000f959492fSFilipe Manana 	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
4001fe9c798dSFilipe Manana 		u64 parent_ino_after_gen;
4002fe9c798dSFilipe Manana 
4003f959492fSFilipe Manana 		if (is_waiting_for_move(sctx, ino)) {
400480aa6027SFilipe Manana 			/*
400580aa6027SFilipe Manana 			 * If the current inode is an ancestor of ino in the
400680aa6027SFilipe Manana 			 * parent root, we need to delay the rename of the
400780aa6027SFilipe Manana 			 * current inode, otherwise don't delayed the rename
400880aa6027SFilipe Manana 			 * because we can end up with a circular dependency
400980aa6027SFilipe Manana 			 * of renames, resulting in some directories never
401080aa6027SFilipe Manana 			 * getting the respective rename operations issued in
401180aa6027SFilipe Manana 			 * the send stream or getting into infinite path build
401280aa6027SFilipe Manana 			 * loops.
401380aa6027SFilipe Manana 			 */
401480aa6027SFilipe Manana 			ret = is_ancestor(sctx->parent_root,
401580aa6027SFilipe Manana 					  sctx->cur_ino, sctx->cur_inode_gen,
401680aa6027SFilipe Manana 					  ino, path_before);
40174122ea64SFilipe Manana 			if (ret)
4018f959492fSFilipe Manana 				break;
4019f959492fSFilipe Manana 		}
4020bfa7e1f8SFilipe Manana 
4021bfa7e1f8SFilipe Manana 		fs_path_reset(path_before);
4022bfa7e1f8SFilipe Manana 		fs_path_reset(path_after);
4023bfa7e1f8SFilipe Manana 
4024bfa7e1f8SFilipe Manana 		ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
4025fe9c798dSFilipe Manana 				    &parent_ino_after_gen, path_after);
4026bfa7e1f8SFilipe Manana 		if (ret < 0)
4027bfa7e1f8SFilipe Manana 			goto out;
4028bfa7e1f8SFilipe Manana 		ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
4029bfa7e1f8SFilipe Manana 				    NULL, path_before);
4030f959492fSFilipe Manana 		if (ret < 0 && ret != -ENOENT) {
4031bfa7e1f8SFilipe Manana 			goto out;
4032f959492fSFilipe Manana 		} else if (ret == -ENOENT) {
4033bf8e8ca6SFilipe Manana 			ret = 0;
4034f959492fSFilipe Manana 			break;
4035bfa7e1f8SFilipe Manana 		}
4036bfa7e1f8SFilipe Manana 
4037bfa7e1f8SFilipe Manana 		len1 = fs_path_len(path_before);
4038bfa7e1f8SFilipe Manana 		len2 = fs_path_len(path_after);
4039f959492fSFilipe Manana 		if (ino > sctx->cur_ino &&
4040f959492fSFilipe Manana 		    (parent_ino_before != parent_ino_after || len1 != len2 ||
4041f959492fSFilipe Manana 		     memcmp(path_before->start, path_after->start, len1))) {
4042fe9c798dSFilipe Manana 			u64 parent_ino_gen;
4043fe9c798dSFilipe Manana 
40447e93f6dcSBingJing Chang 			ret = get_inode_gen(sctx->parent_root, ino, &parent_ino_gen);
4045fe9c798dSFilipe Manana 			if (ret < 0)
4046fe9c798dSFilipe Manana 				goto out;
4047fe9c798dSFilipe Manana 			if (ino_gen == parent_ino_gen) {
4048bfa7e1f8SFilipe Manana 				ret = 1;
4049bfa7e1f8SFilipe Manana 				break;
4050bfa7e1f8SFilipe Manana 			}
4051fe9c798dSFilipe Manana 		}
4052bfa7e1f8SFilipe Manana 		ino = parent_ino_after;
4053fe9c798dSFilipe Manana 		ino_gen = parent_ino_after_gen;
4054bfa7e1f8SFilipe Manana 	}
4055bfa7e1f8SFilipe Manana 
40569f03740aSFilipe David Borba Manana out:
40579f03740aSFilipe David Borba Manana 	fs_path_free(path_before);
40589f03740aSFilipe David Borba Manana 	fs_path_free(path_after);
40599f03740aSFilipe David Borba Manana 
4060f959492fSFilipe Manana 	if (ret == 1) {
4061f959492fSFilipe Manana 		ret = add_pending_dir_move(sctx,
4062f959492fSFilipe Manana 					   sctx->cur_ino,
4063f959492fSFilipe Manana 					   sctx->cur_inode_gen,
4064f959492fSFilipe Manana 					   ino,
4065f959492fSFilipe Manana 					   &sctx->new_refs,
406684471e24SFilipe Manana 					   &sctx->deleted_refs,
40678b191a68SFilipe Manana 					   is_orphan);
4068f959492fSFilipe Manana 		if (!ret)
4069f959492fSFilipe Manana 			ret = 1;
4070f959492fSFilipe Manana 	}
4071f959492fSFilipe Manana 
40729f03740aSFilipe David Borba Manana 	return ret;
40739f03740aSFilipe David Borba Manana }
40749f03740aSFilipe David Borba Manana 
update_ref_path(struct send_ctx * sctx,struct recorded_ref * ref)4075f5962781SFilipe Manana static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
4076f5962781SFilipe Manana {
4077f5962781SFilipe Manana 	int ret;
4078f5962781SFilipe Manana 	struct fs_path *new_path;
4079f5962781SFilipe Manana 
4080f5962781SFilipe Manana 	/*
4081f5962781SFilipe Manana 	 * Our reference's name member points to its full_path member string, so
4082f5962781SFilipe Manana 	 * we use here a new path.
4083f5962781SFilipe Manana 	 */
4084f5962781SFilipe Manana 	new_path = fs_path_alloc();
4085f5962781SFilipe Manana 	if (!new_path)
4086f5962781SFilipe Manana 		return -ENOMEM;
4087f5962781SFilipe Manana 
4088f5962781SFilipe Manana 	ret = get_cur_path(sctx, ref->dir, ref->dir_gen, new_path);
4089f5962781SFilipe Manana 	if (ret < 0) {
4090f5962781SFilipe Manana 		fs_path_free(new_path);
4091f5962781SFilipe Manana 		return ret;
4092f5962781SFilipe Manana 	}
4093f5962781SFilipe Manana 	ret = fs_path_add(new_path, ref->name, ref->name_len);
4094f5962781SFilipe Manana 	if (ret < 0) {
4095f5962781SFilipe Manana 		fs_path_free(new_path);
4096f5962781SFilipe Manana 		return ret;
4097f5962781SFilipe Manana 	}
4098f5962781SFilipe Manana 
4099f5962781SFilipe Manana 	fs_path_free(ref->full_path);
4100f5962781SFilipe Manana 	set_ref_path(ref, new_path);
4101f5962781SFilipe Manana 
4102f5962781SFilipe Manana 	return 0;
4103f5962781SFilipe Manana }
4104f5962781SFilipe Manana 
410531db9f7cSAlexander Block /*
41069c2b4e03SFilipe Manana  * When processing the new references for an inode we may orphanize an existing
41079c2b4e03SFilipe Manana  * directory inode because its old name conflicts with one of the new references
41089c2b4e03SFilipe Manana  * of the current inode. Later, when processing another new reference of our
41099c2b4e03SFilipe Manana  * inode, we might need to orphanize another inode, but the path we have in the
41109c2b4e03SFilipe Manana  * reference reflects the pre-orphanization name of the directory we previously
41119c2b4e03SFilipe Manana  * orphanized. For example:
41129c2b4e03SFilipe Manana  *
41139c2b4e03SFilipe Manana  * parent snapshot looks like:
41149c2b4e03SFilipe Manana  *
41159c2b4e03SFilipe Manana  * .                                     (ino 256)
41169c2b4e03SFilipe Manana  * |----- f1                             (ino 257)
41179c2b4e03SFilipe Manana  * |----- f2                             (ino 258)
41189c2b4e03SFilipe Manana  * |----- d1/                            (ino 259)
41199c2b4e03SFilipe Manana  *        |----- d2/                     (ino 260)
41209c2b4e03SFilipe Manana  *
41219c2b4e03SFilipe Manana  * send snapshot looks like:
41229c2b4e03SFilipe Manana  *
41239c2b4e03SFilipe Manana  * .                                     (ino 256)
41249c2b4e03SFilipe Manana  * |----- d1                             (ino 258)
41259c2b4e03SFilipe Manana  * |----- f2/                            (ino 259)
41269c2b4e03SFilipe Manana  *        |----- f2_link/                (ino 260)
41279c2b4e03SFilipe Manana  *        |       |----- f1              (ino 257)
41289c2b4e03SFilipe Manana  *        |
41299c2b4e03SFilipe Manana  *        |----- d2                      (ino 258)
41309c2b4e03SFilipe Manana  *
41319c2b4e03SFilipe Manana  * When processing inode 257 we compute the name for inode 259 as "d1", and we
41329c2b4e03SFilipe Manana  * cache it in the name cache. Later when we start processing inode 258, when
41339c2b4e03SFilipe Manana  * collecting all its new references we set a full path of "d1/d2" for its new
41349c2b4e03SFilipe Manana  * reference with name "d2". When we start processing the new references we
41359c2b4e03SFilipe Manana  * start by processing the new reference with name "d1", and this results in
41369c2b4e03SFilipe Manana  * orphanizing inode 259, since its old reference causes a conflict. Then we
41379c2b4e03SFilipe Manana  * move on to the next new reference, with name "d2", and we find out we must
41389c2b4e03SFilipe Manana  * orphanize inode 260, as its old reference conflicts with ours - but for the
41399c2b4e03SFilipe Manana  * orphanization we use a source path corresponding to the path we stored in the
41409c2b4e03SFilipe Manana  * new reference, which is "d1/d2" and not "o259-6-0/d2" - this makes the
41419c2b4e03SFilipe Manana  * receiver fail since the path component "d1/" no longer exists, it was renamed
41429c2b4e03SFilipe Manana  * to "o259-6-0/" when processing the previous new reference. So in this case we
41439c2b4e03SFilipe Manana  * must recompute the path in the new reference and use it for the new
41449c2b4e03SFilipe Manana  * orphanization operation.
41459c2b4e03SFilipe Manana  */
refresh_ref_path(struct send_ctx * sctx,struct recorded_ref * ref)41469c2b4e03SFilipe Manana static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
41479c2b4e03SFilipe Manana {
41489c2b4e03SFilipe Manana 	char *name;
41499c2b4e03SFilipe Manana 	int ret;
41509c2b4e03SFilipe Manana 
41519c2b4e03SFilipe Manana 	name = kmemdup(ref->name, ref->name_len, GFP_KERNEL);
41529c2b4e03SFilipe Manana 	if (!name)
41539c2b4e03SFilipe Manana 		return -ENOMEM;
41549c2b4e03SFilipe Manana 
41559c2b4e03SFilipe Manana 	fs_path_reset(ref->full_path);
41569c2b4e03SFilipe Manana 	ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path);
41579c2b4e03SFilipe Manana 	if (ret < 0)
41589c2b4e03SFilipe Manana 		goto out;
41599c2b4e03SFilipe Manana 
41609c2b4e03SFilipe Manana 	ret = fs_path_add(ref->full_path, name, ref->name_len);
41619c2b4e03SFilipe Manana 	if (ret < 0)
41629c2b4e03SFilipe Manana 		goto out;
41639c2b4e03SFilipe Manana 
41649c2b4e03SFilipe Manana 	/* Update the reference's base name pointer. */
41659c2b4e03SFilipe Manana 	set_ref_path(ref, ref->full_path);
41669c2b4e03SFilipe Manana out:
41679c2b4e03SFilipe Manana 	kfree(name);
41689c2b4e03SFilipe Manana 	return ret;
41699c2b4e03SFilipe Manana }
41709c2b4e03SFilipe Manana 
41719c2b4e03SFilipe Manana /*
417231db9f7cSAlexander Block  * This does all the move/link/unlink/rmdir magic.
 *
 * Works on the references recorded in sctx->new_refs and sctx->deleted_refs
 * for the inode sctx->cur_ino, in this order:
 *
 * 1) Compute valid_path, the path under which the current inode can be
 *    addressed right now (a generated unique name for new inodes and for
 *    inodes whose first ref was overwritten).
 * 2) First pass over the new refs: orphanize or unlink whatever a new ref
 *    would overwrite.
 * 3) Second pass over the new refs: create parent directories that do not
 *    exist yet, then rename or link the inode to each new ref. If
 *    wait_for_dest_dir_move() or wait_for_parent_move() returns 1, the
 *    rename/link is skipped and *pending_move is set to 1.
 * 4) Handle the deleted refs (rmdir or orphanize for a deleted directory,
 *    unlink for non-directories).
 * 5) For every directory collected in check_dirs, either call
 *    cache_dir_utimes() or send the rmdir that can_rmdir() now allows.
 *
 * Returns 0 on success or a negative errno. Every exit goes through the "out"
 * label, which calls free_recorded_refs(sctx) and frees check_dirs and
 * valid_path.
417331db9f7cSAlexander Block  */
process_recorded_refs(struct send_ctx * sctx,int * pending_move)41749f03740aSFilipe David Borba Manana static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
417531db9f7cSAlexander Block {
417604ab956eSJeff Mahoney 	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
417731db9f7cSAlexander Block 	int ret = 0;
417831db9f7cSAlexander Block 	struct recorded_ref *cur;
41791f4692daSAlexander Block 	struct recorded_ref *cur2;
418084af994bSRuan Jinjie 	LIST_HEAD(check_dirs);
418131db9f7cSAlexander Block 	struct fs_path *valid_path = NULL;
4182b24baf69SChris Mason 	u64 ow_inode = 0;
418331db9f7cSAlexander Block 	u64 ow_gen;
4184f5962781SFilipe Manana 	u64 ow_mode;
418531db9f7cSAlexander Block 	int did_overwrite = 0;
418631db9f7cSAlexander Block 	int is_orphan = 0;
418729d6d30fSFilipe Manana 	u64 last_dir_ino_rm = 0;
418884471e24SFilipe Manana 	bool can_rename = true;
4189f5962781SFilipe Manana 	bool orphanized_dir = false;
4190fdb13889SFilipe Manana 	bool orphanized_ancestor = false;
419131db9f7cSAlexander Block 
419204ab956eSJeff Mahoney 	btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino);
419331db9f7cSAlexander Block 
41946d85ed05SAlexander Block 	/*
41956d85ed05SAlexander Block 	 * This should never happen as the root dir always has the same ref
41966d85ed05SAlexander Block 	 * which is always '..'
41976d85ed05SAlexander Block 	 */
419848256173SDavid Sterba 	if (unlikely(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID)) {
419948256173SDavid Sterba 		btrfs_err(fs_info,
420048256173SDavid Sterba 			  "send: unexpected inode %llu in process_recorded_refs()",
420148256173SDavid Sterba 			  sctx->cur_ino);
420248256173SDavid Sterba 		ret = -EINVAL;
420348256173SDavid Sterba 		goto out;
420448256173SDavid Sterba 	}
42056d85ed05SAlexander Block 
4206924794c9STsutomu Itoh 	valid_path = fs_path_alloc();
420731db9f7cSAlexander Block 	if (!valid_path) {
420831db9f7cSAlexander Block 		ret = -ENOMEM;
420931db9f7cSAlexander Block 		goto out;
421031db9f7cSAlexander Block 	}
421131db9f7cSAlexander Block 
421231db9f7cSAlexander Block 	/*
421331db9f7cSAlexander Block 	 * First, check if the first ref of the current inode was overwritten
421431db9f7cSAlexander Block 	 * before. If yes, we know that the current inode was already orphanized
421531db9f7cSAlexander Block 	 * and thus use the orphan name. If not, we can use get_cur_path to
421631db9f7cSAlexander Block 	 * get the path of the first ref as it would look while receiving at
421731db9f7cSAlexander Block 	 * this point in time.
421831db9f7cSAlexander Block 	 * New inodes are always orphan at the beginning, so force to use the
421931db9f7cSAlexander Block 	 * orphan name in this case.
422031db9f7cSAlexander Block 	 * The first ref is stored in valid_path and will be updated if it
422131db9f7cSAlexander Block 	 * gets moved around.
422231db9f7cSAlexander Block 	 */
422331db9f7cSAlexander Block 	if (!sctx->cur_inode_new) {
422431db9f7cSAlexander Block 		ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
422531db9f7cSAlexander Block 				sctx->cur_inode_gen);
422631db9f7cSAlexander Block 		if (ret < 0)
422731db9f7cSAlexander Block 			goto out;
422831db9f7cSAlexander Block 		if (ret)
422931db9f7cSAlexander Block 			did_overwrite = 1;
423031db9f7cSAlexander Block 	}
423131db9f7cSAlexander Block 	if (sctx->cur_inode_new || did_overwrite) {
423231db9f7cSAlexander Block 		ret = gen_unique_name(sctx, sctx->cur_ino,
423331db9f7cSAlexander Block 				sctx->cur_inode_gen, valid_path);
423431db9f7cSAlexander Block 		if (ret < 0)
423531db9f7cSAlexander Block 			goto out;
423631db9f7cSAlexander Block 		is_orphan = 1;
423731db9f7cSAlexander Block 	} else {
423831db9f7cSAlexander Block 		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
423931db9f7cSAlexander Block 				valid_path);
424031db9f7cSAlexander Block 		if (ret < 0)
424131db9f7cSAlexander Block 			goto out;
424231db9f7cSAlexander Block 	}
424331db9f7cSAlexander Block 
424431db9f7cSAlexander Block 	/*
424598272bb7SFilipe Manana 	 * Before doing any rename and link operations, do a first pass on the
424698272bb7SFilipe Manana 	 * new references to orphanize any unprocessed inodes that may have a
424798272bb7SFilipe Manana 	 * reference that conflicts with one of the new references of the current
424898272bb7SFilipe Manana 	 * inode. This needs to happen first because a new reference may conflict
424998272bb7SFilipe Manana 	 * with the old reference of a parent directory, so we must make sure
425098272bb7SFilipe Manana 	 * that the path used for link and rename commands don't use an
425198272bb7SFilipe Manana 	 * orphanized name when an ancestor was not yet orphanized.
425298272bb7SFilipe Manana 	 *
425398272bb7SFilipe Manana 	 * Example:
425498272bb7SFilipe Manana 	 *
425598272bb7SFilipe Manana 	 * Parent snapshot:
425698272bb7SFilipe Manana 	 *
425798272bb7SFilipe Manana 	 * .                                                      (ino 256)
425898272bb7SFilipe Manana 	 * |----- testdir/                                        (ino 259)
425998272bb7SFilipe Manana 	 * |          |----- a                                    (ino 257)
426098272bb7SFilipe Manana 	 * |
426198272bb7SFilipe Manana 	 * |----- b                                               (ino 258)
426298272bb7SFilipe Manana 	 *
426398272bb7SFilipe Manana 	 * Send snapshot:
426498272bb7SFilipe Manana 	 *
426598272bb7SFilipe Manana 	 * .                                                      (ino 256)
426698272bb7SFilipe Manana 	 * |----- testdir_2/                                      (ino 259)
426798272bb7SFilipe Manana 	 * |          |----- a                                    (ino 260)
426898272bb7SFilipe Manana 	 * |
426998272bb7SFilipe Manana 	 * |----- testdir                                         (ino 257)
427098272bb7SFilipe Manana 	 * |----- b                                               (ino 257)
427198272bb7SFilipe Manana 	 * |----- b2                                              (ino 258)
427298272bb7SFilipe Manana 	 *
427398272bb7SFilipe Manana 	 * Processing the new reference for inode 257 with name "b" may happen
427498272bb7SFilipe Manana 	 * before processing the new reference with name "testdir". If so, we
427598272bb7SFilipe Manana 	 * must make sure that by the time we send a link command to create the
427698272bb7SFilipe Manana 	 * hard link "b", inode 259 was already orphanized, since the generated
427798272bb7SFilipe Manana 	 * path in "valid_path" already contains the orphanized name for 259.
427898272bb7SFilipe Manana 	 * We are processing inode 257, so only later when processing 259 we do
427998272bb7SFilipe Manana 	 * the rename operation to change its temporary (orphanized) name to
428098272bb7SFilipe Manana 	 * "testdir_2".
42811f4692daSAlexander Block 	 */
428298272bb7SFilipe Manana 	list_for_each_entry(cur, &sctx->new_refs, list) {
4283498581f3SFilipe Manana 		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
42841f4692daSAlexander Block 		if (ret < 0)
42851f4692daSAlexander Block 			goto out;
428698272bb7SFilipe Manana 		if (ret == inode_state_will_create)
428798272bb7SFilipe Manana 			continue;
42881f4692daSAlexander Block 
42891f4692daSAlexander Block 		/*
429098272bb7SFilipe Manana 		 * Check if this new ref would overwrite the first ref of another
429198272bb7SFilipe Manana 		 * unprocessed inode. If yes, orphanize the overwritten inode.
429298272bb7SFilipe Manana 		 * If we find an overwritten ref that is not the first ref,
429398272bb7SFilipe Manana 		 * simply unlink it.
429431db9f7cSAlexander Block 		 */
429531db9f7cSAlexander Block 		ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
429631db9f7cSAlexander Block 				cur->name, cur->name_len,
4297f5962781SFilipe Manana 				&ow_inode, &ow_gen, &ow_mode);
429831db9f7cSAlexander Block 		if (ret < 0)
429931db9f7cSAlexander Block 			goto out;
430031db9f7cSAlexander Block 		if (ret) {
4301924794c9STsutomu Itoh 			ret = is_first_ref(sctx->parent_root,
430231db9f7cSAlexander Block 					   ow_inode, cur->dir, cur->name,
430331db9f7cSAlexander Block 					   cur->name_len);
430431db9f7cSAlexander Block 			if (ret < 0)
430531db9f7cSAlexander Block 				goto out;
430631db9f7cSAlexander Block 			if (ret) {
43078996a48cSFilipe Manana 				struct name_cache_entry *nce;
4308801bec36SRobbie Ko 				struct waiting_dir_move *wdm;
43098996a48cSFilipe Manana 
				/*
				 * A directory orphanized earlier in this loop
				 * may be part of cur->full_path, so rebuild the
				 * path before using it as the rename source.
				 */
43109c2b4e03SFilipe Manana 				if (orphanized_dir) {
43119c2b4e03SFilipe Manana 					ret = refresh_ref_path(sctx, cur);
43129c2b4e03SFilipe Manana 					if (ret < 0)
43139c2b4e03SFilipe Manana 						goto out;
43149c2b4e03SFilipe Manana 				}
43159c2b4e03SFilipe Manana 
431631db9f7cSAlexander Block 				ret = orphanize_inode(sctx, ow_inode, ow_gen,
431731db9f7cSAlexander Block 						cur->full_path);
431831db9f7cSAlexander Block 				if (ret < 0)
431931db9f7cSAlexander Block 					goto out;
4320f5962781SFilipe Manana 				if (S_ISDIR(ow_mode))
4321f5962781SFilipe Manana 					orphanized_dir = true;
4322801bec36SRobbie Ko 
4323801bec36SRobbie Ko 				/*
4324801bec36SRobbie Ko 				 * If ow_inode has its rename operation delayed
4325801bec36SRobbie Ko 				 * make sure that its orphanized name is used in
4326801bec36SRobbie Ko 				 * the source path when performing its rename
4327801bec36SRobbie Ko 				 * operation.
4328801bec36SRobbie Ko 				 */
43298c139e1dSFilipe Manana 				wdm = get_waiting_dir_move(sctx, ow_inode);
43308c139e1dSFilipe Manana 				if (wdm)
4331801bec36SRobbie Ko 					wdm->orphanized = true;
4332801bec36SRobbie Ko 
43338996a48cSFilipe Manana 				/*
43348996a48cSFilipe Manana 				 * Make sure we clear our orphanized inode's
43358996a48cSFilipe Manana 				 * name from the name cache. This is because the
43368996a48cSFilipe Manana 				 * inode ow_inode might be an ancestor of some
43378996a48cSFilipe Manana 				 * other inode that will be orphanized as well
43388996a48cSFilipe Manana 				 * later and has an inode number greater than
43398996a48cSFilipe Manana 				 * sctx->send_progress. We need to prevent
43408996a48cSFilipe Manana 				 * future name lookups from using the old name
43418996a48cSFilipe Manana 				 * and get instead the orphan name.
43428996a48cSFilipe Manana 				 */
43438996a48cSFilipe Manana 				nce = name_cache_search(sctx, ow_inode, ow_gen);
4344c48545deSFilipe Manana 				if (nce)
4345c48545deSFilipe Manana 					btrfs_lru_cache_remove(&sctx->name_cache,
4346c48545deSFilipe Manana 							       &nce->entry);
4347801bec36SRobbie Ko 
4348801bec36SRobbie Ko 				/*
4349801bec36SRobbie Ko 				 * ow_inode might currently be an ancestor of
4350801bec36SRobbie Ko 				 * cur_ino, therefore compute valid_path (the
4351801bec36SRobbie Ko 				 * current path of cur_ino) again because it
4352801bec36SRobbie Ko 				 * might contain the pre-orphanization name of
4353801bec36SRobbie Ko 				 * ow_inode, which is no longer valid.
4354801bec36SRobbie Ko 				 */
435572c3668fSFilipe Manana 				ret = is_ancestor(sctx->parent_root,
435672c3668fSFilipe Manana 						  ow_inode, ow_gen,
435772c3668fSFilipe Manana 						  sctx->cur_ino, NULL);
435872c3668fSFilipe Manana 				if (ret > 0) {
4359fdb13889SFilipe Manana 					orphanized_ancestor = true;
4360801bec36SRobbie Ko 					fs_path_reset(valid_path);
4361801bec36SRobbie Ko 					ret = get_cur_path(sctx, sctx->cur_ino,
436272c3668fSFilipe Manana 							   sctx->cur_inode_gen,
436372c3668fSFilipe Manana 							   valid_path);
436472c3668fSFilipe Manana 				}
				/* Covers both is_ancestor() and get_cur_path(). */
4365801bec36SRobbie Ko 				if (ret < 0)
4366801bec36SRobbie Ko 					goto out;
436731db9f7cSAlexander Block 			} else {
4368d8ac76cdSFilipe Manana 				/*
4369d8ac76cdSFilipe Manana 				 * If we previously orphanized a directory that
4370d8ac76cdSFilipe Manana 				 * collided with a new reference that we already
4371d8ac76cdSFilipe Manana 				 * processed, recompute the current path because
4372d8ac76cdSFilipe Manana 				 * that directory may be part of the path.
4373d8ac76cdSFilipe Manana 				 */
4374d8ac76cdSFilipe Manana 				if (orphanized_dir) {
4375d8ac76cdSFilipe Manana 					ret = refresh_ref_path(sctx, cur);
4376d8ac76cdSFilipe Manana 					if (ret < 0)
4377d8ac76cdSFilipe Manana 						goto out;
4378d8ac76cdSFilipe Manana 				}
437931db9f7cSAlexander Block 				ret = send_unlink(sctx, cur->full_path);
438031db9f7cSAlexander Block 				if (ret < 0)
438131db9f7cSAlexander Block 					goto out;
438231db9f7cSAlexander Block 			}
438331db9f7cSAlexander Block 		}
438431db9f7cSAlexander Block 
438598272bb7SFilipe Manana 	}
438698272bb7SFilipe Manana 
	/*
	 * Second pass over the new refs: create missing parent directories and
	 * rename or link the current inode to each new ref.
	 */
438798272bb7SFilipe Manana 	list_for_each_entry(cur, &sctx->new_refs, list) {
438898272bb7SFilipe Manana 		/*
438998272bb7SFilipe Manana 		 * We may have refs where the parent directory does not exist
439098272bb7SFilipe Manana 		 * yet. This happens if the parent directories inum is higher
439198272bb7SFilipe Manana 		 * than the current inum. To handle this case, we create the
439298272bb7SFilipe Manana 		 * parent directory out of order. But we need to check if this
439398272bb7SFilipe Manana 		 * did already happen before due to other refs in the same dir.
439498272bb7SFilipe Manana 		 */
4395498581f3SFilipe Manana 		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
439698272bb7SFilipe Manana 		if (ret < 0)
439798272bb7SFilipe Manana 			goto out;
439898272bb7SFilipe Manana 		if (ret == inode_state_will_create) {
			/* From here ret doubles as a "dir already created" flag. */
439998272bb7SFilipe Manana 			ret = 0;
440098272bb7SFilipe Manana 			/*
440198272bb7SFilipe Manana 			 * First check if any of the current inodes refs did
440298272bb7SFilipe Manana 			 * already create the dir.
440398272bb7SFilipe Manana 			 */
440498272bb7SFilipe Manana 			list_for_each_entry(cur2, &sctx->new_refs, list) {
				/* Only refs processed before cur are relevant. */
440598272bb7SFilipe Manana 				if (cur == cur2)
440698272bb7SFilipe Manana 					break;
440798272bb7SFilipe Manana 				if (cur2->dir == cur->dir) {
440898272bb7SFilipe Manana 					ret = 1;
440998272bb7SFilipe Manana 					break;
441098272bb7SFilipe Manana 				}
441198272bb7SFilipe Manana 			}
441298272bb7SFilipe Manana 
441398272bb7SFilipe Manana 			/*
441498272bb7SFilipe Manana 			 * If that did not happen, check if a previous inode
441598272bb7SFilipe Manana 			 * did already create the dir.
441698272bb7SFilipe Manana 			 */
441798272bb7SFilipe Manana 			if (!ret)
441898272bb7SFilipe Manana 				ret = did_create_dir(sctx, cur->dir);
441998272bb7SFilipe Manana 			if (ret < 0)
442098272bb7SFilipe Manana 				goto out;
442198272bb7SFilipe Manana 			if (!ret) {
442298272bb7SFilipe Manana 				ret = send_create_inode(sctx, cur->dir);
442398272bb7SFilipe Manana 				if (ret < 0)
442498272bb7SFilipe Manana 					goto out;
4425e8a7f49dSFilipe Manana 				cache_dir_created(sctx, cur->dir);
442698272bb7SFilipe Manana 			}
442798272bb7SFilipe Manana 		}
442898272bb7SFilipe Manana 
		/*
		 * can_rename is never set back to true, so once a move has to
		 * wait, no further rename/link is sent for the remaining refs.
		 */
442984471e24SFilipe Manana 		if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
443084471e24SFilipe Manana 			ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
443184471e24SFilipe Manana 			if (ret < 0)
443284471e24SFilipe Manana 				goto out;
443384471e24SFilipe Manana 			if (ret == 1) {
443484471e24SFilipe Manana 				can_rename = false;
443584471e24SFilipe Manana 				*pending_move = 1;
443684471e24SFilipe Manana 			}
443784471e24SFilipe Manana 		}
443884471e24SFilipe Manana 
44398b191a68SFilipe Manana 		if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
44408b191a68SFilipe Manana 		    can_rename) {
44418b191a68SFilipe Manana 			ret = wait_for_parent_move(sctx, cur, is_orphan);
44428b191a68SFilipe Manana 			if (ret < 0)
44438b191a68SFilipe Manana 				goto out;
44448b191a68SFilipe Manana 			if (ret == 1) {
44458b191a68SFilipe Manana 				can_rename = false;
44468b191a68SFilipe Manana 				*pending_move = 1;
44478b191a68SFilipe Manana 			}
44488b191a68SFilipe Manana 		}
44498b191a68SFilipe Manana 
445031db9f7cSAlexander Block 		/*
445131db9f7cSAlexander Block 		 * link/move the ref to the new place. If we have an orphan
445231db9f7cSAlexander Block 		 * inode, move it and update valid_path. If not, link or move
445331db9f7cSAlexander Block 		 * it depending on the inode mode.
445431db9f7cSAlexander Block 		 */
445584471e24SFilipe Manana 		if (is_orphan && can_rename) {
445631db9f7cSAlexander Block 			ret = send_rename(sctx, valid_path, cur->full_path);
445731db9f7cSAlexander Block 			if (ret < 0)
445831db9f7cSAlexander Block 				goto out;
445931db9f7cSAlexander Block 			is_orphan = 0;
446031db9f7cSAlexander Block 			ret = fs_path_copy(valid_path, cur->full_path);
446131db9f7cSAlexander Block 			if (ret < 0)
446231db9f7cSAlexander Block 				goto out;
446384471e24SFilipe Manana 		} else if (can_rename) {
446431db9f7cSAlexander Block 			if (S_ISDIR(sctx->cur_inode_mode)) {
446531db9f7cSAlexander Block 				/*
446631db9f7cSAlexander Block 				 * Dirs can't be linked, so move it. For moved
446731db9f7cSAlexander Block 				 * dirs, we always have one new and one deleted
446831db9f7cSAlexander Block 				 * ref. The deleted ref is ignored later.
446931db9f7cSAlexander Block 				 */
447031db9f7cSAlexander Block 				ret = send_rename(sctx, valid_path,
447131db9f7cSAlexander Block 						  cur->full_path);
44729f03740aSFilipe David Borba Manana 				if (!ret)
44739f03740aSFilipe David Borba Manana 					ret = fs_path_copy(valid_path,
44749f03740aSFilipe David Borba Manana 							   cur->full_path);
447531db9f7cSAlexander Block 				if (ret < 0)
447631db9f7cSAlexander Block 					goto out;
447731db9f7cSAlexander Block 			} else {
4478f5962781SFilipe Manana 				/*
4479f5962781SFilipe Manana 				 * We might have previously orphanized an inode
4480f5962781SFilipe Manana 				 * which is an ancestor of our current inode,
4481f5962781SFilipe Manana 				 * so our reference's full path, which was
4482f5962781SFilipe Manana 				 * computed before any such orphanizations, must
4483f5962781SFilipe Manana 				 * be updated.
4484f5962781SFilipe Manana 				 */
4485f5962781SFilipe Manana 				if (orphanized_dir) {
4486f5962781SFilipe Manana 					ret = update_ref_path(sctx, cur);
4487f5962781SFilipe Manana 					if (ret < 0)
4488f5962781SFilipe Manana 						goto out;
4489f5962781SFilipe Manana 				}
449031db9f7cSAlexander Block 				ret = send_link(sctx, cur->full_path,
449131db9f7cSAlexander Block 						valid_path);
449231db9f7cSAlexander Block 				if (ret < 0)
449331db9f7cSAlexander Block 					goto out;
449431db9f7cSAlexander Block 			}
449531db9f7cSAlexander Block 		}
		/* Remember the ref in check_dirs for the final loop below. */
4496ba5e8f2eSJosef Bacik 		ret = dup_ref(cur, &check_dirs);
449731db9f7cSAlexander Block 		if (ret < 0)
449831db9f7cSAlexander Block 			goto out;
449931db9f7cSAlexander Block 	}
450031db9f7cSAlexander Block 
450131db9f7cSAlexander Block 	if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
450231db9f7cSAlexander Block 		/*
450331db9f7cSAlexander Block 		 * Check if we can already rmdir the directory. If not,
450431db9f7cSAlexander Block 		 * orphanize it. For every dir item inside that gets deleted
450531db9f7cSAlexander Block 		 * later, we do this check again and rmdir it then if possible.
450631db9f7cSAlexander Block 		 * See the use of check_dirs for more details.
450731db9f7cSAlexander Block 		 */
450824970ccbSFilipe Manana 		ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen);
450931db9f7cSAlexander Block 		if (ret < 0)
451031db9f7cSAlexander Block 			goto out;
451131db9f7cSAlexander Block 		if (ret) {
451231db9f7cSAlexander Block 			ret = send_rmdir(sctx, valid_path);
451331db9f7cSAlexander Block 			if (ret < 0)
451431db9f7cSAlexander Block 				goto out;
451531db9f7cSAlexander Block 		} else if (!is_orphan) {
451631db9f7cSAlexander Block 			ret = orphanize_inode(sctx, sctx->cur_ino,
451731db9f7cSAlexander Block 					sctx->cur_inode_gen, valid_path);
451831db9f7cSAlexander Block 			if (ret < 0)
451931db9f7cSAlexander Block 				goto out;
452031db9f7cSAlexander Block 			is_orphan = 1;
452131db9f7cSAlexander Block 		}
452231db9f7cSAlexander Block 
452331db9f7cSAlexander Block 		list_for_each_entry(cur, &sctx->deleted_refs, list) {
4524ba5e8f2eSJosef Bacik 			ret = dup_ref(cur, &check_dirs);
452531db9f7cSAlexander Block 			if (ret < 0)
452631db9f7cSAlexander Block 				goto out;
452731db9f7cSAlexander Block 		}
4528ccf1626bSAlexander Block 	} else if (S_ISDIR(sctx->cur_inode_mode) &&
4529ccf1626bSAlexander Block 		   !list_empty(&sctx->deleted_refs)) {
4530ccf1626bSAlexander Block 		/*
4531ccf1626bSAlexander Block 		 * We have a moved dir. Add the old parent to check_dirs
4532ccf1626bSAlexander Block 		 */
4533ccf1626bSAlexander Block 		cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
4534ccf1626bSAlexander Block 				list);
4535ba5e8f2eSJosef Bacik 		ret = dup_ref(cur, &check_dirs);
4536ccf1626bSAlexander Block 		if (ret < 0)
4537ccf1626bSAlexander Block 			goto out;
453831db9f7cSAlexander Block 	} else if (!S_ISDIR(sctx->cur_inode_mode)) {
453931db9f7cSAlexander Block 		/*
454031db9f7cSAlexander Block 		 * We have a non dir inode. Go through all deleted refs and
454131db9f7cSAlexander Block 		 * unlink them if they were not already overwritten by other
454231db9f7cSAlexander Block 		 * inodes.
454331db9f7cSAlexander Block 		 */
454431db9f7cSAlexander Block 		list_for_each_entry(cur, &sctx->deleted_refs, list) {
454531db9f7cSAlexander Block 			ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
454631db9f7cSAlexander Block 					sctx->cur_ino, sctx->cur_inode_gen,
454731db9f7cSAlexander Block 					cur->name, cur->name_len);
454831db9f7cSAlexander Block 			if (ret < 0)
454931db9f7cSAlexander Block 				goto out;
455031db9f7cSAlexander Block 			if (!ret) {
4551fdb13889SFilipe Manana 				/*
4552fdb13889SFilipe Manana 				 * If we orphanized any ancestor before, we need
4553fdb13889SFilipe Manana 				 * to recompute the full path for deleted names,
4554fdb13889SFilipe Manana 				 * since any such path was computed before we
4555fdb13889SFilipe Manana 				 * processed any references and orphanized any
4556fdb13889SFilipe Manana 				 * ancestor inode.
4557fdb13889SFilipe Manana 				 */
4558fdb13889SFilipe Manana 				if (orphanized_ancestor) {
4559f5962781SFilipe Manana 					ret = update_ref_path(sctx, cur);
4560f5962781SFilipe Manana 					if (ret < 0)
4561fdb13889SFilipe Manana 						goto out;
4562fdb13889SFilipe Manana 				}
456331db9f7cSAlexander Block 				ret = send_unlink(sctx, cur->full_path);
456431db9f7cSAlexander Block 				if (ret < 0)
456531db9f7cSAlexander Block 					goto out;
456631db9f7cSAlexander Block 			}
4567ba5e8f2eSJosef Bacik 			ret = dup_ref(cur, &check_dirs);
456831db9f7cSAlexander Block 			if (ret < 0)
456931db9f7cSAlexander Block 				goto out;
457031db9f7cSAlexander Block 		}
457131db9f7cSAlexander Block 		/*
457231db9f7cSAlexander Block 		 * If the inode is still orphan, unlink the orphan. This may
457331db9f7cSAlexander Block 		 * happen when a previous inode did overwrite the first ref
457431db9f7cSAlexander Block 		 * of this inode and no new refs were added for the current
4575766702efSAlexander Block 		 * inode. Unlinking does not mean that the inode is deleted in
4576766702efSAlexander Block 		 * all cases. There may still be links to this inode in other
4577766702efSAlexander Block 		 * places.
457831db9f7cSAlexander Block 		 */
45791f4692daSAlexander Block 		if (is_orphan) {
458031db9f7cSAlexander Block 			ret = send_unlink(sctx, valid_path);
458131db9f7cSAlexander Block 			if (ret < 0)
458231db9f7cSAlexander Block 				goto out;
458331db9f7cSAlexander Block 		}
458431db9f7cSAlexander Block 	}
458531db9f7cSAlexander Block 
458631db9f7cSAlexander Block 	/*
458731db9f7cSAlexander Block 	 * We did collect all parent dirs where cur_inode was once located. We
458831db9f7cSAlexander Block 	 * now go through all these dirs and check if they are pending for
458931db9f7cSAlexander Block 	 * deletion and if it's finally possible to perform the rmdir now.
459031db9f7cSAlexander Block 	 * We also update the inode stats of the parent dirs here.
459131db9f7cSAlexander Block 	 */
4592ba5e8f2eSJosef Bacik 	list_for_each_entry(cur, &check_dirs, list) {
4593766702efSAlexander Block 		/*
4594766702efSAlexander Block 		 * In case we had refs into dirs that were not processed yet,
4595766702efSAlexander Block 		 * we don't need to do the utime and rmdir logic for these dirs.
4596766702efSAlexander Block 		 * The dir will be processed later.
4597766702efSAlexander Block 		 */
4598ba5e8f2eSJosef Bacik 		if (cur->dir > sctx->cur_ino)
459931db9f7cSAlexander Block 			continue;
460031db9f7cSAlexander Block 
4601498581f3SFilipe Manana 		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen, NULL, NULL);
460231db9f7cSAlexander Block 		if (ret < 0)
460331db9f7cSAlexander Block 			goto out;
460431db9f7cSAlexander Block 
460531db9f7cSAlexander Block 		if (ret == inode_state_did_create ||
460631db9f7cSAlexander Block 		    ret == inode_state_no_change) {
46073e49363bSFilipe Manana 			ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen);
460831db9f7cSAlexander Block 			if (ret < 0)
460931db9f7cSAlexander Block 				goto out;
461029d6d30fSFilipe Manana 		} else if (ret == inode_state_did_delete &&
461129d6d30fSFilipe Manana 			   cur->dir != last_dir_ino_rm) {
461224970ccbSFilipe Manana 			ret = can_rmdir(sctx, cur->dir, cur->dir_gen);
461331db9f7cSAlexander Block 			if (ret < 0)
461431db9f7cSAlexander Block 				goto out;
461531db9f7cSAlexander Block 			if (ret) {
				/* valid_path is reused here as a scratch buffer. */
4616ba5e8f2eSJosef Bacik 				ret = get_cur_path(sctx, cur->dir,
4617ba5e8f2eSJosef Bacik 						   cur->dir_gen, valid_path);
461831db9f7cSAlexander Block 				if (ret < 0)
461931db9f7cSAlexander Block 					goto out;
462031db9f7cSAlexander Block 				ret = send_rmdir(sctx, valid_path);
462131db9f7cSAlexander Block 				if (ret < 0)
462231db9f7cSAlexander Block 					goto out;
				/*
				 * Remember the directory just removed so the
				 * next check_dirs entry for the same directory
				 * does not trigger a second rmdir.
				 */
462329d6d30fSFilipe Manana 				last_dir_ino_rm = cur->dir;
462431db9f7cSAlexander Block 			}
462531db9f7cSAlexander Block 		}
462631db9f7cSAlexander Block 	}
462731db9f7cSAlexander Block 
	/* ret may still hold a non-negative state or flag value; report success. */
462831db9f7cSAlexander Block 	ret = 0;
462931db9f7cSAlexander Block 
463031db9f7cSAlexander Block out:
4631ba5e8f2eSJosef Bacik 	__free_recorded_refs(&check_dirs);
463231db9f7cSAlexander Block 	free_recorded_refs(sctx);
4633924794c9STsutomu Itoh 	fs_path_free(valid_path);
463431db9f7cSAlexander Block 	return ret;
463531db9f7cSAlexander Block }
463631db9f7cSAlexander Block 
rbtree_ref_comp(const void * k,const struct rb_node * node)46373aa5bd36SBingJing Chang static int rbtree_ref_comp(const void *k, const struct rb_node *node)
46383aa5bd36SBingJing Chang {
46393aa5bd36SBingJing Chang 	const struct recorded_ref *data = k;
46403aa5bd36SBingJing Chang 	const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node);
46413aa5bd36SBingJing Chang 	int result;
46423aa5bd36SBingJing Chang 
46433aa5bd36SBingJing Chang 	if (data->dir > ref->dir)
46443aa5bd36SBingJing Chang 		return 1;
46453aa5bd36SBingJing Chang 	if (data->dir < ref->dir)
46463aa5bd36SBingJing Chang 		return -1;
46473aa5bd36SBingJing Chang 	if (data->dir_gen > ref->dir_gen)
46483aa5bd36SBingJing Chang 		return 1;
46493aa5bd36SBingJing Chang 	if (data->dir_gen < ref->dir_gen)
46503aa5bd36SBingJing Chang 		return -1;
46513aa5bd36SBingJing Chang 	if (data->name_len > ref->name_len)
46523aa5bd36SBingJing Chang 		return 1;
46533aa5bd36SBingJing Chang 	if (data->name_len < ref->name_len)
46543aa5bd36SBingJing Chang 		return -1;
46553aa5bd36SBingJing Chang 	result = strcmp(data->name, ref->name);
46563aa5bd36SBingJing Chang 	if (result > 0)
46573aa5bd36SBingJing Chang 		return 1;
46583aa5bd36SBingJing Chang 	if (result < 0)
46593aa5bd36SBingJing Chang 		return -1;
46603aa5bd36SBingJing Chang 	return 0;
46613aa5bd36SBingJing Chang }
46623aa5bd36SBingJing Chang 
rbtree_ref_less(struct rb_node * node,const struct rb_node * parent)46633aa5bd36SBingJing Chang static bool rbtree_ref_less(struct rb_node *node, const struct rb_node *parent)
46643aa5bd36SBingJing Chang {
46653aa5bd36SBingJing Chang 	const struct recorded_ref *entry = rb_entry(node, struct recorded_ref, node);
46663aa5bd36SBingJing Chang 
46673aa5bd36SBingJing Chang 	return rbtree_ref_comp(entry, parent) < 0;
46683aa5bd36SBingJing Chang }
46693aa5bd36SBingJing Chang 
/*
 * Allocate a recorded ref for @name inside directory (@dir, @dir_gen), build
 * its full path as the directory's current path followed by @name, and insert
 * it both at the tail of the @refs list and into the rbtree @root.
 *
 * Returns 0 on success, -ENOMEM if an allocation fails, or the error returned
 * by get_cur_path() / fs_path_add_path(). On failure nothing is left linked
 * and both the path and the ref are released.
 */
static int record_ref_in_tree(struct rb_root *root, struct list_head *refs,
			      struct fs_path *name, u64 dir, u64 dir_gen,
			      struct send_ctx *sctx)
{
	struct recorded_ref *new_ref = NULL;
	struct fs_path *ref_path;
	int ret = -ENOMEM;

	ref_path = fs_path_alloc();
	if (!ref_path)
		goto cleanup;

	new_ref = recorded_ref_alloc();
	if (!new_ref)
		goto cleanup;

	ret = get_cur_path(sctx, dir, dir_gen, ref_path);
	if (ret >= 0)
		ret = fs_path_add_path(ref_path, name);
	if (ret < 0)
		goto cleanup;

	new_ref->dir = dir;
	new_ref->dir_gen = dir_gen;
	set_ref_path(new_ref, ref_path);
	list_add_tail(&new_ref->list, refs);
	rb_add(&new_ref->node, root, rbtree_ref_less);
	new_ref->root = root;

cleanup:
	if (ret == 0)
		return 0;

	/*
	 * Free the path separately only while no ref has a full_path set;
	 * otherwise it is left to recorded_ref_free().
	 */
	if (ref_path && !(new_ref && new_ref->full_path))
		fs_path_free(ref_path);
	recorded_ref_free(new_ref);
	return ret;
}
47113aa5bd36SBingJing Chang 
/*
 * iterate_inode_ref() callback for refs found in the send root.
 *
 * If an identical ref (same directory, directory generation and name) is
 * already recorded as deleted, the two cancel out and the deleted ref is
 * dropped. Otherwise the ref is recorded as a new ref.
 *
 * @num and @index are unused. Returns a negative errno on failure.
 */
static int record_new_ref_if_needed(int num, u64 dir, int index,
				    struct fs_path *name, void *ctx)
{
	struct send_ctx *sctx = ctx;
	struct recorded_ref key;
	struct rb_node *match;
	u64 dir_gen;
	int ret;

	ret = get_inode_gen(sctx->send_root, dir, &dir_gen);
	if (ret < 0)
		return ret;

	/* Temporary on-stack ref used only as the lookup key. */
	key.dir = dir;
	key.dir_gen = dir_gen;
	set_ref_path(&key, name);

	match = rb_find(&key, &sctx->rbtree_deleted_refs, rbtree_ref_comp);
	if (!match)
		return record_ref_in_tree(&sctx->rbtree_new_refs,
					  &sctx->new_refs, name, dir, dir_gen,
					  sctx);

	recorded_ref_free(rb_entry(match, struct recorded_ref, node));
	return ret;
}
47413aa5bd36SBingJing Chang 
/*
 * iterate_inode_ref() callback for refs found in the parent root.
 *
 * If an identical ref (same directory, directory generation and name) is
 * already recorded as new, the two cancel out and the new ref is dropped.
 * Otherwise the ref is recorded as a deleted ref.
 *
 * @num and @index are unused. Returns a negative errno on failure.
 */
static int record_deleted_ref_if_needed(int num, u64 dir, int index,
					struct fs_path *name, void *ctx)
{
	struct send_ctx *sctx = ctx;
	struct recorded_ref key;
	struct rb_node *match;
	u64 dir_gen;
	int ret;

	ret = get_inode_gen(sctx->parent_root, dir, &dir_gen);
	if (ret < 0)
		return ret;

	/* Temporary on-stack ref used only as the lookup key. */
	key.dir = dir;
	key.dir_gen = dir_gen;
	set_ref_path(&key, name);

	match = rb_find(&key, &sctx->rbtree_new_refs, rbtree_ref_comp);
	if (!match)
		return record_ref_in_tree(&sctx->rbtree_deleted_refs,
					  &sctx->deleted_refs, name, dir,
					  dir_gen, sctx);

	recorded_ref_free(rb_entry(match, struct recorded_ref, node));
	return ret;
}
47713aa5bd36SBingJing Chang 
record_new_ref(struct send_ctx * sctx)477231db9f7cSAlexander Block static int record_new_ref(struct send_ctx *sctx)
477331db9f7cSAlexander Block {
477431db9f7cSAlexander Block 	int ret;
477531db9f7cSAlexander Block 
4776924794c9STsutomu Itoh 	ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
47773aa5bd36SBingJing Chang 				sctx->cmp_key, 0, record_new_ref_if_needed, sctx);
477831db9f7cSAlexander Block 	if (ret < 0)
477931db9f7cSAlexander Block 		goto out;
478031db9f7cSAlexander Block 	ret = 0;
478131db9f7cSAlexander Block 
478231db9f7cSAlexander Block out:
478331db9f7cSAlexander Block 	return ret;
478431db9f7cSAlexander Block }
478531db9f7cSAlexander Block 
record_deleted_ref(struct send_ctx * sctx)478631db9f7cSAlexander Block static int record_deleted_ref(struct send_ctx *sctx)
478731db9f7cSAlexander Block {
478831db9f7cSAlexander Block 	int ret;
478931db9f7cSAlexander Block 
4790924794c9STsutomu Itoh 	ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
47913aa5bd36SBingJing Chang 				sctx->cmp_key, 0, record_deleted_ref_if_needed,
47923aa5bd36SBingJing Chang 				sctx);
479331db9f7cSAlexander Block 	if (ret < 0)
479431db9f7cSAlexander Block 		goto out;
479531db9f7cSAlexander Block 	ret = 0;
479631db9f7cSAlexander Block 
479731db9f7cSAlexander Block out:
479831db9f7cSAlexander Block 	return ret;
479931db9f7cSAlexander Block }
480031db9f7cSAlexander Block 
record_changed_ref(struct send_ctx * sctx)480131db9f7cSAlexander Block static int record_changed_ref(struct send_ctx *sctx)
480231db9f7cSAlexander Block {
480331db9f7cSAlexander Block 	int ret = 0;
480431db9f7cSAlexander Block 
4805924794c9STsutomu Itoh 	ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
48060d8869fbSFilipe Manana 			sctx->cmp_key, 0, record_new_ref_if_needed, sctx);
480731db9f7cSAlexander Block 	if (ret < 0)
480831db9f7cSAlexander Block 		goto out;
4809924794c9STsutomu Itoh 	ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
48100d8869fbSFilipe Manana 			sctx->cmp_key, 0, record_deleted_ref_if_needed, sctx);
481131db9f7cSAlexander Block 	if (ret < 0)
481231db9f7cSAlexander Block 		goto out;
481331db9f7cSAlexander Block 	ret = 0;
481431db9f7cSAlexander Block 
481531db9f7cSAlexander Block out:
481631db9f7cSAlexander Block 	return ret;
481731db9f7cSAlexander Block }
481831db9f7cSAlexander Block 
481931db9f7cSAlexander Block /*
482031db9f7cSAlexander Block  * Record and process all refs at once. Needed when an inode changes the
482131db9f7cSAlexander Block  * generation number, which means that it was deleted and recreated.
482231db9f7cSAlexander Block  */
process_all_refs(struct send_ctx * sctx,enum btrfs_compare_tree_result cmd)482331db9f7cSAlexander Block static int process_all_refs(struct send_ctx *sctx,
482431db9f7cSAlexander Block 			    enum btrfs_compare_tree_result cmd)
482531db9f7cSAlexander Block {
4826649b9635SGabriel Niebler 	int ret = 0;
4827649b9635SGabriel Niebler 	int iter_ret = 0;
482831db9f7cSAlexander Block 	struct btrfs_root *root;
482931db9f7cSAlexander Block 	struct btrfs_path *path;
483031db9f7cSAlexander Block 	struct btrfs_key key;
483131db9f7cSAlexander Block 	struct btrfs_key found_key;
483231db9f7cSAlexander Block 	iterate_inode_ref_t cb;
48339f03740aSFilipe David Borba Manana 	int pending_move = 0;
483431db9f7cSAlexander Block 
483531db9f7cSAlexander Block 	path = alloc_path_for_send();
483631db9f7cSAlexander Block 	if (!path)
483731db9f7cSAlexander Block 		return -ENOMEM;
483831db9f7cSAlexander Block 
483931db9f7cSAlexander Block 	if (cmd == BTRFS_COMPARE_TREE_NEW) {
484031db9f7cSAlexander Block 		root = sctx->send_root;
48410d8869fbSFilipe Manana 		cb = record_new_ref_if_needed;
484231db9f7cSAlexander Block 	} else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
484331db9f7cSAlexander Block 		root = sctx->parent_root;
48440d8869fbSFilipe Manana 		cb = record_deleted_ref_if_needed;
484531db9f7cSAlexander Block 	} else {
48464d1a63b2SDavid Sterba 		btrfs_err(sctx->send_root->fs_info,
48474d1a63b2SDavid Sterba 				"Wrong command %d in process_all_refs", cmd);
48484d1a63b2SDavid Sterba 		ret = -EINVAL;
48494d1a63b2SDavid Sterba 		goto out;
485031db9f7cSAlexander Block 	}
485131db9f7cSAlexander Block 
485231db9f7cSAlexander Block 	key.objectid = sctx->cmp_key->objectid;
485331db9f7cSAlexander Block 	key.type = BTRFS_INODE_REF_KEY;
485431db9f7cSAlexander Block 	key.offset = 0;
4855649b9635SGabriel Niebler 	btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
485631db9f7cSAlexander Block 		if (found_key.objectid != key.objectid ||
485796b5bd77SJan Schmidt 		    (found_key.type != BTRFS_INODE_REF_KEY &&
485896b5bd77SJan Schmidt 		     found_key.type != BTRFS_INODE_EXTREF_KEY))
485931db9f7cSAlexander Block 			break;
486031db9f7cSAlexander Block 
4861924794c9STsutomu Itoh 		ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
486231db9f7cSAlexander Block 		if (ret < 0)
486331db9f7cSAlexander Block 			goto out;
4864649b9635SGabriel Niebler 	}
4865649b9635SGabriel Niebler 	/* Catch error found during iteration */
4866649b9635SGabriel Niebler 	if (iter_ret < 0) {
4867649b9635SGabriel Niebler 		ret = iter_ret;
4868649b9635SGabriel Niebler 		goto out;
486931db9f7cSAlexander Block 	}
4870e938c8adSAlexander Block 	btrfs_release_path(path);
487131db9f7cSAlexander Block 
48723dc09ec8SJosef Bacik 	/*
48733dc09ec8SJosef Bacik 	 * We don't actually care about pending_move as we are simply
48743dc09ec8SJosef Bacik 	 * re-creating this inode and will be rename'ing it into place once we
48753dc09ec8SJosef Bacik 	 * rename the parent directory.
48763dc09ec8SJosef Bacik 	 */
48779f03740aSFilipe David Borba Manana 	ret = process_recorded_refs(sctx, &pending_move);
487831db9f7cSAlexander Block out:
487931db9f7cSAlexander Block 	btrfs_free_path(path);
488031db9f7cSAlexander Block 	return ret;
488131db9f7cSAlexander Block }
488231db9f7cSAlexander Block 
send_set_xattr(struct send_ctx * sctx,struct fs_path * path,const char * name,int name_len,const char * data,int data_len)488331db9f7cSAlexander Block static int send_set_xattr(struct send_ctx *sctx,
488431db9f7cSAlexander Block 			  struct fs_path *path,
488531db9f7cSAlexander Block 			  const char *name, int name_len,
488631db9f7cSAlexander Block 			  const char *data, int data_len)
488731db9f7cSAlexander Block {
488831db9f7cSAlexander Block 	int ret = 0;
488931db9f7cSAlexander Block 
489031db9f7cSAlexander Block 	ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
489131db9f7cSAlexander Block 	if (ret < 0)
489231db9f7cSAlexander Block 		goto out;
489331db9f7cSAlexander Block 
489431db9f7cSAlexander Block 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
489531db9f7cSAlexander Block 	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
489631db9f7cSAlexander Block 	TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
489731db9f7cSAlexander Block 
489831db9f7cSAlexander Block 	ret = send_cmd(sctx);
489931db9f7cSAlexander Block 
490031db9f7cSAlexander Block tlv_put_failure:
490131db9f7cSAlexander Block out:
490231db9f7cSAlexander Block 	return ret;
490331db9f7cSAlexander Block }
490431db9f7cSAlexander Block 
send_remove_xattr(struct send_ctx * sctx,struct fs_path * path,const char * name,int name_len)490531db9f7cSAlexander Block static int send_remove_xattr(struct send_ctx *sctx,
490631db9f7cSAlexander Block 			  struct fs_path *path,
490731db9f7cSAlexander Block 			  const char *name, int name_len)
490831db9f7cSAlexander Block {
490931db9f7cSAlexander Block 	int ret = 0;
491031db9f7cSAlexander Block 
491131db9f7cSAlexander Block 	ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
491231db9f7cSAlexander Block 	if (ret < 0)
491331db9f7cSAlexander Block 		goto out;
491431db9f7cSAlexander Block 
491531db9f7cSAlexander Block 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
491631db9f7cSAlexander Block 	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
491731db9f7cSAlexander Block 
491831db9f7cSAlexander Block 	ret = send_cmd(sctx);
491931db9f7cSAlexander Block 
492031db9f7cSAlexander Block tlv_put_failure:
492131db9f7cSAlexander Block out:
492231db9f7cSAlexander Block 	return ret;
492331db9f7cSAlexander Block }
492431db9f7cSAlexander Block 
__process_new_xattr(int num,struct btrfs_key * di_key,const char * name,int name_len,const char * data,int data_len,void * ctx)492531db9f7cSAlexander Block static int __process_new_xattr(int num, struct btrfs_key *di_key,
4926b1dea4e7SOmar Sandoval 			       const char *name, int name_len, const char *data,
4927b1dea4e7SOmar Sandoval 			       int data_len, void *ctx)
492831db9f7cSAlexander Block {
492931db9f7cSAlexander Block 	int ret;
493031db9f7cSAlexander Block 	struct send_ctx *sctx = ctx;
493131db9f7cSAlexander Block 	struct fs_path *p;
49322211d5baSAndreas Gruenbacher 	struct posix_acl_xattr_header dummy_acl;
493331db9f7cSAlexander Block 
493489efda52SMarcos Paulo de Souza 	/* Capabilities are emitted by finish_inode_if_needed */
493589efda52SMarcos Paulo de Souza 	if (!strncmp(name, XATTR_NAME_CAPS, name_len))
493689efda52SMarcos Paulo de Souza 		return 0;
493789efda52SMarcos Paulo de Souza 
4938924794c9STsutomu Itoh 	p = fs_path_alloc();
493931db9f7cSAlexander Block 	if (!p)
494031db9f7cSAlexander Block 		return -ENOMEM;
494131db9f7cSAlexander Block 
494231db9f7cSAlexander Block 	/*
494301327610SNicholas D Steeves 	 * This hack is needed because empty acls are stored as zero byte
494431db9f7cSAlexander Block 	 * data in xattrs. Problem with that is, that receiving these zero byte
494501327610SNicholas D Steeves 	 * acls will fail later. To fix this, we send a dummy acl list that
494631db9f7cSAlexander Block 	 * only contains the version number and no entries.
494731db9f7cSAlexander Block 	 */
494831db9f7cSAlexander Block 	if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
494931db9f7cSAlexander Block 	    !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
495031db9f7cSAlexander Block 		if (data_len == 0) {
495131db9f7cSAlexander Block 			dummy_acl.a_version =
495231db9f7cSAlexander Block 					cpu_to_le32(POSIX_ACL_XATTR_VERSION);
495331db9f7cSAlexander Block 			data = (char *)&dummy_acl;
495431db9f7cSAlexander Block 			data_len = sizeof(dummy_acl);
495531db9f7cSAlexander Block 		}
495631db9f7cSAlexander Block 	}
495731db9f7cSAlexander Block 
495831db9f7cSAlexander Block 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
495931db9f7cSAlexander Block 	if (ret < 0)
496031db9f7cSAlexander Block 		goto out;
496131db9f7cSAlexander Block 
496231db9f7cSAlexander Block 	ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
496331db9f7cSAlexander Block 
496431db9f7cSAlexander Block out:
4965924794c9STsutomu Itoh 	fs_path_free(p);
496631db9f7cSAlexander Block 	return ret;
496731db9f7cSAlexander Block }
496831db9f7cSAlexander Block 
__process_deleted_xattr(int num,struct btrfs_key * di_key,const char * name,int name_len,const char * data,int data_len,void * ctx)496931db9f7cSAlexander Block static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
497031db9f7cSAlexander Block 				   const char *name, int name_len,
4971b1dea4e7SOmar Sandoval 				   const char *data, int data_len, void *ctx)
497231db9f7cSAlexander Block {
497331db9f7cSAlexander Block 	int ret;
497431db9f7cSAlexander Block 	struct send_ctx *sctx = ctx;
497531db9f7cSAlexander Block 	struct fs_path *p;
497631db9f7cSAlexander Block 
4977924794c9STsutomu Itoh 	p = fs_path_alloc();
497831db9f7cSAlexander Block 	if (!p)
497931db9f7cSAlexander Block 		return -ENOMEM;
498031db9f7cSAlexander Block 
498131db9f7cSAlexander Block 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
498231db9f7cSAlexander Block 	if (ret < 0)
498331db9f7cSAlexander Block 		goto out;
498431db9f7cSAlexander Block 
498531db9f7cSAlexander Block 	ret = send_remove_xattr(sctx, p, name, name_len);
498631db9f7cSAlexander Block 
498731db9f7cSAlexander Block out:
4988924794c9STsutomu Itoh 	fs_path_free(p);
498931db9f7cSAlexander Block 	return ret;
499031db9f7cSAlexander Block }
499131db9f7cSAlexander Block 
process_new_xattr(struct send_ctx * sctx)499231db9f7cSAlexander Block static int process_new_xattr(struct send_ctx *sctx)
499331db9f7cSAlexander Block {
499431db9f7cSAlexander Block 	int ret = 0;
499531db9f7cSAlexander Block 
4996924794c9STsutomu Itoh 	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4997a0357511SNikolay Borisov 			       __process_new_xattr, sctx);
499831db9f7cSAlexander Block 
499931db9f7cSAlexander Block 	return ret;
500031db9f7cSAlexander Block }
500131db9f7cSAlexander Block 
process_deleted_xattr(struct send_ctx * sctx)500231db9f7cSAlexander Block static int process_deleted_xattr(struct send_ctx *sctx)
500331db9f7cSAlexander Block {
5004e2c89907SMasahiro Yamada 	return iterate_dir_item(sctx->parent_root, sctx->right_path,
5005a0357511SNikolay Borisov 				__process_deleted_xattr, sctx);
500631db9f7cSAlexander Block }
500731db9f7cSAlexander Block 
/* Search state shared between find_xattr() and its callback __find_xattr(). */
struct find_xattr_ctx {
	/* Name to look for and its length in bytes. */
	const char *name;
	int name_len;
	/* Index of the matching entry, -1 while nothing was found. */
	int found_idx;
	/* kmemdup()'ed copy of the matching entry's data and its length. */
	char *found_data;
	int found_data_len;
};
501531db9f7cSAlexander Block 
__find_xattr(int num,struct btrfs_key * di_key,const char * name,int name_len,const char * data,int data_len,void * vctx)5016b1dea4e7SOmar Sandoval static int __find_xattr(int num, struct btrfs_key *di_key, const char *name,
5017b1dea4e7SOmar Sandoval 			int name_len, const char *data, int data_len, void *vctx)
501831db9f7cSAlexander Block {
501931db9f7cSAlexander Block 	struct find_xattr_ctx *ctx = vctx;
502031db9f7cSAlexander Block 
502131db9f7cSAlexander Block 	if (name_len == ctx->name_len &&
502231db9f7cSAlexander Block 	    strncmp(name, ctx->name, name_len) == 0) {
502331db9f7cSAlexander Block 		ctx->found_idx = num;
502431db9f7cSAlexander Block 		ctx->found_data_len = data_len;
5025e780b0d1SDavid Sterba 		ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
502631db9f7cSAlexander Block 		if (!ctx->found_data)
502731db9f7cSAlexander Block 			return -ENOMEM;
502831db9f7cSAlexander Block 		return 1;
502931db9f7cSAlexander Block 	}
503031db9f7cSAlexander Block 	return 0;
503131db9f7cSAlexander Block }
503231db9f7cSAlexander Block 
find_xattr(struct btrfs_root * root,struct btrfs_path * path,struct btrfs_key * key,const char * name,int name_len,char ** data,int * data_len)5033924794c9STsutomu Itoh static int find_xattr(struct btrfs_root *root,
503431db9f7cSAlexander Block 		      struct btrfs_path *path,
503531db9f7cSAlexander Block 		      struct btrfs_key *key,
503631db9f7cSAlexander Block 		      const char *name, int name_len,
503731db9f7cSAlexander Block 		      char **data, int *data_len)
503831db9f7cSAlexander Block {
503931db9f7cSAlexander Block 	int ret;
504031db9f7cSAlexander Block 	struct find_xattr_ctx ctx;
504131db9f7cSAlexander Block 
504231db9f7cSAlexander Block 	ctx.name = name;
504331db9f7cSAlexander Block 	ctx.name_len = name_len;
504431db9f7cSAlexander Block 	ctx.found_idx = -1;
504531db9f7cSAlexander Block 	ctx.found_data = NULL;
504631db9f7cSAlexander Block 	ctx.found_data_len = 0;
504731db9f7cSAlexander Block 
5048a0357511SNikolay Borisov 	ret = iterate_dir_item(root, path, __find_xattr, &ctx);
504931db9f7cSAlexander Block 	if (ret < 0)
505031db9f7cSAlexander Block 		return ret;
505131db9f7cSAlexander Block 
505231db9f7cSAlexander Block 	if (ctx.found_idx == -1)
505331db9f7cSAlexander Block 		return -ENOENT;
505431db9f7cSAlexander Block 	if (data) {
505531db9f7cSAlexander Block 		*data = ctx.found_data;
505631db9f7cSAlexander Block 		*data_len = ctx.found_data_len;
505731db9f7cSAlexander Block 	} else {
505831db9f7cSAlexander Block 		kfree(ctx.found_data);
505931db9f7cSAlexander Block 	}
506031db9f7cSAlexander Block 	return ctx.found_idx;
506131db9f7cSAlexander Block }
506231db9f7cSAlexander Block 
506331db9f7cSAlexander Block 
__process_changed_new_xattr(int num,struct btrfs_key * di_key,const char * name,int name_len,const char * data,int data_len,void * ctx)506431db9f7cSAlexander Block static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
506531db9f7cSAlexander Block 				       const char *name, int name_len,
506631db9f7cSAlexander Block 				       const char *data, int data_len,
5067b1dea4e7SOmar Sandoval 				       void *ctx)
506831db9f7cSAlexander Block {
506931db9f7cSAlexander Block 	int ret;
507031db9f7cSAlexander Block 	struct send_ctx *sctx = ctx;
507131db9f7cSAlexander Block 	char *found_data = NULL;
507231db9f7cSAlexander Block 	int found_data_len  = 0;
507331db9f7cSAlexander Block 
5074924794c9STsutomu Itoh 	ret = find_xattr(sctx->parent_root, sctx->right_path,
507531db9f7cSAlexander Block 			 sctx->cmp_key, name, name_len, &found_data,
507631db9f7cSAlexander Block 			 &found_data_len);
507731db9f7cSAlexander Block 	if (ret == -ENOENT) {
507831db9f7cSAlexander Block 		ret = __process_new_xattr(num, di_key, name, name_len, data,
5079b1dea4e7SOmar Sandoval 					  data_len, ctx);
508031db9f7cSAlexander Block 	} else if (ret >= 0) {
508131db9f7cSAlexander Block 		if (data_len != found_data_len ||
508231db9f7cSAlexander Block 		    memcmp(data, found_data, data_len)) {
508331db9f7cSAlexander Block 			ret = __process_new_xattr(num, di_key, name, name_len,
5084b1dea4e7SOmar Sandoval 						  data, data_len, ctx);
508531db9f7cSAlexander Block 		} else {
508631db9f7cSAlexander Block 			ret = 0;
508731db9f7cSAlexander Block 		}
508831db9f7cSAlexander Block 	}
508931db9f7cSAlexander Block 
509031db9f7cSAlexander Block 	kfree(found_data);
509131db9f7cSAlexander Block 	return ret;
509231db9f7cSAlexander Block }
509331db9f7cSAlexander Block 
__process_changed_deleted_xattr(int num,struct btrfs_key * di_key,const char * name,int name_len,const char * data,int data_len,void * ctx)509431db9f7cSAlexander Block static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
509531db9f7cSAlexander Block 					   const char *name, int name_len,
509631db9f7cSAlexander Block 					   const char *data, int data_len,
5097b1dea4e7SOmar Sandoval 					   void *ctx)
509831db9f7cSAlexander Block {
509931db9f7cSAlexander Block 	int ret;
510031db9f7cSAlexander Block 	struct send_ctx *sctx = ctx;
510131db9f7cSAlexander Block 
5102924794c9STsutomu Itoh 	ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
510331db9f7cSAlexander Block 			 name, name_len, NULL, NULL);
510431db9f7cSAlexander Block 	if (ret == -ENOENT)
510531db9f7cSAlexander Block 		ret = __process_deleted_xattr(num, di_key, name, name_len, data,
5106b1dea4e7SOmar Sandoval 					      data_len, ctx);
510731db9f7cSAlexander Block 	else if (ret >= 0)
510831db9f7cSAlexander Block 		ret = 0;
510931db9f7cSAlexander Block 
511031db9f7cSAlexander Block 	return ret;
511131db9f7cSAlexander Block }
511231db9f7cSAlexander Block 
process_changed_xattr(struct send_ctx * sctx)511331db9f7cSAlexander Block static int process_changed_xattr(struct send_ctx *sctx)
511431db9f7cSAlexander Block {
511531db9f7cSAlexander Block 	int ret = 0;
511631db9f7cSAlexander Block 
5117924794c9STsutomu Itoh 	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
5118a0357511SNikolay Borisov 			__process_changed_new_xattr, sctx);
511931db9f7cSAlexander Block 	if (ret < 0)
512031db9f7cSAlexander Block 		goto out;
5121924794c9STsutomu Itoh 	ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
5122a0357511SNikolay Borisov 			__process_changed_deleted_xattr, sctx);
512331db9f7cSAlexander Block 
512431db9f7cSAlexander Block out:
512531db9f7cSAlexander Block 	return ret;
512631db9f7cSAlexander Block }
512731db9f7cSAlexander Block 
process_all_new_xattrs(struct send_ctx * sctx)512831db9f7cSAlexander Block static int process_all_new_xattrs(struct send_ctx *sctx)
512931db9f7cSAlexander Block {
513069e43177SGabriel Niebler 	int ret = 0;
513169e43177SGabriel Niebler 	int iter_ret = 0;
513231db9f7cSAlexander Block 	struct btrfs_root *root;
513331db9f7cSAlexander Block 	struct btrfs_path *path;
513431db9f7cSAlexander Block 	struct btrfs_key key;
513531db9f7cSAlexander Block 	struct btrfs_key found_key;
513631db9f7cSAlexander Block 
513731db9f7cSAlexander Block 	path = alloc_path_for_send();
513831db9f7cSAlexander Block 	if (!path)
513931db9f7cSAlexander Block 		return -ENOMEM;
514031db9f7cSAlexander Block 
514131db9f7cSAlexander Block 	root = sctx->send_root;
514231db9f7cSAlexander Block 
514331db9f7cSAlexander Block 	key.objectid = sctx->cmp_key->objectid;
514431db9f7cSAlexander Block 	key.type = BTRFS_XATTR_ITEM_KEY;
514531db9f7cSAlexander Block 	key.offset = 0;
514669e43177SGabriel Niebler 	btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
514731db9f7cSAlexander Block 		if (found_key.objectid != key.objectid ||
514831db9f7cSAlexander Block 		    found_key.type != key.type) {
514931db9f7cSAlexander Block 			ret = 0;
515069e43177SGabriel Niebler 			break;
515131db9f7cSAlexander Block 		}
515231db9f7cSAlexander Block 
5153a0357511SNikolay Borisov 		ret = iterate_dir_item(root, path, __process_new_xattr, sctx);
515431db9f7cSAlexander Block 		if (ret < 0)
515569e43177SGabriel Niebler 			break;
515631db9f7cSAlexander Block 	}
515769e43177SGabriel Niebler 	/* Catch error found during iteration */
515869e43177SGabriel Niebler 	if (iter_ret < 0)
515969e43177SGabriel Niebler 		ret = iter_ret;
516031db9f7cSAlexander Block 
516131db9f7cSAlexander Block 	btrfs_free_path(path);
516231db9f7cSAlexander Block 	return ret;
516331db9f7cSAlexander Block }
516431db9f7cSAlexander Block 
send_verity(struct send_ctx * sctx,struct fs_path * path,struct fsverity_descriptor * desc)516538622010SBoris Burkov static int send_verity(struct send_ctx *sctx, struct fs_path *path,
516638622010SBoris Burkov 		       struct fsverity_descriptor *desc)
516738622010SBoris Burkov {
516838622010SBoris Burkov 	int ret;
516938622010SBoris Burkov 
517038622010SBoris Burkov 	ret = begin_cmd(sctx, BTRFS_SEND_C_ENABLE_VERITY);
517138622010SBoris Burkov 	if (ret < 0)
517238622010SBoris Burkov 		goto out;
517338622010SBoris Burkov 
517438622010SBoris Burkov 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
517538622010SBoris Burkov 	TLV_PUT_U8(sctx, BTRFS_SEND_A_VERITY_ALGORITHM,
517638622010SBoris Burkov 			le8_to_cpu(desc->hash_algorithm));
517738622010SBoris Burkov 	TLV_PUT_U32(sctx, BTRFS_SEND_A_VERITY_BLOCK_SIZE,
517838622010SBoris Burkov 			1U << le8_to_cpu(desc->log_blocksize));
517938622010SBoris Burkov 	TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SALT_DATA, desc->salt,
518038622010SBoris Burkov 			le8_to_cpu(desc->salt_size));
518138622010SBoris Burkov 	TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SIG_DATA, desc->signature,
518238622010SBoris Burkov 			le32_to_cpu(desc->sig_size));
518338622010SBoris Burkov 
518438622010SBoris Burkov 	ret = send_cmd(sctx);
518538622010SBoris Burkov 
518638622010SBoris Burkov tlv_put_failure:
518738622010SBoris Burkov out:
518838622010SBoris Burkov 	return ret;
518938622010SBoris Burkov }
519038622010SBoris Burkov 
process_verity(struct send_ctx * sctx)519138622010SBoris Burkov static int process_verity(struct send_ctx *sctx)
519238622010SBoris Burkov {
519338622010SBoris Burkov 	int ret = 0;
519438622010SBoris Burkov 	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
519538622010SBoris Burkov 	struct inode *inode;
519638622010SBoris Burkov 	struct fs_path *p;
519738622010SBoris Burkov 
519838622010SBoris Burkov 	inode = btrfs_iget(fs_info->sb, sctx->cur_ino, sctx->send_root);
519938622010SBoris Burkov 	if (IS_ERR(inode))
520038622010SBoris Burkov 		return PTR_ERR(inode);
520138622010SBoris Burkov 
520238622010SBoris Burkov 	ret = btrfs_get_verity_descriptor(inode, NULL, 0);
520338622010SBoris Burkov 	if (ret < 0)
520438622010SBoris Burkov 		goto iput;
520538622010SBoris Burkov 
520638622010SBoris Burkov 	if (ret > FS_VERITY_MAX_DESCRIPTOR_SIZE) {
520738622010SBoris Burkov 		ret = -EMSGSIZE;
520838622010SBoris Burkov 		goto iput;
520938622010SBoris Burkov 	}
521038622010SBoris Burkov 	if (!sctx->verity_descriptor) {
521138622010SBoris Burkov 		sctx->verity_descriptor = kvmalloc(FS_VERITY_MAX_DESCRIPTOR_SIZE,
521238622010SBoris Burkov 						   GFP_KERNEL);
521338622010SBoris Burkov 		if (!sctx->verity_descriptor) {
521438622010SBoris Burkov 			ret = -ENOMEM;
521538622010SBoris Burkov 			goto iput;
521638622010SBoris Burkov 		}
521738622010SBoris Burkov 	}
521838622010SBoris Burkov 
521938622010SBoris Burkov 	ret = btrfs_get_verity_descriptor(inode, sctx->verity_descriptor, ret);
522038622010SBoris Burkov 	if (ret < 0)
522138622010SBoris Burkov 		goto iput;
522238622010SBoris Burkov 
522338622010SBoris Burkov 	p = fs_path_alloc();
522438622010SBoris Burkov 	if (!p) {
522538622010SBoris Burkov 		ret = -ENOMEM;
522638622010SBoris Burkov 		goto iput;
522738622010SBoris Burkov 	}
522838622010SBoris Burkov 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
522938622010SBoris Burkov 	if (ret < 0)
523038622010SBoris Burkov 		goto free_path;
523138622010SBoris Burkov 
523238622010SBoris Burkov 	ret = send_verity(sctx, p, sctx->verity_descriptor);
523338622010SBoris Burkov 	if (ret < 0)
523438622010SBoris Burkov 		goto free_path;
523538622010SBoris Burkov 
523638622010SBoris Burkov free_path:
523738622010SBoris Burkov 	fs_path_free(p);
523838622010SBoris Burkov iput:
523938622010SBoris Burkov 	iput(inode);
524038622010SBoris Burkov 	return ret;
524138622010SBoris Burkov }
524238622010SBoris Burkov 
max_send_read_size(const struct send_ctx * sctx)52438c7d9fe0SOmar Sandoval static inline u64 max_send_read_size(const struct send_ctx *sctx)
52448c7d9fe0SOmar Sandoval {
52458c7d9fe0SOmar Sandoval 	return sctx->send_max_size - SZ_16K;
52468c7d9fe0SOmar Sandoval }
52478c7d9fe0SOmar Sandoval 
/*
 * Append the header of the BTRFS_SEND_A_DATA attribute to the send buffer and
 * advance sctx->send_size past it. @len is the size of the data that will
 * follow; the data itself is not written here.
 *
 * Only one data attribute may be put per command: sctx->put_data is set here
 * and a second call while it is still set warns once and fails.
 *
 * Returns 0 on success, -EINVAL if a data attribute was already put, or
 * -EOVERFLOW if header plus @len bytes do not fit in the remaining buffer.
 */
static int put_data_header(struct send_ctx *sctx, u32 len)
{
	if (WARN_ON_ONCE(sctx->put_data))
		return -EINVAL;
	sctx->put_data = true;

	if (sctx->proto < 2) {
		/* Before v2 the data attribute has a regular TLV header. */
		struct btrfs_tlv_header *hdr;

		if (sctx->send_max_size - sctx->send_size < sizeof(*hdr) + len)
			return -EOVERFLOW;
		hdr = (struct btrfs_tlv_header *)(sctx->send_buf + sctx->send_size);
		put_unaligned_le16(BTRFS_SEND_A_DATA, &hdr->tlv_type);
		put_unaligned_le16(len, &hdr->tlv_len);
		sctx->send_size += sizeof(*hdr);
		return 0;
	}

	/*
	 * Since v2, the data attribute header doesn't include a length,
	 * it is implicitly to the end of the command.
	 */
	if (sctx->send_max_size - sctx->send_size < sizeof(__le16) + len)
		return -EOVERFLOW;
	put_unaligned_le16(BTRFS_SEND_A_DATA, sctx->send_buf + sctx->send_size);
	sctx->send_size += sizeof(__le16);
	return 0;
}
52748c7d9fe0SOmar Sandoval 
/*
 * Append a data attribute with @len bytes of the current inode's content,
 * starting at file offset @offset, to the command being assembled in the send
 * buffer. The bytes are copied from the page cache of sctx->cur_inode; pages
 * that are missing or not up to date are read in, using sctx->ra for
 * readahead.
 *
 * Returns 0 on success, the error from put_data_header() if the attribute
 * header can not be added, -ENOMEM if a page can not be allocated, or -EIO if
 * a page can not be brought up to date.
 */
static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
{
	struct btrfs_root *root = sctx->send_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct address_space *mapping;
	struct page *page;
	pgoff_t index = offset >> PAGE_SHIFT;
	pgoff_t last_index;
	unsigned pg_offset = offset_in_page(offset);
	int ret;

	ret = put_data_header(sctx, len);
	if (ret)
		return ret;

	mapping = sctx->cur_inode->i_mapping;
	last_index = (offset + len - 1) >> PAGE_SHIFT;

	while (index <= last_index) {
		/* Only the first page can start at a non-zero page offset. */
		unsigned cur_len = min_t(unsigned, len,
					 PAGE_SIZE - pg_offset);

again:
		page = find_lock_page(mapping, index);
		if (!page) {
			page_cache_sync_readahead(mapping, &sctx->ra, NULL,
						  index, last_index + 1 - index);

			page = find_or_create_page(mapping, index, GFP_KERNEL);
			if (!page) {
				ret = -ENOMEM;
				break;
			}
		}

		if (PageReadahead(page))
			page_cache_async_readahead(mapping, &sctx->ra, NULL,
						   page_folio(page), index,
						   last_index + 1 - index);

		if (!PageUptodate(page)) {
			/* The read unlocks the page, so take the lock again. */
			btrfs_read_folio(NULL, page_folio(page));
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				btrfs_err(fs_info,
			"send: IO error at offset %llu for inode %llu root %llu",
					page_offset(page), sctx->cur_ino,
					sctx->send_root->root_key.objectid);
				put_page(page);
				ret = -EIO;
				break;
			}
			/*
			 * While the page was unlocked it may have been removed
			 * from the inode's mapping (truncation, invalidation),
			 * in which case its content can no longer be trusted.
			 * Drop it and look the page up again.
			 */
			if (page->mapping != mapping) {
				unlock_page(page);
				put_page(page);
				goto again;
			}
		}

		memcpy_from_page(sctx->send_buf + sctx->send_size, page,
				 pg_offset, cur_len);
		unlock_page(page);
		put_page(page);
		index++;
		pg_offset = 0;
		len -= cur_len;
		sctx->send_size += cur_len;
	}

	return ret;
}
5341ed259095SJosef Bacik 
534231db9f7cSAlexander Block /*
534331db9f7cSAlexander Block  * Read some bytes from the current inode/file and send a write command to
534431db9f7cSAlexander Block  * user space.
534531db9f7cSAlexander Block  */
send_write(struct send_ctx * sctx,u64 offset,u32 len)534631db9f7cSAlexander Block static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
534731db9f7cSAlexander Block {
534804ab956eSJeff Mahoney 	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
534931db9f7cSAlexander Block 	int ret = 0;
535031db9f7cSAlexander Block 	struct fs_path *p;
535131db9f7cSAlexander Block 
5352924794c9STsutomu Itoh 	p = fs_path_alloc();
535331db9f7cSAlexander Block 	if (!p)
535431db9f7cSAlexander Block 		return -ENOMEM;
535531db9f7cSAlexander Block 
535604ab956eSJeff Mahoney 	btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len);
535731db9f7cSAlexander Block 
535831db9f7cSAlexander Block 	ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
535931db9f7cSAlexander Block 	if (ret < 0)
536031db9f7cSAlexander Block 		goto out;
536131db9f7cSAlexander Block 
536231db9f7cSAlexander Block 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
536331db9f7cSAlexander Block 	if (ret < 0)
536431db9f7cSAlexander Block 		goto out;
536531db9f7cSAlexander Block 
536631db9f7cSAlexander Block 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
536731db9f7cSAlexander Block 	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
53688c7d9fe0SOmar Sandoval 	ret = put_file_data(sctx, offset, len);
53698c7d9fe0SOmar Sandoval 	if (ret < 0)
53708c7d9fe0SOmar Sandoval 		goto out;
537131db9f7cSAlexander Block 
537231db9f7cSAlexander Block 	ret = send_cmd(sctx);
537331db9f7cSAlexander Block 
537431db9f7cSAlexander Block tlv_put_failure:
537531db9f7cSAlexander Block out:
5376924794c9STsutomu Itoh 	fs_path_free(p);
537731db9f7cSAlexander Block 	return ret;
537831db9f7cSAlexander Block }
537931db9f7cSAlexander Block 
538031db9f7cSAlexander Block /*
538131db9f7cSAlexander Block  * Send a clone command to user space.
538231db9f7cSAlexander Block  */
send_clone(struct send_ctx * sctx,u64 offset,u32 len,struct clone_root * clone_root)538331db9f7cSAlexander Block static int send_clone(struct send_ctx *sctx,
538431db9f7cSAlexander Block 		      u64 offset, u32 len,
538531db9f7cSAlexander Block 		      struct clone_root *clone_root)
538631db9f7cSAlexander Block {
538731db9f7cSAlexander Block 	int ret = 0;
538831db9f7cSAlexander Block 	struct fs_path *p;
538931db9f7cSAlexander Block 	u64 gen;
539031db9f7cSAlexander Block 
539104ab956eSJeff Mahoney 	btrfs_debug(sctx->send_root->fs_info,
539204ab956eSJeff Mahoney 		    "send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu",
53934fd786e6SMisono Tomohiro 		    offset, len, clone_root->root->root_key.objectid,
53944fd786e6SMisono Tomohiro 		    clone_root->ino, clone_root->offset);
539531db9f7cSAlexander Block 
5396924794c9STsutomu Itoh 	p = fs_path_alloc();
539731db9f7cSAlexander Block 	if (!p)
539831db9f7cSAlexander Block 		return -ENOMEM;
539931db9f7cSAlexander Block 
540031db9f7cSAlexander Block 	ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
540131db9f7cSAlexander Block 	if (ret < 0)
540231db9f7cSAlexander Block 		goto out;
540331db9f7cSAlexander Block 
540431db9f7cSAlexander Block 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
540531db9f7cSAlexander Block 	if (ret < 0)
540631db9f7cSAlexander Block 		goto out;
540731db9f7cSAlexander Block 
540831db9f7cSAlexander Block 	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
540931db9f7cSAlexander Block 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
541031db9f7cSAlexander Block 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
541131db9f7cSAlexander Block 
5412e938c8adSAlexander Block 	if (clone_root->root == sctx->send_root) {
54137e93f6dcSBingJing Chang 		ret = get_inode_gen(sctx->send_root, clone_root->ino, &gen);
541431db9f7cSAlexander Block 		if (ret < 0)
541531db9f7cSAlexander Block 			goto out;
541631db9f7cSAlexander Block 		ret = get_cur_path(sctx, clone_root->ino, gen, p);
541731db9f7cSAlexander Block 	} else {
5418924794c9STsutomu Itoh 		ret = get_inode_path(clone_root->root, clone_root->ino, p);
541931db9f7cSAlexander Block 	}
542031db9f7cSAlexander Block 	if (ret < 0)
542131db9f7cSAlexander Block 		goto out;
542231db9f7cSAlexander Block 
542337b8d27dSJosef Bacik 	/*
542437b8d27dSJosef Bacik 	 * If the parent we're using has a received_uuid set then use that as
542537b8d27dSJosef Bacik 	 * our clone source as that is what we will look for when doing a
542637b8d27dSJosef Bacik 	 * receive.
542737b8d27dSJosef Bacik 	 *
542837b8d27dSJosef Bacik 	 * This covers the case that we create a snapshot off of a received
542937b8d27dSJosef Bacik 	 * subvolume and then use that as the parent and try to receive on a
543037b8d27dSJosef Bacik 	 * different host.
543137b8d27dSJosef Bacik 	 */
543237b8d27dSJosef Bacik 	if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
543337b8d27dSJosef Bacik 		TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
543437b8d27dSJosef Bacik 			     clone_root->root->root_item.received_uuid);
543537b8d27dSJosef Bacik 	else
543631db9f7cSAlexander Block 		TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
5437e938c8adSAlexander Block 			     clone_root->root->root_item.uuid);
543831db9f7cSAlexander Block 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
543909e3a288SDavid Sterba 		    btrfs_root_ctransid(&clone_root->root->root_item));
544031db9f7cSAlexander Block 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
544131db9f7cSAlexander Block 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
544231db9f7cSAlexander Block 			clone_root->offset);
544331db9f7cSAlexander Block 
544431db9f7cSAlexander Block 	ret = send_cmd(sctx);
544531db9f7cSAlexander Block 
544631db9f7cSAlexander Block tlv_put_failure:
544731db9f7cSAlexander Block out:
5448924794c9STsutomu Itoh 	fs_path_free(p);
544931db9f7cSAlexander Block 	return ret;
545031db9f7cSAlexander Block }
545131db9f7cSAlexander Block 
5452cb95e7bfSMark Fasheh /*
5453cb95e7bfSMark Fasheh  * Send an update extent command to user space.
5454cb95e7bfSMark Fasheh  */
send_update_extent(struct send_ctx * sctx,u64 offset,u32 len)5455cb95e7bfSMark Fasheh static int send_update_extent(struct send_ctx *sctx,
5456cb95e7bfSMark Fasheh 			      u64 offset, u32 len)
5457cb95e7bfSMark Fasheh {
5458cb95e7bfSMark Fasheh 	int ret = 0;
5459cb95e7bfSMark Fasheh 	struct fs_path *p;
5460cb95e7bfSMark Fasheh 
5461924794c9STsutomu Itoh 	p = fs_path_alloc();
5462cb95e7bfSMark Fasheh 	if (!p)
5463cb95e7bfSMark Fasheh 		return -ENOMEM;
5464cb95e7bfSMark Fasheh 
5465cb95e7bfSMark Fasheh 	ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
5466cb95e7bfSMark Fasheh 	if (ret < 0)
5467cb95e7bfSMark Fasheh 		goto out;
5468cb95e7bfSMark Fasheh 
5469cb95e7bfSMark Fasheh 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
5470cb95e7bfSMark Fasheh 	if (ret < 0)
5471cb95e7bfSMark Fasheh 		goto out;
5472cb95e7bfSMark Fasheh 
5473cb95e7bfSMark Fasheh 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
5474cb95e7bfSMark Fasheh 	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
5475cb95e7bfSMark Fasheh 	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
5476cb95e7bfSMark Fasheh 
5477cb95e7bfSMark Fasheh 	ret = send_cmd(sctx);
5478cb95e7bfSMark Fasheh 
5479cb95e7bfSMark Fasheh tlv_put_failure:
5480cb95e7bfSMark Fasheh out:
5481924794c9STsutomu Itoh 	fs_path_free(p);
5482cb95e7bfSMark Fasheh 	return ret;
5483cb95e7bfSMark Fasheh }
5484cb95e7bfSMark Fasheh 
/*
 * Send the hole that starts at sctx->cur_inode_last_extent and ends at @end
 * (clamped to the size of the current inode).
 *
 * When file data is not being sent, a single update extent command describes
 * the range. Otherwise the hole is sent as a sequence of write commands full
 * of zeroes, each carrying at most max_send_read_size() bytes.
 *
 * Returns 0 if there is nothing to send, otherwise the result of the last
 * operation performed (negative on error).
 */
static int send_hole(struct send_ctx *sctx, u64 end)
{
	u64 read_size = max_send_read_size(sctx);
	u64 offset = sctx->cur_inode_last_extent;
	struct fs_path *cur_path;
	u64 chunk;
	int ret;

	/*
	 * The hole starts at EOF or beyond it. Fallocate (extent preallocation
	 * and hole punching) is not supported yet, so writing zeroes at or
	 * past EOF would only force a later truncate that undoes the write,
	 * achieving nothing.
	 */
	if (offset >= sctx->cur_inode_size)
		return 0;

	/*
	 * Prealloc extents may start after the i_size, never go beyond the
	 * inode's i_size because of them.
	 */
	end = min_t(u64, end, sctx->cur_inode_size);

	if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
		return send_update_extent(sctx, offset, end - offset);

	cur_path = fs_path_alloc();
	if (!cur_path)
		return -ENOMEM;

	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, cur_path);
	if (ret < 0)
		goto tlv_put_failure;

	/* A failing step leaves the loop before offset is advanced. */
	for (; offset < end; offset += chunk) {
		chunk = min(end - offset, read_size);

		ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
		if (ret < 0)
			break;
		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, cur_path);
		TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
		ret = put_data_header(sctx, chunk);
		if (ret < 0)
			break;
		/* The payload of the write command is all zeroes. */
		memset(sctx->send_buf + sctx->send_size, 0, chunk);
		sctx->send_size += chunk;
		ret = send_cmd(sctx);
		if (ret < 0)
			break;
	}
	sctx->cur_inode_next_write_offset = offset;
tlv_put_failure:
	fs_path_free(cur_path);
	return ret;
}
553916e7549fSJosef Bacik 
/*
 * Send the inline file extent item that @path points to as an encoded write
 * command, i.e. with its data exactly as stored in the item rather than
 * decoded.
 *
 * @offset and @len describe the file range being sent. The data is copied
 * straight from the leaf, so the inode itself is not needed here.
 *
 * Returns -ENOMEM if the path can not be allocated, the first negative error
 * hit while assembling the command, or otherwise the result of send_cmd().
 */
static int send_encoded_inline_extent(struct send_ctx *sctx,
				      struct btrfs_path *path, u64 offset,
				      u64 len)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	struct fs_path *fspath;
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_key key;
	struct btrfs_file_extent_item *ei;
	u64 ram_bytes;
	size_t inline_size;
	int ret;

	fspath = fs_path_alloc();
	if (!fspath)
		return -ENOMEM;

	ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE);
	if (ret < 0)
		goto out;

	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
	if (ret < 0)
		goto out;

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
	ram_bytes = btrfs_file_extent_ram_bytes(leaf, ei);
	inline_size = btrfs_file_extent_inline_item_len(leaf, path->slots[0]);

	/* The TLV_PUT_*() macros jump to tlv_put_failure on error. */
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, fspath);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_FILE_LEN,
		    min(key.offset + ram_bytes - offset, len));
	TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_LEN, ram_bytes);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_OFFSET, offset - key.offset);
	/* A non-negative result is the value of the compression attribute. */
	ret = btrfs_encoded_io_compression_from_extent(fs_info,
				btrfs_file_extent_compression(leaf, ei));
	if (ret < 0)
		goto out;
	TLV_PUT_U32(sctx, BTRFS_SEND_A_COMPRESSION, ret);

	ret = put_data_header(sctx, inline_size);
	if (ret < 0)
		goto out;
	read_extent_buffer(leaf, sctx->send_buf + sctx->send_size,
			   btrfs_file_extent_inline_start(ei), inline_size);
	sctx->send_size += inline_size;

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	fs_path_free(fspath);
	return ret;
}
56053ea4dc5bSOmar Sandoval 
/*
 * Send the regular file extent item that @path points to as an encoded write
 * command, i.e. with the extent's on-disk bytes instead of the decoded file
 * data.
 *
 * @offset and @len describe the file range being sent. The on-disk bytes are
 * read directly into the send buffer at the first page boundary after the
 * attributes, and the command is then written out in two pieces (attributes,
 * data) by this function itself.
 *
 * Returns 0 on success or a negative errno.
 */
static int send_encoded_extent(struct send_ctx *sctx, struct btrfs_path *path,
			       u64 offset, u64 len)
{
	struct btrfs_root *root = sctx->send_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_file_extent_item *ei;
	struct btrfs_cmd_header *cmd_hdr;
	struct fs_path *cur_path;
	struct inode *inode;
	struct btrfs_key key;
	u64 disk_bytenr;
	u64 disk_num_bytes;
	u32 data_offset;
	u32 csum;
	int ret;

	inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	cur_path = fs_path_alloc();
	if (!cur_path) {
		ret = -ENOMEM;
		goto out;
	}

	ret = begin_cmd(sctx, BTRFS_SEND_C_ENCODED_WRITE);
	if (ret >= 0)
		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				   cur_path);
	if (ret < 0)
		goto out;

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
	disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, ei);

	/* The TLV_PUT_*() macros jump to tlv_put_failure on error. */
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, cur_path);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
	TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_FILE_LEN,
		    min(key.offset + btrfs_file_extent_num_bytes(leaf, ei) - offset,
			len));
	TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_LEN,
		    btrfs_file_extent_ram_bytes(leaf, ei));
	TLV_PUT_U64(sctx, BTRFS_SEND_A_UNENCODED_OFFSET,
		    offset - key.offset + btrfs_file_extent_offset(leaf, ei));
	/* A non-negative result is the value of the compression attribute. */
	ret = btrfs_encoded_io_compression_from_extent(fs_info,
				btrfs_file_extent_compression(leaf, ei));
	if (ret < 0)
		goto out;
	TLV_PUT_U32(sctx, BTRFS_SEND_A_COMPRESSION, ret);
	TLV_PUT_U32(sctx, BTRFS_SEND_A_ENCRYPTION, 0);

	ret = put_data_header(sctx, disk_num_bytes);
	if (ret < 0)
		goto out;

	/*
	 * The I/O goes directly into the send buffer, which requires starting
	 * at a page boundary. Use the next one after what was assembled so
	 * far; this can leave a gap between the attributes of the command and
	 * the file data.
	 */
	data_offset = PAGE_ALIGN(sctx->send_size);
	if (data_offset > sctx->send_max_size ||
	    sctx->send_max_size - data_offset < disk_num_bytes) {
		ret = -EOVERFLOW;
		goto out;
	}

	/*
	 * send_buf is a mapping of send_buf_pages, so filling these pages is
	 * really reading into send_buf.
	 */
	ret = btrfs_encoded_read_regular_fill_pages(BTRFS_I(inode), offset,
						    disk_bytenr, disk_num_bytes,
						    sctx->send_buf_pages +
						    (data_offset >> PAGE_SHIFT));
	if (ret)
		goto out;

	/*
	 * Finish the command header by hand: the length and the checksum cover
	 * the attributes and the data, but not the gap between them.
	 */
	cmd_hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	cmd_hdr->len = cpu_to_le32(sctx->send_size + disk_num_bytes -
				   sizeof(*cmd_hdr));
	cmd_hdr->crc = 0;
	csum = btrfs_crc32c(0, sctx->send_buf, sctx->send_size);
	csum = btrfs_crc32c(csum, sctx->send_buf + data_offset, disk_num_bytes);
	cmd_hdr->crc = cpu_to_le32(csum);

	/* Write the attributes first, then the data, skipping the gap. */
	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
			&sctx->send_off);
	if (ret == 0)
		ret = write_buf(sctx->send_filp, sctx->send_buf + data_offset,
				disk_num_bytes, &sctx->send_off);

	/* The command is done with, whether or not the writes succeeded. */
	sctx->send_size = 0;
	sctx->put_data = false;

tlv_put_failure:
out:
	fs_path_free(cur_path);
	iput(inode);
	return ret;
}
57103ea4dc5bSOmar Sandoval 
send_extent_data(struct send_ctx * sctx,struct btrfs_path * path,const u64 offset,const u64 len)57113ea4dc5bSOmar Sandoval static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
57123ea4dc5bSOmar Sandoval 			    const u64 offset, const u64 len)
5713d906d49fSFilipe Manana {
5714152555b3SFilipe Manana 	const u64 end = offset + len;
57153ea4dc5bSOmar Sandoval 	struct extent_buffer *leaf = path->nodes[0];
57163ea4dc5bSOmar Sandoval 	struct btrfs_file_extent_item *ei;
57178c7d9fe0SOmar Sandoval 	u64 read_size = max_send_read_size(sctx);
5718d906d49fSFilipe Manana 	u64 sent = 0;
5719d906d49fSFilipe Manana 
5720d906d49fSFilipe Manana 	if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA)
5721d906d49fSFilipe Manana 		return send_update_extent(sctx, offset, len);
5722d906d49fSFilipe Manana 
57233ea4dc5bSOmar Sandoval 	ei = btrfs_item_ptr(leaf, path->slots[0],
57243ea4dc5bSOmar Sandoval 			    struct btrfs_file_extent_item);
57253ea4dc5bSOmar Sandoval 	if ((sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) &&
57263ea4dc5bSOmar Sandoval 	    btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
57273ea4dc5bSOmar Sandoval 		bool is_inline = (btrfs_file_extent_type(leaf, ei) ==
57283ea4dc5bSOmar Sandoval 				  BTRFS_FILE_EXTENT_INLINE);
57293ea4dc5bSOmar Sandoval 
57303ea4dc5bSOmar Sandoval 		/*
57313ea4dc5bSOmar Sandoval 		 * Send the compressed extent unless the compressed data is
57323ea4dc5bSOmar Sandoval 		 * larger than the decompressed data. This can happen if we're
57333ea4dc5bSOmar Sandoval 		 * not sending the entire extent, either because it has been
57343ea4dc5bSOmar Sandoval 		 * partially overwritten/truncated or because this is a part of
57353ea4dc5bSOmar Sandoval 		 * the extent that we couldn't clone in clone_range().
57363ea4dc5bSOmar Sandoval 		 */
57373ea4dc5bSOmar Sandoval 		if (is_inline &&
57383ea4dc5bSOmar Sandoval 		    btrfs_file_extent_inline_item_len(leaf,
57393ea4dc5bSOmar Sandoval 						      path->slots[0]) <= len) {
57403ea4dc5bSOmar Sandoval 			return send_encoded_inline_extent(sctx, path, offset,
57413ea4dc5bSOmar Sandoval 							  len);
57423ea4dc5bSOmar Sandoval 		} else if (!is_inline &&
57433ea4dc5bSOmar Sandoval 			   btrfs_file_extent_disk_num_bytes(leaf, ei) <= len) {
57443ea4dc5bSOmar Sandoval 			return send_encoded_extent(sctx, path, offset, len);
57453ea4dc5bSOmar Sandoval 		}
57463ea4dc5bSOmar Sandoval 	}
57473ea4dc5bSOmar Sandoval 
5748521b6803SFilipe Manana 	if (sctx->cur_inode == NULL) {
5749521b6803SFilipe Manana 		struct btrfs_root *root = sctx->send_root;
5750521b6803SFilipe Manana 
5751521b6803SFilipe Manana 		sctx->cur_inode = btrfs_iget(root->fs_info->sb, sctx->cur_ino, root);
5752521b6803SFilipe Manana 		if (IS_ERR(sctx->cur_inode)) {
5753521b6803SFilipe Manana 			int err = PTR_ERR(sctx->cur_inode);
5754521b6803SFilipe Manana 
5755521b6803SFilipe Manana 			sctx->cur_inode = NULL;
5756521b6803SFilipe Manana 			return err;
5757521b6803SFilipe Manana 		}
5758521b6803SFilipe Manana 		memset(&sctx->ra, 0, sizeof(struct file_ra_state));
5759521b6803SFilipe Manana 		file_ra_state_init(&sctx->ra, sctx->cur_inode->i_mapping);
5760152555b3SFilipe Manana 
5761152555b3SFilipe Manana 		/*
5762152555b3SFilipe Manana 		 * It's very likely there are no pages from this inode in the page
5763152555b3SFilipe Manana 		 * cache, so after reading extents and sending their data, we clean
5764152555b3SFilipe Manana 		 * the page cache to avoid trashing the page cache (adding pressure
5765152555b3SFilipe Manana 		 * to the page cache and forcing eviction of other data more useful
5766152555b3SFilipe Manana 		 * for applications).
5767152555b3SFilipe Manana 		 *
5768152555b3SFilipe Manana 		 * We decide if we should clean the page cache simply by checking
5769152555b3SFilipe Manana 		 * if the inode's mapping nrpages is 0 when we first open it, and
5770152555b3SFilipe Manana 		 * not by using something like filemap_range_has_page() before
5771152555b3SFilipe Manana 		 * reading an extent because when we ask the readahead code to
5772152555b3SFilipe Manana 		 * read a given file range, it may (and almost always does) read
5773152555b3SFilipe Manana 		 * pages from beyond that range (see the documentation for
5774152555b3SFilipe Manana 		 * page_cache_sync_readahead()), so it would not be reliable,
5775152555b3SFilipe Manana 		 * because after reading the first extent future calls to
5776152555b3SFilipe Manana 		 * filemap_range_has_page() would return true because the readahead
5777152555b3SFilipe Manana 		 * on the previous extent resulted in reading pages of the current
5778152555b3SFilipe Manana 		 * extent as well.
5779152555b3SFilipe Manana 		 */
5780152555b3SFilipe Manana 		sctx->clean_page_cache = (sctx->cur_inode->i_mapping->nrpages == 0);
5781152555b3SFilipe Manana 		sctx->page_cache_clear_start = round_down(offset, PAGE_SIZE);
5782521b6803SFilipe Manana 	}
5783521b6803SFilipe Manana 
5784d906d49fSFilipe Manana 	while (sent < len) {
57858c7d9fe0SOmar Sandoval 		u64 size = min(len - sent, read_size);
5786d906d49fSFilipe Manana 		int ret;
5787d906d49fSFilipe Manana 
5788d906d49fSFilipe Manana 		ret = send_write(sctx, offset + sent, size);
5789d906d49fSFilipe Manana 		if (ret < 0)
5790d906d49fSFilipe Manana 			return ret;
5791a9b2e0deSOmar Sandoval 		sent += size;
5792d906d49fSFilipe Manana 	}
5793152555b3SFilipe Manana 
5794ce394a7fSYushan Zhou 	if (sctx->clean_page_cache && PAGE_ALIGNED(end)) {
5795152555b3SFilipe Manana 		/*
5796152555b3SFilipe Manana 		 * Always operate only on ranges that are a multiple of the page
5797152555b3SFilipe Manana 		 * size. This is not only to prevent zeroing parts of a page in
5798152555b3SFilipe Manana 		 * the case of subpage sector size, but also to guarantee we evict
5799152555b3SFilipe Manana 		 * pages, as passing a range that is smaller than page size does
5800152555b3SFilipe Manana 		 * not evict the respective page (only zeroes part of its content).
5801152555b3SFilipe Manana 		 *
5802152555b3SFilipe Manana 		 * Always start from the end offset of the last range cleared.
5803152555b3SFilipe Manana 		 * This is because the readahead code may (and very often does)
5804152555b3SFilipe Manana 		 * reads pages beyond the range we request for readahead. So if
5805152555b3SFilipe Manana 		 * we have an extent layout like this:
5806152555b3SFilipe Manana 		 *
5807152555b3SFilipe Manana 		 *            [ extent A ] [ extent B ] [ extent C ]
5808152555b3SFilipe Manana 		 *
5809152555b3SFilipe Manana 		 * When we ask page_cache_sync_readahead() to read extent A, it
5810152555b3SFilipe Manana 		 * may also trigger reads for pages of extent B. If we are doing
5811152555b3SFilipe Manana 		 * an incremental send and extent B has not changed between the
5812152555b3SFilipe Manana 		 * parent and send snapshots, some or all of its pages may end
5813152555b3SFilipe Manana 		 * up being read and placed in the page cache. So when truncating
5814152555b3SFilipe Manana 		 * the page cache we always start from the end offset of the
5815152555b3SFilipe Manana 		 * previously processed extent up to the end of the current
5816152555b3SFilipe Manana 		 * extent.
5817152555b3SFilipe Manana 		 */
5818152555b3SFilipe Manana 		truncate_inode_pages_range(&sctx->cur_inode->i_data,
5819152555b3SFilipe Manana 					   sctx->page_cache_clear_start,
5820152555b3SFilipe Manana 					   end - 1);
5821152555b3SFilipe Manana 		sctx->page_cache_clear_start = end;
5822152555b3SFilipe Manana 	}
5823152555b3SFilipe Manana 
5824d906d49fSFilipe Manana 	return 0;
5825d906d49fSFilipe Manana }
5826d906d49fSFilipe Manana 
582789efda52SMarcos Paulo de Souza /*
582889efda52SMarcos Paulo de Souza  * Search for a capability xattr related to sctx->cur_ino. If the capability is
582989efda52SMarcos Paulo de Souza  * found, call send_set_xattr function to emit it.
583089efda52SMarcos Paulo de Souza  *
583189efda52SMarcos Paulo de Souza  * Return 0 if there isn't a capability, or when the capability was emitted
583289efda52SMarcos Paulo de Souza  * successfully, or < 0 if an error occurred.
583389efda52SMarcos Paulo de Souza  */
send_capabilities(struct send_ctx * sctx)583489efda52SMarcos Paulo de Souza static int send_capabilities(struct send_ctx *sctx)
583589efda52SMarcos Paulo de Souza {
583689efda52SMarcos Paulo de Souza 	struct fs_path *fspath = NULL;
583789efda52SMarcos Paulo de Souza 	struct btrfs_path *path;
583889efda52SMarcos Paulo de Souza 	struct btrfs_dir_item *di;
583989efda52SMarcos Paulo de Souza 	struct extent_buffer *leaf;
584089efda52SMarcos Paulo de Souza 	unsigned long data_ptr;
584189efda52SMarcos Paulo de Souza 	char *buf = NULL;
584289efda52SMarcos Paulo de Souza 	int buf_len;
584389efda52SMarcos Paulo de Souza 	int ret = 0;
584489efda52SMarcos Paulo de Souza 
584589efda52SMarcos Paulo de Souza 	path = alloc_path_for_send();
584689efda52SMarcos Paulo de Souza 	if (!path)
584789efda52SMarcos Paulo de Souza 		return -ENOMEM;
584889efda52SMarcos Paulo de Souza 
584989efda52SMarcos Paulo de Souza 	di = btrfs_lookup_xattr(NULL, sctx->send_root, path, sctx->cur_ino,
585089efda52SMarcos Paulo de Souza 				XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), 0);
585189efda52SMarcos Paulo de Souza 	if (!di) {
585289efda52SMarcos Paulo de Souza 		/* There is no xattr for this inode */
585389efda52SMarcos Paulo de Souza 		goto out;
585489efda52SMarcos Paulo de Souza 	} else if (IS_ERR(di)) {
585589efda52SMarcos Paulo de Souza 		ret = PTR_ERR(di);
585689efda52SMarcos Paulo de Souza 		goto out;
585789efda52SMarcos Paulo de Souza 	}
585889efda52SMarcos Paulo de Souza 
585989efda52SMarcos Paulo de Souza 	leaf = path->nodes[0];
586089efda52SMarcos Paulo de Souza 	buf_len = btrfs_dir_data_len(leaf, di);
586189efda52SMarcos Paulo de Souza 
586289efda52SMarcos Paulo de Souza 	fspath = fs_path_alloc();
586389efda52SMarcos Paulo de Souza 	buf = kmalloc(buf_len, GFP_KERNEL);
586489efda52SMarcos Paulo de Souza 	if (!fspath || !buf) {
586589efda52SMarcos Paulo de Souza 		ret = -ENOMEM;
586689efda52SMarcos Paulo de Souza 		goto out;
586789efda52SMarcos Paulo de Souza 	}
586889efda52SMarcos Paulo de Souza 
586989efda52SMarcos Paulo de Souza 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
587089efda52SMarcos Paulo de Souza 	if (ret < 0)
587189efda52SMarcos Paulo de Souza 		goto out;
587289efda52SMarcos Paulo de Souza 
587389efda52SMarcos Paulo de Souza 	data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di);
587489efda52SMarcos Paulo de Souza 	read_extent_buffer(leaf, buf, data_ptr, buf_len);
587589efda52SMarcos Paulo de Souza 
587689efda52SMarcos Paulo de Souza 	ret = send_set_xattr(sctx, fspath, XATTR_NAME_CAPS,
587789efda52SMarcos Paulo de Souza 			strlen(XATTR_NAME_CAPS), buf, buf_len);
587889efda52SMarcos Paulo de Souza out:
587989efda52SMarcos Paulo de Souza 	kfree(buf);
588089efda52SMarcos Paulo de Souza 	fs_path_free(fspath);
588189efda52SMarcos Paulo de Souza 	btrfs_free_path(path);
588289efda52SMarcos Paulo de Souza 	return ret;
588389efda52SMarcos Paulo de Souza }
588489efda52SMarcos Paulo de Souza 
/*
 * Send the file range [offset, offset + len) of the current inode, cloning
 * from @clone_root wherever the source file still references the same extent
 * (@disk_byte at @data_offset) and falling back to regular writes through
 * send_extent_data() for everything else (holes, different extents, data
 * beyond the i_size of the source inode).
 *
 * Note that clone_root->offset is advanced while the range is processed.
 *
 * Returns 0 on success, < 0 on error.
 */
static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
		       struct clone_root *clone_root, const u64 disk_byte,
		       u64 data_offset, u64 offset, u64 len)
{
	struct btrfs_path *src_path;
	struct btrfs_key key;
	struct btrfs_inode_info src_info;
	u64 src_i_size = 0;
	int ret;

	/*
	 * Never clone from offset zero with a length equal to the sector size,
	 * as in some scenarios that makes the receiver fail.
	 *
	 * Example: on the source filesystem the extent at offset 0 has a length
	 * of sectorsize and was written with direct IO, so it can never be an
	 * inline extent (even with compression enabled). That extent can then
	 * be cloned to a non zero file offset in the source filesystem, but at
	 * the destination it may have been inlined due to compression (the
	 * receiver always writes using buffered IO), making the clone
	 * impossible. The same applies when only the destination filesystem has
	 * compression enabled.
	 */
	if (clone_root->offset == 0 &&
	    len == sctx->send_root->fs_info->sectorsize)
		return send_extent_data(sctx, dst_path, offset, len);

	src_path = alloc_path_for_send();
	if (!src_path)
		return -ENOMEM;

	/*
	 * Some inodes have extents that lie beyond their i_size. We must not
	 * accept clones from those extents, so fetch the source i_size first.
	 */
	ret = get_inode_info(clone_root->root, clone_root->ino, &src_info);
	btrfs_release_path(src_path);
	if (ret < 0)
		goto out;
	src_i_size = src_info.size;

	/*
	 * A single clone operation for the whole range is only valid if the
	 * source file has no holes and no items pointing to other extents in
	 * that range. Check for that and issue a mix of clone and regular
	 * write/copy operations when needed.
	 *
	 * Example:
	 *
	 * mkfs.btrfs -f /dev/sda
	 * mount /dev/sda /mnt
	 * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
	 * cp --reflink=always /mnt/foo /mnt/bar
	 * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
	 * btrfs subvolume snapshot -r /mnt /mnt/snap
	 *
	 * While sending the snapshot and processing file bar (which has a
	 * higher inode number than foo), blindly sending a clone of the
	 * [0, 100K[ range from foo to bar would leave the receiver with a file
	 * bar holding the content of foo, i.e. not matching the content bar has
	 * in the original filesystem.
	 */
	key.objectid = clone_root->ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = clone_root->offset;
	ret = btrfs_search_slot(NULL, clone_root->root, &key, src_path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0 && src_path->slots[0] > 0) {
		/*
		 * No exact match: step back one slot if the previous item is
		 * also a file extent item of the source inode.
		 */
		btrfs_item_key_to_cpu(src_path->nodes[0], &key,
				      src_path->slots[0] - 1);
		if (key.objectid == clone_root->ino &&
		    key.type == BTRFS_EXTENT_DATA_KEY)
			src_path->slots[0]--;
	}

	while (true) {
		struct extent_buffer *eb = src_path->nodes[0];
		int cur_slot = src_path->slots[0];
		struct btrfs_file_extent_item *fi;
		u8 extent_type;
		u64 ext_len;
		u64 clone_len;
		u64 clone_data_offset;
		bool crossed_src_i_size = false;
		bool same_extent;

		if (cur_slot >= btrfs_header_nritems(eb)) {
			ret = btrfs_next_leaf(clone_root->root, src_path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			/* Re-evaluate the first slot of the new leaf. */
			continue;
		}

		btrfs_item_key_to_cpu(eb, &key, cur_slot);

		/*
		 * Ran out of file extent items for the source inode. There may
		 * be an implicit trailing hole (NO_HOLES feature enabled), which
		 * is dealt with after leaving the loop.
		 */
		if (key.objectid != clone_root->ino ||
		    key.type != BTRFS_EXTENT_DATA_KEY)
			break;

		fi = btrfs_item_ptr(eb, cur_slot, struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(eb, fi);
		if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			ext_len = btrfs_file_extent_ram_bytes(eb, fi);
			ext_len = PAGE_ALIGN(ext_len);
		} else {
			ext_len = btrfs_file_extent_num_bytes(eb, fi);
		}

		/* Extent ends at or before the current source offset: skip. */
		if (key.offset + ext_len <= clone_root->offset)
			goto next;

		if (key.offset > clone_root->offset) {
			/* Implicit hole, NO_HOLES feature enabled. */
			u64 hole_len = key.offset - clone_root->offset;

			if (hole_len > len)
				hole_len = len;
			ret = send_extent_data(sctx, dst_path, offset,
					       hole_len);
			if (ret < 0)
				goto out;

			len -= hole_len;
			if (len == 0)
				break;
			offset += hole_len;
			clone_root->offset += hole_len;
			data_offset += hole_len;
		}

		/* Extent starts at or after the end of the source range. */
		if (key.offset >= clone_root->offset + len)
			break;

		/* Extent starts at or beyond the i_size of the source inode. */
		if (key.offset >= src_i_size)
			break;

		/* Only the part of the extent below the source i_size counts. */
		if (key.offset + ext_len > src_i_size) {
			ext_len = src_i_size - key.offset;
			crossed_src_i_size = true;
		}

		same_extent = (btrfs_file_extent_disk_bytenr(eb, fi) ==
			       disk_byte);

		clone_data_offset = btrfs_file_extent_offset(eb, fi);
		if (same_extent) {
			clone_root->offset = key.offset;
			if (clone_data_offset < data_offset &&
			    clone_data_offset + ext_len > data_offset) {
				/*
				 * The source item starts earlier inside the
				 * extent than the data we need: skip ahead.
				 */
				const u64 skip = data_offset - clone_data_offset;

				ext_len -= skip;
				clone_data_offset += skip;
				clone_root->offset += skip;
			}
		}

		clone_len = min_t(u64, ext_len, len);

		if (same_extent && clone_data_offset == data_offset) {
			const u64 src_end = clone_root->offset + clone_len;
			const u64 sectorsize = SZ_64K;

			/*
			 * The last block, when its size is not sector size
			 * aligned, can not be cloned into the middle of a file.
			 * Doing so makes the receiver fail with -EINVAL or
			 * silently corrupt the data in the destination file if
			 * it runs a kernel without the fix introduced by commit
			 * ac765f83f1397646 ("Btrfs: fix data corruption due to
			 * cloning of eof block).
			 *
			 * In that case clone the aligned down range and send a
			 * regular write for the eof block.
			 *
			 * The sector size of the receiving filesystem is
			 * unknown, so assume the largest possible one, 64K.
			 */
			if (src_end == src_i_size &&
			    !IS_ALIGNED(src_end, sectorsize) &&
			    offset + clone_len < sctx->cur_inode_size) {
				u64 aligned_len;

				aligned_len = ALIGN_DOWN(src_end - clone_root->offset,
							 sectorsize);
				if (aligned_len > 0) {
					ret = send_clone(sctx, offset,
							 aligned_len,
							 clone_root);
					if (ret < 0)
						goto out;
				}
				ret = send_extent_data(sctx, dst_path,
						       offset + aligned_len,
						       clone_len - aligned_len);
			} else {
				ret = send_clone(sctx, offset, clone_len,
						 clone_root);
			}
		} else if (crossed_src_i_size && clone_len < len) {
			/*
			 * We reached the i_size of the clone source inode and
			 * can not clone from it, so stop here. This avoids
			 * sending two write operations, one of clone_len bytes
			 * and a final one after the loop of len - clone_len
			 * bytes.
			 *
			 * With encoded writes (BTRFS_SEND_FLAG_COMPRESSED was
			 * passed to the send ioctl) this also helps to avoid an
			 * encoded write at an offset that is not sector size
			 * aligned, when the i_size of the source inode is not
			 * sector size aligned. Such a write makes the receiver
			 * fall back to decompressing the data and writing it
			 * with regular buffered IO, which is not incorrect but
			 * is not optimal due to the decompression and possible
			 * re-compression at the receiver.
			 */
			break;
		} else {
			ret = send_extent_data(sctx, dst_path, offset,
					       clone_len);
		}

		if (ret < 0)
			goto out;

		len -= clone_len;
		if (len == 0)
			break;
		offset += clone_len;
		clone_root->offset += clone_len;

		/*
		 * When cloning from the very file being processed, with the
		 * send root as the clone root, stop once the clone offset
		 * reaches the current eof of the file at the receiver.
		 * Otherwise we would issue an invalid clone operation (source
		 * range going beyond eof) and make the receiver fail. Bail out
		 * and fall back to a regular write instead.
		 */
		if (clone_root->root == sctx->send_root &&
		    clone_root->ino == sctx->cur_ino &&
		    clone_root->offset >= sctx->cur_inode_next_write_offset)
			break;

		data_offset += clone_len;
next:
		src_path->slots[0]++;
	}

	/* Whatever is left of the range goes out as regular write data. */
	if (len > 0)
		ret = send_extent_data(sctx, dst_path, offset, len);
	else
		ret = 0;
out:
	btrfs_free_path(src_path);
	return ret;
}
6153d906d49fSFilipe Manana 
send_write_or_clone(struct send_ctx * sctx,struct btrfs_path * path,struct btrfs_key * key,struct clone_root * clone_root)615431db9f7cSAlexander Block static int send_write_or_clone(struct send_ctx *sctx,
615531db9f7cSAlexander Block 			       struct btrfs_path *path,
615631db9f7cSAlexander Block 			       struct btrfs_key *key,
615731db9f7cSAlexander Block 			       struct clone_root *clone_root)
615831db9f7cSAlexander Block {
615931db9f7cSAlexander Block 	int ret = 0;
616031db9f7cSAlexander Block 	u64 offset = key->offset;
6161c9a949afSOmar Sandoval 	u64 end;
61621bca9776SDavid Sterba 	u64 bs = sctx->send_root->fs_info->sectorsize;
6163f0e86587SFilipe Manana 	struct btrfs_file_extent_item *ei;
6164f0e86587SFilipe Manana 	u64 disk_byte;
6165f0e86587SFilipe Manana 	u64 data_offset;
6166f0e86587SFilipe Manana 	u64 num_bytes;
6167f0e86587SFilipe Manana 	struct btrfs_inode_info info = { 0 };
616831db9f7cSAlexander Block 
6169c9a949afSOmar Sandoval 	end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size);
6170c9a949afSOmar Sandoval 	if (offset >= end)
6171c9a949afSOmar Sandoval 		return 0;
617231db9f7cSAlexander Block 
6173f0e86587SFilipe Manana 	num_bytes = end - offset;
6174d906d49fSFilipe Manana 
6175f0e86587SFilipe Manana 	if (!clone_root)
6176f0e86587SFilipe Manana 		goto write_data;
6177f0e86587SFilipe Manana 
6178f0e86587SFilipe Manana 	if (IS_ALIGNED(end, bs))
6179f0e86587SFilipe Manana 		goto clone_data;
6180f0e86587SFilipe Manana 
6181f0e86587SFilipe Manana 	/*
6182f0e86587SFilipe Manana 	 * If the extent end is not aligned, we can clone if the extent ends at
6183f0e86587SFilipe Manana 	 * the i_size of the inode and the clone range ends at the i_size of the
6184f0e86587SFilipe Manana 	 * source inode, otherwise the clone operation fails with -EINVAL.
6185f0e86587SFilipe Manana 	 */
6186f0e86587SFilipe Manana 	if (end != sctx->cur_inode_size)
6187f0e86587SFilipe Manana 		goto write_data;
6188f0e86587SFilipe Manana 
6189f0e86587SFilipe Manana 	ret = get_inode_info(clone_root->root, clone_root->ino, &info);
6190f0e86587SFilipe Manana 	if (ret < 0)
6191f0e86587SFilipe Manana 		return ret;
6192f0e86587SFilipe Manana 
6193*521cfe23SFilipe Manana 	if (clone_root->offset + num_bytes == info.size) {
6194*521cfe23SFilipe Manana 		/*
6195*521cfe23SFilipe Manana 		 * The final size of our file matches the end offset, but it may
6196*521cfe23SFilipe Manana 		 * be that its current size is larger, so we have to truncate it
6197*521cfe23SFilipe Manana 		 * to any value between the start offset of the range and the
6198*521cfe23SFilipe Manana 		 * final i_size, otherwise the clone operation is invalid
6199*521cfe23SFilipe Manana 		 * because it's unaligned and it ends before the current EOF.
6200*521cfe23SFilipe Manana 		 * We do this truncate to the final i_size when we finish
6201*521cfe23SFilipe Manana 		 * processing the inode, but it's too late by then. And here we
6202*521cfe23SFilipe Manana 		 * truncate to the start offset of the range because it's always
6203*521cfe23SFilipe Manana 		 * sector size aligned while if it were the final i_size it
6204*521cfe23SFilipe Manana 		 * would result in dirtying part of a page, filling part of a
6205*521cfe23SFilipe Manana 		 * page with zeroes and then having the clone operation at the
6206*521cfe23SFilipe Manana 		 * receiver trigger IO and wait for it due to the dirty page.
6207*521cfe23SFilipe Manana 		 */
6208*521cfe23SFilipe Manana 		if (sctx->parent_root != NULL) {
6209*521cfe23SFilipe Manana 			ret = send_truncate(sctx, sctx->cur_ino,
6210*521cfe23SFilipe Manana 					    sctx->cur_inode_gen, offset);
6211*521cfe23SFilipe Manana 			if (ret < 0)
6212*521cfe23SFilipe Manana 				return ret;
6213*521cfe23SFilipe Manana 		}
6214f0e86587SFilipe Manana 		goto clone_data;
6215*521cfe23SFilipe Manana 	}
6216f0e86587SFilipe Manana 
6217f0e86587SFilipe Manana write_data:
6218f0e86587SFilipe Manana 	ret = send_extent_data(sctx, path, offset, num_bytes);
6219f0e86587SFilipe Manana 	sctx->cur_inode_next_write_offset = end;
6220f0e86587SFilipe Manana 	return ret;
6221f0e86587SFilipe Manana 
6222f0e86587SFilipe Manana clone_data:
6223c9a949afSOmar Sandoval 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
6224c9a949afSOmar Sandoval 			    struct btrfs_file_extent_item);
6225d906d49fSFilipe Manana 	disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei);
6226d906d49fSFilipe Manana 	data_offset = btrfs_file_extent_offset(path->nodes[0], ei);
6227f0e86587SFilipe Manana 	ret = clone_range(sctx, path, clone_root, disk_byte, data_offset, offset,
6228f0e86587SFilipe Manana 			  num_bytes);
6229c9a949afSOmar Sandoval 	sctx->cur_inode_next_write_offset = end;
623031db9f7cSAlexander Block 	return ret;
623131db9f7cSAlexander Block }
623231db9f7cSAlexander Block 
is_extent_unchanged(struct send_ctx * sctx,struct btrfs_path * left_path,struct btrfs_key * ekey)623331db9f7cSAlexander Block static int is_extent_unchanged(struct send_ctx *sctx,
623431db9f7cSAlexander Block 			       struct btrfs_path *left_path,
623531db9f7cSAlexander Block 			       struct btrfs_key *ekey)
623631db9f7cSAlexander Block {
623731db9f7cSAlexander Block 	int ret = 0;
623831db9f7cSAlexander Block 	struct btrfs_key key;
623931db9f7cSAlexander Block 	struct btrfs_path *path = NULL;
624031db9f7cSAlexander Block 	struct extent_buffer *eb;
624131db9f7cSAlexander Block 	int slot;
624231db9f7cSAlexander Block 	struct btrfs_key found_key;
624331db9f7cSAlexander Block 	struct btrfs_file_extent_item *ei;
624431db9f7cSAlexander Block 	u64 left_disknr;
624531db9f7cSAlexander Block 	u64 right_disknr;
624631db9f7cSAlexander Block 	u64 left_offset;
624731db9f7cSAlexander Block 	u64 right_offset;
624831db9f7cSAlexander Block 	u64 left_offset_fixed;
624931db9f7cSAlexander Block 	u64 left_len;
625031db9f7cSAlexander Block 	u64 right_len;
625174dd17fbSChris Mason 	u64 left_gen;
625274dd17fbSChris Mason 	u64 right_gen;
625331db9f7cSAlexander Block 	u8 left_type;
625431db9f7cSAlexander Block 	u8 right_type;
625531db9f7cSAlexander Block 
625631db9f7cSAlexander Block 	path = alloc_path_for_send();
625731db9f7cSAlexander Block 	if (!path)
625831db9f7cSAlexander Block 		return -ENOMEM;
625931db9f7cSAlexander Block 
626031db9f7cSAlexander Block 	eb = left_path->nodes[0];
626131db9f7cSAlexander Block 	slot = left_path->slots[0];
626231db9f7cSAlexander Block 	ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
626331db9f7cSAlexander Block 	left_type = btrfs_file_extent_type(eb, ei);
626431db9f7cSAlexander Block 
626531db9f7cSAlexander Block 	if (left_type != BTRFS_FILE_EXTENT_REG) {
626631db9f7cSAlexander Block 		ret = 0;
626731db9f7cSAlexander Block 		goto out;
626831db9f7cSAlexander Block 	}
626974dd17fbSChris Mason 	left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
627074dd17fbSChris Mason 	left_len = btrfs_file_extent_num_bytes(eb, ei);
627174dd17fbSChris Mason 	left_offset = btrfs_file_extent_offset(eb, ei);
627274dd17fbSChris Mason 	left_gen = btrfs_file_extent_generation(eb, ei);
627331db9f7cSAlexander Block 
627431db9f7cSAlexander Block 	/*
627531db9f7cSAlexander Block 	 * Following comments will refer to these graphics. L is the left
627631db9f7cSAlexander Block 	 * extents which we are checking at the moment. 1-8 are the right
627731db9f7cSAlexander Block 	 * extents that we iterate.
627831db9f7cSAlexander Block 	 *
627931db9f7cSAlexander Block 	 *       |-----L-----|
628031db9f7cSAlexander Block 	 * |-1-|-2a-|-3-|-4-|-5-|-6-|
628131db9f7cSAlexander Block 	 *
628231db9f7cSAlexander Block 	 *       |-----L-----|
628331db9f7cSAlexander Block 	 * |--1--|-2b-|...(same as above)
628431db9f7cSAlexander Block 	 *
628531db9f7cSAlexander Block 	 * Alternative situation. Happens on files where extents got split.
628631db9f7cSAlexander Block 	 *       |-----L-----|
628731db9f7cSAlexander Block 	 * |-----------7-----------|-6-|
628831db9f7cSAlexander Block 	 *
628931db9f7cSAlexander Block 	 * Alternative situation. Happens on files which got larger.
629031db9f7cSAlexander Block 	 *       |-----L-----|
629131db9f7cSAlexander Block 	 * |-8-|
629231db9f7cSAlexander Block 	 * Nothing follows after 8.
629331db9f7cSAlexander Block 	 */
629431db9f7cSAlexander Block 
629531db9f7cSAlexander Block 	key.objectid = ekey->objectid;
629631db9f7cSAlexander Block 	key.type = BTRFS_EXTENT_DATA_KEY;
629731db9f7cSAlexander Block 	key.offset = ekey->offset;
629831db9f7cSAlexander Block 	ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
629931db9f7cSAlexander Block 	if (ret < 0)
630031db9f7cSAlexander Block 		goto out;
630131db9f7cSAlexander Block 	if (ret) {
630231db9f7cSAlexander Block 		ret = 0;
630331db9f7cSAlexander Block 		goto out;
630431db9f7cSAlexander Block 	}
630531db9f7cSAlexander Block 
630631db9f7cSAlexander Block 	/*
630731db9f7cSAlexander Block 	 * Handle special case where the right side has no extents at all.
630831db9f7cSAlexander Block 	 */
630931db9f7cSAlexander Block 	eb = path->nodes[0];
631031db9f7cSAlexander Block 	slot = path->slots[0];
631131db9f7cSAlexander Block 	btrfs_item_key_to_cpu(eb, &found_key, slot);
631231db9f7cSAlexander Block 	if (found_key.objectid != key.objectid ||
631331db9f7cSAlexander Block 	    found_key.type != key.type) {
631457cfd462SJosef Bacik 		/* If we're a hole then just pretend nothing changed */
631557cfd462SJosef Bacik 		ret = (left_disknr) ? 0 : 1;
631631db9f7cSAlexander Block 		goto out;
631731db9f7cSAlexander Block 	}
631831db9f7cSAlexander Block 
631931db9f7cSAlexander Block 	/*
632031db9f7cSAlexander Block 	 * We're now on 2a, 2b or 7.
632131db9f7cSAlexander Block 	 */
632231db9f7cSAlexander Block 	key = found_key;
632331db9f7cSAlexander Block 	while (key.offset < ekey->offset + left_len) {
632431db9f7cSAlexander Block 		ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
632531db9f7cSAlexander Block 		right_type = btrfs_file_extent_type(eb, ei);
6326e1cbfd7bSFilipe Manana 		if (right_type != BTRFS_FILE_EXTENT_REG &&
6327e1cbfd7bSFilipe Manana 		    right_type != BTRFS_FILE_EXTENT_INLINE) {
632831db9f7cSAlexander Block 			ret = 0;
632931db9f7cSAlexander Block 			goto out;
633031db9f7cSAlexander Block 		}
633131db9f7cSAlexander Block 
6332e1cbfd7bSFilipe Manana 		if (right_type == BTRFS_FILE_EXTENT_INLINE) {
6333e41ca589SQu Wenruo 			right_len = btrfs_file_extent_ram_bytes(eb, ei);
6334e1cbfd7bSFilipe Manana 			right_len = PAGE_ALIGN(right_len);
6335e1cbfd7bSFilipe Manana 		} else {
6336007d31f7SJosef Bacik 			right_len = btrfs_file_extent_num_bytes(eb, ei);
6337e1cbfd7bSFilipe Manana 		}
6338007d31f7SJosef Bacik 
633931db9f7cSAlexander Block 		/*
634031db9f7cSAlexander Block 		 * Are we at extent 8? If yes, we know the extent is changed.
634131db9f7cSAlexander Block 		 * This may only happen on the first iteration.
634231db9f7cSAlexander Block 		 */
6343d8347fa4SAlexander Block 		if (found_key.offset + right_len <= ekey->offset) {
634457cfd462SJosef Bacik 			/* If we're a hole just pretend nothing changed */
634557cfd462SJosef Bacik 			ret = (left_disknr) ? 0 : 1;
634631db9f7cSAlexander Block 			goto out;
634731db9f7cSAlexander Block 		}
634831db9f7cSAlexander Block 
6349e1cbfd7bSFilipe Manana 		/*
6350e1cbfd7bSFilipe Manana 		 * We just wanted to see if when we have an inline extent, what
6351e1cbfd7bSFilipe Manana 		 * follows it is a regular extent (wanted to check the above
6352e1cbfd7bSFilipe Manana 		 * condition for inline extents too). This should normally not
6353e1cbfd7bSFilipe Manana 		 * happen but it's possible for example when we have an inline
6354e1cbfd7bSFilipe Manana 		 * compressed extent representing data with a size matching
6355e1cbfd7bSFilipe Manana 		 * the page size (currently the same as sector size).
6356e1cbfd7bSFilipe Manana 		 */
6357e1cbfd7bSFilipe Manana 		if (right_type == BTRFS_FILE_EXTENT_INLINE) {
6358e1cbfd7bSFilipe Manana 			ret = 0;
6359e1cbfd7bSFilipe Manana 			goto out;
6360e1cbfd7bSFilipe Manana 		}
6361e1cbfd7bSFilipe Manana 
636224e52b11SFilipe Manana 		right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
636324e52b11SFilipe Manana 		right_offset = btrfs_file_extent_offset(eb, ei);
636424e52b11SFilipe Manana 		right_gen = btrfs_file_extent_generation(eb, ei);
636524e52b11SFilipe Manana 
636631db9f7cSAlexander Block 		left_offset_fixed = left_offset;
636731db9f7cSAlexander Block 		if (key.offset < ekey->offset) {
636831db9f7cSAlexander Block 			/* Fix the right offset for 2a and 7. */
636931db9f7cSAlexander Block 			right_offset += ekey->offset - key.offset;
637031db9f7cSAlexander Block 		} else {
637131db9f7cSAlexander Block 			/* Fix the left offset for all behind 2a and 2b */
637231db9f7cSAlexander Block 			left_offset_fixed += key.offset - ekey->offset;
637331db9f7cSAlexander Block 		}
637431db9f7cSAlexander Block 
637531db9f7cSAlexander Block 		/*
637631db9f7cSAlexander Block 		 * Check if we have the same extent.
637731db9f7cSAlexander Block 		 */
63783954096dSAlexander Block 		if (left_disknr != right_disknr ||
637974dd17fbSChris Mason 		    left_offset_fixed != right_offset ||
638074dd17fbSChris Mason 		    left_gen != right_gen) {
638131db9f7cSAlexander Block 			ret = 0;
638231db9f7cSAlexander Block 			goto out;
638331db9f7cSAlexander Block 		}
638431db9f7cSAlexander Block 
638531db9f7cSAlexander Block 		/*
638631db9f7cSAlexander Block 		 * Go to the next extent.
638731db9f7cSAlexander Block 		 */
638831db9f7cSAlexander Block 		ret = btrfs_next_item(sctx->parent_root, path);
638931db9f7cSAlexander Block 		if (ret < 0)
639031db9f7cSAlexander Block 			goto out;
639131db9f7cSAlexander Block 		if (!ret) {
639231db9f7cSAlexander Block 			eb = path->nodes[0];
639331db9f7cSAlexander Block 			slot = path->slots[0];
639431db9f7cSAlexander Block 			btrfs_item_key_to_cpu(eb, &found_key, slot);
639531db9f7cSAlexander Block 		}
639631db9f7cSAlexander Block 		if (ret || found_key.objectid != key.objectid ||
639731db9f7cSAlexander Block 		    found_key.type != key.type) {
639831db9f7cSAlexander Block 			key.offset += right_len;
639931db9f7cSAlexander Block 			break;
640031db9f7cSAlexander Block 		}
6401adaa4b8eSJan Schmidt 		if (found_key.offset != key.offset + right_len) {
6402adaa4b8eSJan Schmidt 			ret = 0;
6403adaa4b8eSJan Schmidt 			goto out;
640431db9f7cSAlexander Block 		}
640531db9f7cSAlexander Block 		key = found_key;
640631db9f7cSAlexander Block 	}
640731db9f7cSAlexander Block 
640831db9f7cSAlexander Block 	/*
640931db9f7cSAlexander Block 	 * We're now behind the left extent (treat as unchanged) or at the end
641031db9f7cSAlexander Block 	 * of the right side (treat as changed).
641131db9f7cSAlexander Block 	 */
641231db9f7cSAlexander Block 	if (key.offset >= ekey->offset + left_len)
641331db9f7cSAlexander Block 		ret = 1;
641431db9f7cSAlexander Block 	else
641531db9f7cSAlexander Block 		ret = 0;
641631db9f7cSAlexander Block 
641731db9f7cSAlexander Block 
641831db9f7cSAlexander Block out:
641931db9f7cSAlexander Block 	btrfs_free_path(path);
642031db9f7cSAlexander Block 	return ret;
642131db9f7cSAlexander Block }
642231db9f7cSAlexander Block 
/*
 * Refresh sctx->cur_inode_last_extent from the send root.
 *
 * The field is first reset to 0. The send root is then searched with
 * btrfs_search_slot_for_read() for the key
 * (sctx->cur_ino, BTRFS_EXTENT_DATA_KEY, @offset). If the item the path ends
 * up on is a file extent item of the current inode, the field is set to the
 * end offset of that extent, otherwise it stays 0.
 *
 * Returns 0 on success (whether or not a matching item was found), -ENOMEM
 * if no path could be allocated, or the negative error from the search.
 */
static int get_last_extent(struct send_ctx *sctx, u64 offset)
{
	struct btrfs_root *send_root = sctx->send_root;
	struct btrfs_key search_key;
	struct btrfs_key found;
	struct btrfs_path *path;
	int ret;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	/* Default result when no extent item of this inode is found. */
	sctx->cur_inode_last_extent = 0;

	search_key.objectid = sctx->cur_ino;
	search_key.type = BTRFS_EXTENT_DATA_KEY;
	search_key.offset = offset;

	ret = btrfs_search_slot_for_read(send_root, &search_key, path, 0, 1);
	if (ret >= 0) {
		/* A positive search result is not an error for our callers. */
		ret = 0;

		btrfs_item_key_to_cpu(path->nodes[0], &found, path->slots[0]);
		if (found.objectid == sctx->cur_ino &&
		    found.type == BTRFS_EXTENT_DATA_KEY)
			sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
	}

	btrfs_free_path(path);
	return ret;
}
645216e7549fSJosef Bacik 
range_is_hole_in_parent(struct send_ctx * sctx,const u64 start,const u64 end)645382bfb2e7SFilipe Manana static int range_is_hole_in_parent(struct send_ctx *sctx,
645482bfb2e7SFilipe Manana 				   const u64 start,
645582bfb2e7SFilipe Manana 				   const u64 end)
645682bfb2e7SFilipe Manana {
645782bfb2e7SFilipe Manana 	struct btrfs_path *path;
645882bfb2e7SFilipe Manana 	struct btrfs_key key;
645982bfb2e7SFilipe Manana 	struct btrfs_root *root = sctx->parent_root;
646082bfb2e7SFilipe Manana 	u64 search_start = start;
646182bfb2e7SFilipe Manana 	int ret;
646282bfb2e7SFilipe Manana 
646382bfb2e7SFilipe Manana 	path = alloc_path_for_send();
646482bfb2e7SFilipe Manana 	if (!path)
646582bfb2e7SFilipe Manana 		return -ENOMEM;
646682bfb2e7SFilipe Manana 
646782bfb2e7SFilipe Manana 	key.objectid = sctx->cur_ino;
646882bfb2e7SFilipe Manana 	key.type = BTRFS_EXTENT_DATA_KEY;
646982bfb2e7SFilipe Manana 	key.offset = search_start;
647082bfb2e7SFilipe Manana 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
647182bfb2e7SFilipe Manana 	if (ret < 0)
647282bfb2e7SFilipe Manana 		goto out;
647382bfb2e7SFilipe Manana 	if (ret > 0 && path->slots[0] > 0)
647482bfb2e7SFilipe Manana 		path->slots[0]--;
647582bfb2e7SFilipe Manana 
647682bfb2e7SFilipe Manana 	while (search_start < end) {
647782bfb2e7SFilipe Manana 		struct extent_buffer *leaf = path->nodes[0];
647882bfb2e7SFilipe Manana 		int slot = path->slots[0];
647982bfb2e7SFilipe Manana 		struct btrfs_file_extent_item *fi;
648082bfb2e7SFilipe Manana 		u64 extent_end;
648182bfb2e7SFilipe Manana 
648282bfb2e7SFilipe Manana 		if (slot >= btrfs_header_nritems(leaf)) {
648382bfb2e7SFilipe Manana 			ret = btrfs_next_leaf(root, path);
648482bfb2e7SFilipe Manana 			if (ret < 0)
648582bfb2e7SFilipe Manana 				goto out;
648682bfb2e7SFilipe Manana 			else if (ret > 0)
648782bfb2e7SFilipe Manana 				break;
648882bfb2e7SFilipe Manana 			continue;
648982bfb2e7SFilipe Manana 		}
649082bfb2e7SFilipe Manana 
649182bfb2e7SFilipe Manana 		btrfs_item_key_to_cpu(leaf, &key, slot);
649282bfb2e7SFilipe Manana 		if (key.objectid < sctx->cur_ino ||
649382bfb2e7SFilipe Manana 		    key.type < BTRFS_EXTENT_DATA_KEY)
649482bfb2e7SFilipe Manana 			goto next;
649582bfb2e7SFilipe Manana 		if (key.objectid > sctx->cur_ino ||
649682bfb2e7SFilipe Manana 		    key.type > BTRFS_EXTENT_DATA_KEY ||
649782bfb2e7SFilipe Manana 		    key.offset >= end)
649882bfb2e7SFilipe Manana 			break;
649982bfb2e7SFilipe Manana 
650082bfb2e7SFilipe Manana 		fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
6501a5eeb3d1SFilipe Manana 		extent_end = btrfs_file_extent_end(path);
650282bfb2e7SFilipe Manana 		if (extent_end <= start)
650382bfb2e7SFilipe Manana 			goto next;
650482bfb2e7SFilipe Manana 		if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
650582bfb2e7SFilipe Manana 			search_start = extent_end;
650682bfb2e7SFilipe Manana 			goto next;
650782bfb2e7SFilipe Manana 		}
650882bfb2e7SFilipe Manana 		ret = 0;
650982bfb2e7SFilipe Manana 		goto out;
651082bfb2e7SFilipe Manana next:
651182bfb2e7SFilipe Manana 		path->slots[0]++;
651282bfb2e7SFilipe Manana 	}
651382bfb2e7SFilipe Manana 	ret = 1;
651482bfb2e7SFilipe Manana out:
651582bfb2e7SFilipe Manana 	btrfs_free_path(path);
651682bfb2e7SFilipe Manana 	return ret;
651782bfb2e7SFilipe Manana }
651882bfb2e7SFilipe Manana 
maybe_send_hole(struct send_ctx * sctx,struct btrfs_path * path,struct btrfs_key * key)651916e7549fSJosef Bacik static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
652016e7549fSJosef Bacik 			   struct btrfs_key *key)
652116e7549fSJosef Bacik {
652216e7549fSJosef Bacik 	int ret = 0;
652316e7549fSJosef Bacik 
652416e7549fSJosef Bacik 	if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
652516e7549fSJosef Bacik 		return 0;
652616e7549fSJosef Bacik 
652716e7549fSJosef Bacik 	if (sctx->cur_inode_last_extent == (u64)-1) {
652816e7549fSJosef Bacik 		ret = get_last_extent(sctx, key->offset - 1);
652916e7549fSJosef Bacik 		if (ret)
653016e7549fSJosef Bacik 			return ret;
653116e7549fSJosef Bacik 	}
653216e7549fSJosef Bacik 
6533bf54f412SFilipe David Borba Manana 	if (path->slots[0] == 0 &&
6534bf54f412SFilipe David Borba Manana 	    sctx->cur_inode_last_extent < key->offset) {
6535bf54f412SFilipe David Borba Manana 		/*
6536bf54f412SFilipe David Borba Manana 		 * We might have skipped entire leafs that contained only
6537bf54f412SFilipe David Borba Manana 		 * file extent items for our current inode. These leafs have
6538bf54f412SFilipe David Borba Manana 		 * a generation number smaller (older) than the one in the
6539bf54f412SFilipe David Borba Manana 		 * current leaf and the leaf our last extent came from, and
6540bf54f412SFilipe David Borba Manana 		 * are located between these 2 leafs.
6541bf54f412SFilipe David Borba Manana 		 */
6542bf54f412SFilipe David Borba Manana 		ret = get_last_extent(sctx, key->offset - 1);
6543bf54f412SFilipe David Borba Manana 		if (ret)
6544bf54f412SFilipe David Borba Manana 			return ret;
6545bf54f412SFilipe David Borba Manana 	}
6546bf54f412SFilipe David Borba Manana 
654782bfb2e7SFilipe Manana 	if (sctx->cur_inode_last_extent < key->offset) {
654882bfb2e7SFilipe Manana 		ret = range_is_hole_in_parent(sctx,
654982bfb2e7SFilipe Manana 					      sctx->cur_inode_last_extent,
655082bfb2e7SFilipe Manana 					      key->offset);
655182bfb2e7SFilipe Manana 		if (ret < 0)
655282bfb2e7SFilipe Manana 			return ret;
655382bfb2e7SFilipe Manana 		else if (ret == 0)
655416e7549fSJosef Bacik 			ret = send_hole(sctx, key->offset);
655582bfb2e7SFilipe Manana 		else
655682bfb2e7SFilipe Manana 			ret = 0;
655782bfb2e7SFilipe Manana 	}
6558a5eeb3d1SFilipe Manana 	sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
655916e7549fSJosef Bacik 	return ret;
656016e7549fSJosef Bacik }
656116e7549fSJosef Bacik 
process_extent(struct send_ctx * sctx,struct btrfs_path * path,struct btrfs_key * key)656231db9f7cSAlexander Block static int process_extent(struct send_ctx *sctx,
656331db9f7cSAlexander Block 			  struct btrfs_path *path,
656431db9f7cSAlexander Block 			  struct btrfs_key *key)
656531db9f7cSAlexander Block {
656631db9f7cSAlexander Block 	struct clone_root *found_clone = NULL;
656757cfd462SJosef Bacik 	int ret = 0;
656831db9f7cSAlexander Block 
656931db9f7cSAlexander Block 	if (S_ISLNK(sctx->cur_inode_mode))
657031db9f7cSAlexander Block 		return 0;
657131db9f7cSAlexander Block 
657231db9f7cSAlexander Block 	if (sctx->parent_root && !sctx->cur_inode_new) {
657331db9f7cSAlexander Block 		ret = is_extent_unchanged(sctx, path, key);
657431db9f7cSAlexander Block 		if (ret < 0)
657531db9f7cSAlexander Block 			goto out;
657631db9f7cSAlexander Block 		if (ret) {
657731db9f7cSAlexander Block 			ret = 0;
657816e7549fSJosef Bacik 			goto out_hole;
657931db9f7cSAlexander Block 		}
658057cfd462SJosef Bacik 	} else {
658157cfd462SJosef Bacik 		struct btrfs_file_extent_item *ei;
658257cfd462SJosef Bacik 		u8 type;
658357cfd462SJosef Bacik 
658457cfd462SJosef Bacik 		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
658557cfd462SJosef Bacik 				    struct btrfs_file_extent_item);
658657cfd462SJosef Bacik 		type = btrfs_file_extent_type(path->nodes[0], ei);
658757cfd462SJosef Bacik 		if (type == BTRFS_FILE_EXTENT_PREALLOC ||
658857cfd462SJosef Bacik 		    type == BTRFS_FILE_EXTENT_REG) {
658957cfd462SJosef Bacik 			/*
659057cfd462SJosef Bacik 			 * The send spec does not have a prealloc command yet,
659157cfd462SJosef Bacik 			 * so just leave a hole for prealloc'ed extents until
659257cfd462SJosef Bacik 			 * we have enough commands queued up to justify rev'ing
659357cfd462SJosef Bacik 			 * the send spec.
659457cfd462SJosef Bacik 			 */
659557cfd462SJosef Bacik 			if (type == BTRFS_FILE_EXTENT_PREALLOC) {
659657cfd462SJosef Bacik 				ret = 0;
659757cfd462SJosef Bacik 				goto out;
659857cfd462SJosef Bacik 			}
659957cfd462SJosef Bacik 
660057cfd462SJosef Bacik 			/* Have a hole, just skip it. */
660157cfd462SJosef Bacik 			if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
660257cfd462SJosef Bacik 				ret = 0;
660357cfd462SJosef Bacik 				goto out;
660457cfd462SJosef Bacik 			}
660557cfd462SJosef Bacik 		}
660631db9f7cSAlexander Block 	}
660731db9f7cSAlexander Block 
660831db9f7cSAlexander Block 	ret = find_extent_clone(sctx, path, key->objectid, key->offset,
660931db9f7cSAlexander Block 			sctx->cur_inode_size, &found_clone);
661031db9f7cSAlexander Block 	if (ret != -ENOENT && ret < 0)
661131db9f7cSAlexander Block 		goto out;
661231db9f7cSAlexander Block 
661331db9f7cSAlexander Block 	ret = send_write_or_clone(sctx, path, key, found_clone);
661416e7549fSJosef Bacik 	if (ret)
661516e7549fSJosef Bacik 		goto out;
661616e7549fSJosef Bacik out_hole:
661716e7549fSJosef Bacik 	ret = maybe_send_hole(sctx, path, key);
661831db9f7cSAlexander Block out:
661931db9f7cSAlexander Block 	return ret;
662031db9f7cSAlexander Block }
662131db9f7cSAlexander Block 
process_all_extents(struct send_ctx * sctx)662231db9f7cSAlexander Block static int process_all_extents(struct send_ctx *sctx)
662331db9f7cSAlexander Block {
66249930e9d4SGabriel Niebler 	int ret = 0;
66259930e9d4SGabriel Niebler 	int iter_ret = 0;
662631db9f7cSAlexander Block 	struct btrfs_root *root;
662731db9f7cSAlexander Block 	struct btrfs_path *path;
662831db9f7cSAlexander Block 	struct btrfs_key key;
662931db9f7cSAlexander Block 	struct btrfs_key found_key;
663031db9f7cSAlexander Block 
663131db9f7cSAlexander Block 	root = sctx->send_root;
663231db9f7cSAlexander Block 	path = alloc_path_for_send();
663331db9f7cSAlexander Block 	if (!path)
663431db9f7cSAlexander Block 		return -ENOMEM;
663531db9f7cSAlexander Block 
663631db9f7cSAlexander Block 	key.objectid = sctx->cmp_key->objectid;
663731db9f7cSAlexander Block 	key.type = BTRFS_EXTENT_DATA_KEY;
663831db9f7cSAlexander Block 	key.offset = 0;
66399930e9d4SGabriel Niebler 	btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
664031db9f7cSAlexander Block 		if (found_key.objectid != key.objectid ||
664131db9f7cSAlexander Block 		    found_key.type != key.type) {
664231db9f7cSAlexander Block 			ret = 0;
66439930e9d4SGabriel Niebler 			break;
664431db9f7cSAlexander Block 		}
664531db9f7cSAlexander Block 
664631db9f7cSAlexander Block 		ret = process_extent(sctx, path, &found_key);
664731db9f7cSAlexander Block 		if (ret < 0)
66489930e9d4SGabriel Niebler 			break;
664931db9f7cSAlexander Block 	}
66509930e9d4SGabriel Niebler 	/* Catch error found during iteration */
66519930e9d4SGabriel Niebler 	if (iter_ret < 0)
66529930e9d4SGabriel Niebler 		ret = iter_ret;
665331db9f7cSAlexander Block 
665431db9f7cSAlexander Block 	btrfs_free_path(path);
665531db9f7cSAlexander Block 	return ret;
665631db9f7cSAlexander Block }
665731db9f7cSAlexander Block 
process_recorded_refs_if_needed(struct send_ctx * sctx,int at_end,int * pending_move,int * refs_processed)66589f03740aSFilipe David Borba Manana static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
66599f03740aSFilipe David Borba Manana 					   int *pending_move,
66609f03740aSFilipe David Borba Manana 					   int *refs_processed)
666131db9f7cSAlexander Block {
666231db9f7cSAlexander Block 	int ret = 0;
666331db9f7cSAlexander Block 
666431db9f7cSAlexander Block 	if (sctx->cur_ino == 0)
666531db9f7cSAlexander Block 		goto out;
666631db9f7cSAlexander Block 	if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
666796b5bd77SJan Schmidt 	    sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
666831db9f7cSAlexander Block 		goto out;
666931db9f7cSAlexander Block 	if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
667031db9f7cSAlexander Block 		goto out;
667131db9f7cSAlexander Block 
66729f03740aSFilipe David Borba Manana 	ret = process_recorded_refs(sctx, pending_move);
6673e479d9bbSAlexander Block 	if (ret < 0)
6674e479d9bbSAlexander Block 		goto out;
6675e479d9bbSAlexander Block 
66769f03740aSFilipe David Borba Manana 	*refs_processed = 1;
667731db9f7cSAlexander Block out:
667831db9f7cSAlexander Block 	return ret;
667931db9f7cSAlexander Block }
668031db9f7cSAlexander Block 
finish_inode_if_needed(struct send_ctx * sctx,int at_end)668131db9f7cSAlexander Block static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
668231db9f7cSAlexander Block {
668331db9f7cSAlexander Block 	int ret = 0;
66847e93f6dcSBingJing Chang 	struct btrfs_inode_info info;
668531db9f7cSAlexander Block 	u64 left_mode;
668631db9f7cSAlexander Block 	u64 left_uid;
668731db9f7cSAlexander Block 	u64 left_gid;
668848247359SDavid Sterba 	u64 left_fileattr;
668931db9f7cSAlexander Block 	u64 right_mode;
669031db9f7cSAlexander Block 	u64 right_uid;
669131db9f7cSAlexander Block 	u64 right_gid;
669248247359SDavid Sterba 	u64 right_fileattr;
669331db9f7cSAlexander Block 	int need_chmod = 0;
669431db9f7cSAlexander Block 	int need_chown = 0;
669548247359SDavid Sterba 	bool need_fileattr = false;
6696ffa7c429SFilipe Manana 	int need_truncate = 1;
66979f03740aSFilipe David Borba Manana 	int pending_move = 0;
66989f03740aSFilipe David Borba Manana 	int refs_processed = 0;
669931db9f7cSAlexander Block 
670046b2f459SFilipe Manana 	if (sctx->ignore_cur_inode)
670146b2f459SFilipe Manana 		return 0;
670246b2f459SFilipe Manana 
67039f03740aSFilipe David Borba Manana 	ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
67049f03740aSFilipe David Borba Manana 					      &refs_processed);
670531db9f7cSAlexander Block 	if (ret < 0)
670631db9f7cSAlexander Block 		goto out;
670731db9f7cSAlexander Block 
67089f03740aSFilipe David Borba Manana 	/*
67099f03740aSFilipe David Borba Manana 	 * We have processed the refs and thus need to advance send_progress.
67109f03740aSFilipe David Borba Manana 	 * Now, calls to get_cur_xxx will take the updated refs of the current
67119f03740aSFilipe David Borba Manana 	 * inode into account.
67129f03740aSFilipe David Borba Manana 	 *
67139f03740aSFilipe David Borba Manana 	 * On the other hand, if our current inode is a directory and couldn't
67149f03740aSFilipe David Borba Manana 	 * be moved/renamed because its parent was renamed/moved too and it has
67159f03740aSFilipe David Borba Manana 	 * a higher inode number, we can only move/rename our current inode
67169f03740aSFilipe David Borba Manana 	 * after we moved/renamed its parent. Therefore in this case operate on
67179f03740aSFilipe David Borba Manana 	 * the old path (pre move/rename) of our current inode, and the
67189f03740aSFilipe David Borba Manana 	 * move/rename will be performed later.
67199f03740aSFilipe David Borba Manana 	 */
67209f03740aSFilipe David Borba Manana 	if (refs_processed && !pending_move)
67219f03740aSFilipe David Borba Manana 		sctx->send_progress = sctx->cur_ino + 1;
67229f03740aSFilipe David Borba Manana 
672331db9f7cSAlexander Block 	if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
672431db9f7cSAlexander Block 		goto out;
672531db9f7cSAlexander Block 	if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
672631db9f7cSAlexander Block 		goto out;
67277e93f6dcSBingJing Chang 	ret = get_inode_info(sctx->send_root, sctx->cur_ino, &info);
672831db9f7cSAlexander Block 	if (ret < 0)
672931db9f7cSAlexander Block 		goto out;
67307e93f6dcSBingJing Chang 	left_mode = info.mode;
67317e93f6dcSBingJing Chang 	left_uid = info.uid;
67327e93f6dcSBingJing Chang 	left_gid = info.gid;
67337e93f6dcSBingJing Chang 	left_fileattr = info.fileattr;
673431db9f7cSAlexander Block 
673531db9f7cSAlexander Block 	if (!sctx->parent_root || sctx->cur_inode_new) {
673631db9f7cSAlexander Block 		need_chown = 1;
6737e2d044feSAlex Lyakas 		if (!S_ISLNK(sctx->cur_inode_mode))
6738e2d044feSAlex Lyakas 			need_chmod = 1;
6739ffa7c429SFilipe Manana 		if (sctx->cur_inode_next_write_offset == sctx->cur_inode_size)
6740ffa7c429SFilipe Manana 			need_truncate = 0;
674131db9f7cSAlexander Block 	} else {
6742ffa7c429SFilipe Manana 		u64 old_size;
6743ffa7c429SFilipe Manana 
67447e93f6dcSBingJing Chang 		ret = get_inode_info(sctx->parent_root, sctx->cur_ino, &info);
674531db9f7cSAlexander Block 		if (ret < 0)
674631db9f7cSAlexander Block 			goto out;
67477e93f6dcSBingJing Chang 		old_size = info.size;
67487e93f6dcSBingJing Chang 		right_mode = info.mode;
67497e93f6dcSBingJing Chang 		right_uid = info.uid;
67507e93f6dcSBingJing Chang 		right_gid = info.gid;
67517e93f6dcSBingJing Chang 		right_fileattr = info.fileattr;
675231db9f7cSAlexander Block 
675331db9f7cSAlexander Block 		if (left_uid != right_uid || left_gid != right_gid)
675431db9f7cSAlexander Block 			need_chown = 1;
6755e2d044feSAlex Lyakas 		if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
675631db9f7cSAlexander Block 			need_chmod = 1;
675748247359SDavid Sterba 		if (!S_ISLNK(sctx->cur_inode_mode) && left_fileattr != right_fileattr)
675848247359SDavid Sterba 			need_fileattr = true;
6759ffa7c429SFilipe Manana 		if ((old_size == sctx->cur_inode_size) ||
6760ffa7c429SFilipe Manana 		    (sctx->cur_inode_size > old_size &&
6761ffa7c429SFilipe Manana 		     sctx->cur_inode_next_write_offset == sctx->cur_inode_size))
6762ffa7c429SFilipe Manana 			need_truncate = 0;
676331db9f7cSAlexander Block 	}
676431db9f7cSAlexander Block 
676531db9f7cSAlexander Block 	if (S_ISREG(sctx->cur_inode_mode)) {
676616e7549fSJosef Bacik 		if (need_send_hole(sctx)) {
6767766b5e5aSFilipe Manana 			if (sctx->cur_inode_last_extent == (u64)-1 ||
6768766b5e5aSFilipe Manana 			    sctx->cur_inode_last_extent <
6769766b5e5aSFilipe Manana 			    sctx->cur_inode_size) {
677016e7549fSJosef Bacik 				ret = get_last_extent(sctx, (u64)-1);
677116e7549fSJosef Bacik 				if (ret)
677216e7549fSJosef Bacik 					goto out;
677316e7549fSJosef Bacik 			}
6774202e4f4bSFilipe Manana 			if (sctx->cur_inode_last_extent < sctx->cur_inode_size) {
6775202e4f4bSFilipe Manana 				ret = range_is_hole_in_parent(sctx,
6776202e4f4bSFilipe Manana 						      sctx->cur_inode_last_extent,
6777202e4f4bSFilipe Manana 						      sctx->cur_inode_size);
6778202e4f4bSFilipe Manana 				if (ret < 0) {
677916e7549fSJosef Bacik 					goto out;
6780202e4f4bSFilipe Manana 				} else if (ret == 0) {
6781202e4f4bSFilipe Manana 					ret = send_hole(sctx, sctx->cur_inode_size);
6782202e4f4bSFilipe Manana 					if (ret < 0)
6783202e4f4bSFilipe Manana 						goto out;
6784202e4f4bSFilipe Manana 				} else {
6785202e4f4bSFilipe Manana 					/* Range is already a hole, skip. */
6786202e4f4bSFilipe Manana 					ret = 0;
6787202e4f4bSFilipe Manana 				}
678816e7549fSJosef Bacik 			}
678916e7549fSJosef Bacik 		}
6790ffa7c429SFilipe Manana 		if (need_truncate) {
6791ffa7c429SFilipe Manana 			ret = send_truncate(sctx, sctx->cur_ino,
6792ffa7c429SFilipe Manana 					    sctx->cur_inode_gen,
679331db9f7cSAlexander Block 					    sctx->cur_inode_size);
679431db9f7cSAlexander Block 			if (ret < 0)
679531db9f7cSAlexander Block 				goto out;
679631db9f7cSAlexander Block 		}
6797ffa7c429SFilipe Manana 	}
679831db9f7cSAlexander Block 
679931db9f7cSAlexander Block 	if (need_chown) {
680031db9f7cSAlexander Block 		ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
680131db9f7cSAlexander Block 				left_uid, left_gid);
680231db9f7cSAlexander Block 		if (ret < 0)
680331db9f7cSAlexander Block 			goto out;
680431db9f7cSAlexander Block 	}
680531db9f7cSAlexander Block 	if (need_chmod) {
680631db9f7cSAlexander Block 		ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
680731db9f7cSAlexander Block 				left_mode);
680831db9f7cSAlexander Block 		if (ret < 0)
680931db9f7cSAlexander Block 			goto out;
681031db9f7cSAlexander Block 	}
681148247359SDavid Sterba 	if (need_fileattr) {
681248247359SDavid Sterba 		ret = send_fileattr(sctx, sctx->cur_ino, sctx->cur_inode_gen,
681348247359SDavid Sterba 				    left_fileattr);
681448247359SDavid Sterba 		if (ret < 0)
681548247359SDavid Sterba 			goto out;
681648247359SDavid Sterba 	}
6817c86eab81SDavid Sterba 
6818c86eab81SDavid Sterba 	if (proto_cmd_ok(sctx, BTRFS_SEND_C_ENABLE_VERITY)
6819c86eab81SDavid Sterba 	    && sctx->cur_inode_needs_verity) {
682038622010SBoris Burkov 		ret = process_verity(sctx);
682138622010SBoris Burkov 		if (ret < 0)
682238622010SBoris Burkov 			goto out;
682338622010SBoris Burkov 	}
682431db9f7cSAlexander Block 
682589efda52SMarcos Paulo de Souza 	ret = send_capabilities(sctx);
682689efda52SMarcos Paulo de Souza 	if (ret < 0)
682789efda52SMarcos Paulo de Souza 		goto out;
682889efda52SMarcos Paulo de Souza 
682931db9f7cSAlexander Block 	/*
68309f03740aSFilipe David Borba Manana 	 * If other directory inodes depended on our current directory
68319f03740aSFilipe David Borba Manana 	 * inode's move/rename, now do their move/rename operations.
683231db9f7cSAlexander Block 	 */
68339f03740aSFilipe David Borba Manana 	if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
68349f03740aSFilipe David Borba Manana 		ret = apply_children_dir_moves(sctx);
68359f03740aSFilipe David Borba Manana 		if (ret)
68369f03740aSFilipe David Borba Manana 			goto out;
68379f03740aSFilipe David Borba Manana 		/*
68389f03740aSFilipe David Borba Manana 		 * Need to send that every time, no matter if it actually
68399f03740aSFilipe David Borba Manana 		 * changed between the two trees as we have done changes to
6840fcbd2154SFilipe Manana 		 * the inode before. If our inode is a directory and it's
6841fcbd2154SFilipe Manana 		 * waiting to be moved/renamed, we will send its utimes when
6842fcbd2154SFilipe Manana 		 * it's moved/renamed, therefore we don't need to do it here.
68439f03740aSFilipe David Borba Manana 		 */
68449f03740aSFilipe David Borba Manana 		sctx->send_progress = sctx->cur_ino + 1;
68453e49363bSFilipe Manana 
68463e49363bSFilipe Manana 		/*
68473e49363bSFilipe Manana 		 * If the current inode is a non-empty directory, delay issuing
68483e49363bSFilipe Manana 		 * the utimes command for it, as it's very likely we have inodes
68493e49363bSFilipe Manana 		 * with an higher number inside it. We want to issue the utimes
68503e49363bSFilipe Manana 		 * command only after adding all dentries to it.
68513e49363bSFilipe Manana 		 */
68523e49363bSFilipe Manana 		if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_size > 0)
68533e49363bSFilipe Manana 			ret = cache_dir_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
68543e49363bSFilipe Manana 		else
685531db9f7cSAlexander Block 			ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
68563e49363bSFilipe Manana 
685731db9f7cSAlexander Block 		if (ret < 0)
685831db9f7cSAlexander Block 			goto out;
6859fcbd2154SFilipe Manana 	}
686031db9f7cSAlexander Block 
686131db9f7cSAlexander Block out:
68623e49363bSFilipe Manana 	if (!ret)
68633e49363bSFilipe Manana 		ret = trim_dir_utimes_cache(sctx);
68643e49363bSFilipe Manana 
686531db9f7cSAlexander Block 	return ret;
686631db9f7cSAlexander Block }
686731db9f7cSAlexander Block 
close_current_inode(struct send_ctx * sctx)6868152555b3SFilipe Manana static void close_current_inode(struct send_ctx *sctx)
6869152555b3SFilipe Manana {
6870152555b3SFilipe Manana 	u64 i_size;
6871152555b3SFilipe Manana 
6872152555b3SFilipe Manana 	if (sctx->cur_inode == NULL)
6873152555b3SFilipe Manana 		return;
6874152555b3SFilipe Manana 
6875152555b3SFilipe Manana 	i_size = i_size_read(sctx->cur_inode);
6876152555b3SFilipe Manana 
6877152555b3SFilipe Manana 	/*
6878152555b3SFilipe Manana 	 * If we are doing an incremental send, we may have extents between the
6879152555b3SFilipe Manana 	 * last processed extent and the i_size that have not been processed
6880152555b3SFilipe Manana 	 * because they haven't changed but we may have read some of their pages
6881152555b3SFilipe Manana 	 * through readahead, see the comments at send_extent_data().
6882152555b3SFilipe Manana 	 */
6883152555b3SFilipe Manana 	if (sctx->clean_page_cache && sctx->page_cache_clear_start < i_size)
6884152555b3SFilipe Manana 		truncate_inode_pages_range(&sctx->cur_inode->i_data,
6885152555b3SFilipe Manana 					   sctx->page_cache_clear_start,
6886152555b3SFilipe Manana 					   round_up(i_size, PAGE_SIZE) - 1);
6887152555b3SFilipe Manana 
6888152555b3SFilipe Manana 	iput(sctx->cur_inode);
6889152555b3SFilipe Manana 	sctx->cur_inode = NULL;
6890152555b3SFilipe Manana }
6891152555b3SFilipe Manana 
changed_inode(struct send_ctx * sctx,enum btrfs_compare_tree_result result)689231db9f7cSAlexander Block static int changed_inode(struct send_ctx *sctx,
689331db9f7cSAlexander Block 			 enum btrfs_compare_tree_result result)
689431db9f7cSAlexander Block {
689531db9f7cSAlexander Block 	int ret = 0;
689631db9f7cSAlexander Block 	struct btrfs_key *key = sctx->cmp_key;
689731db9f7cSAlexander Block 	struct btrfs_inode_item *left_ii = NULL;
689831db9f7cSAlexander Block 	struct btrfs_inode_item *right_ii = NULL;
689931db9f7cSAlexander Block 	u64 left_gen = 0;
690031db9f7cSAlexander Block 	u64 right_gen = 0;
690131db9f7cSAlexander Block 
6902152555b3SFilipe Manana 	close_current_inode(sctx);
6903521b6803SFilipe Manana 
690431db9f7cSAlexander Block 	sctx->cur_ino = key->objectid;
69059555e1f1SDavid Sterba 	sctx->cur_inode_new_gen = false;
690616e7549fSJosef Bacik 	sctx->cur_inode_last_extent = (u64)-1;
6907ffa7c429SFilipe Manana 	sctx->cur_inode_next_write_offset = 0;
690846b2f459SFilipe Manana 	sctx->ignore_cur_inode = false;
6909e479d9bbSAlexander Block 
6910e479d9bbSAlexander Block 	/*
6911e479d9bbSAlexander Block 	 * Set send_progress to current inode. This will tell all get_cur_xxx
6912e479d9bbSAlexander Block 	 * functions that the current inode's refs are not updated yet. Later,
6913e479d9bbSAlexander Block 	 * when process_recorded_refs is finished, it is set to cur_ino + 1.
6914e479d9bbSAlexander Block 	 */
691531db9f7cSAlexander Block 	sctx->send_progress = sctx->cur_ino;
691631db9f7cSAlexander Block 
691731db9f7cSAlexander Block 	if (result == BTRFS_COMPARE_TREE_NEW ||
691831db9f7cSAlexander Block 	    result == BTRFS_COMPARE_TREE_CHANGED) {
691931db9f7cSAlexander Block 		left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
692031db9f7cSAlexander Block 				sctx->left_path->slots[0],
692131db9f7cSAlexander Block 				struct btrfs_inode_item);
692231db9f7cSAlexander Block 		left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
692331db9f7cSAlexander Block 				left_ii);
692431db9f7cSAlexander Block 	} else {
692531db9f7cSAlexander Block 		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
692631db9f7cSAlexander Block 				sctx->right_path->slots[0],
692731db9f7cSAlexander Block 				struct btrfs_inode_item);
692831db9f7cSAlexander Block 		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
692931db9f7cSAlexander Block 				right_ii);
693031db9f7cSAlexander Block 	}
693131db9f7cSAlexander Block 	if (result == BTRFS_COMPARE_TREE_CHANGED) {
693231db9f7cSAlexander Block 		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
693331db9f7cSAlexander Block 				sctx->right_path->slots[0],
693431db9f7cSAlexander Block 				struct btrfs_inode_item);
693531db9f7cSAlexander Block 
693631db9f7cSAlexander Block 		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
693731db9f7cSAlexander Block 				right_ii);
69386d85ed05SAlexander Block 
69396d85ed05SAlexander Block 		/*
69406d85ed05SAlexander Block 		 * The cur_ino = root dir case is special here. We can't treat
69416d85ed05SAlexander Block 		 * the inode as deleted+reused because it would generate a
69426d85ed05SAlexander Block 		 * stream that tries to delete/mkdir the root dir.
69436d85ed05SAlexander Block 		 */
69446d85ed05SAlexander Block 		if (left_gen != right_gen &&
69456d85ed05SAlexander Block 		    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
69469555e1f1SDavid Sterba 			sctx->cur_inode_new_gen = true;
694731db9f7cSAlexander Block 	}
694831db9f7cSAlexander Block 
694946b2f459SFilipe Manana 	/*
695046b2f459SFilipe Manana 	 * Normally we do not find inodes with a link count of zero (orphans)
695146b2f459SFilipe Manana 	 * because the most common case is to create a snapshot and use it
695246b2f459SFilipe Manana 	 * for a send operation. However other less common use cases involve
695346b2f459SFilipe Manana 	 * using a subvolume and send it after turning it to RO mode just
695446b2f459SFilipe Manana 	 * after deleting all hard links of a file while holding an open
695546b2f459SFilipe Manana 	 * file descriptor against it or turning a RO snapshot into RW mode,
695646b2f459SFilipe Manana 	 * keep an open file descriptor against a file, delete it and then
695746b2f459SFilipe Manana 	 * turn the snapshot back to RO mode before using it for a send
69589ed0a72eSBingJing Chang 	 * operation. The former is what the receiver operation does.
69599ed0a72eSBingJing Chang 	 * Therefore, if we want to send these snapshots soon after they're
69609ed0a72eSBingJing Chang 	 * received, we need to handle orphan inodes as well. Moreover, orphans
69619ed0a72eSBingJing Chang 	 * can appear not only in the send snapshot but also in the parent
69629ed0a72eSBingJing Chang 	 * snapshot. Here are several cases:
69639ed0a72eSBingJing Chang 	 *
69649ed0a72eSBingJing Chang 	 * Case 1: BTRFS_COMPARE_TREE_NEW
69659ed0a72eSBingJing Chang 	 *       |  send snapshot  | action
69669ed0a72eSBingJing Chang 	 * --------------------------------
69679ed0a72eSBingJing Chang 	 * nlink |        0        | ignore
69689ed0a72eSBingJing Chang 	 *
69699ed0a72eSBingJing Chang 	 * Case 2: BTRFS_COMPARE_TREE_DELETED
69709ed0a72eSBingJing Chang 	 *       | parent snapshot | action
69719ed0a72eSBingJing Chang 	 * ----------------------------------
69729ed0a72eSBingJing Chang 	 * nlink |        0        | as usual
69739ed0a72eSBingJing Chang 	 * Note: No unlinks will be sent because there're no paths for it.
69749ed0a72eSBingJing Chang 	 *
69759ed0a72eSBingJing Chang 	 * Case 3: BTRFS_COMPARE_TREE_CHANGED
69769ed0a72eSBingJing Chang 	 *           |       | parent snapshot | send snapshot | action
69779ed0a72eSBingJing Chang 	 * -----------------------------------------------------------------------
69789ed0a72eSBingJing Chang 	 * subcase 1 | nlink |        0        |       0       | ignore
69799ed0a72eSBingJing Chang 	 * subcase 2 | nlink |       >0        |       0       | new_gen(deletion)
69809ed0a72eSBingJing Chang 	 * subcase 3 | nlink |        0        |      >0       | new_gen(creation)
69819ed0a72eSBingJing Chang 	 *
698246b2f459SFilipe Manana 	 */
69839ed0a72eSBingJing Chang 	if (result == BTRFS_COMPARE_TREE_NEW) {
69849ed0a72eSBingJing Chang 		if (btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii) == 0) {
698546b2f459SFilipe Manana 			sctx->ignore_cur_inode = true;
698646b2f459SFilipe Manana 			goto out;
698746b2f459SFilipe Manana 		}
698831db9f7cSAlexander Block 		sctx->cur_inode_gen = left_gen;
69899555e1f1SDavid Sterba 		sctx->cur_inode_new = true;
69909555e1f1SDavid Sterba 		sctx->cur_inode_deleted = false;
699131db9f7cSAlexander Block 		sctx->cur_inode_size = btrfs_inode_size(
699231db9f7cSAlexander Block 				sctx->left_path->nodes[0], left_ii);
699331db9f7cSAlexander Block 		sctx->cur_inode_mode = btrfs_inode_mode(
699431db9f7cSAlexander Block 				sctx->left_path->nodes[0], left_ii);
6995644d1940SLiu Bo 		sctx->cur_inode_rdev = btrfs_inode_rdev(
6996644d1940SLiu Bo 				sctx->left_path->nodes[0], left_ii);
699731db9f7cSAlexander Block 		if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
69981f4692daSAlexander Block 			ret = send_create_inode_if_needed(sctx);
699931db9f7cSAlexander Block 	} else if (result == BTRFS_COMPARE_TREE_DELETED) {
700031db9f7cSAlexander Block 		sctx->cur_inode_gen = right_gen;
70019555e1f1SDavid Sterba 		sctx->cur_inode_new = false;
70029555e1f1SDavid Sterba 		sctx->cur_inode_deleted = true;
700331db9f7cSAlexander Block 		sctx->cur_inode_size = btrfs_inode_size(
700431db9f7cSAlexander Block 				sctx->right_path->nodes[0], right_ii);
700531db9f7cSAlexander Block 		sctx->cur_inode_mode = btrfs_inode_mode(
700631db9f7cSAlexander Block 				sctx->right_path->nodes[0], right_ii);
700731db9f7cSAlexander Block 	} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
70089ed0a72eSBingJing Chang 		u32 new_nlinks, old_nlinks;
70099ed0a72eSBingJing Chang 
70109ed0a72eSBingJing Chang 		new_nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
70119ed0a72eSBingJing Chang 		old_nlinks = btrfs_inode_nlink(sctx->right_path->nodes[0], right_ii);
70129ed0a72eSBingJing Chang 		if (new_nlinks == 0 && old_nlinks == 0) {
70139ed0a72eSBingJing Chang 			sctx->ignore_cur_inode = true;
70149ed0a72eSBingJing Chang 			goto out;
70159ed0a72eSBingJing Chang 		} else if (new_nlinks == 0 || old_nlinks == 0) {
70169ed0a72eSBingJing Chang 			sctx->cur_inode_new_gen = 1;
70179ed0a72eSBingJing Chang 		}
7018766702efSAlexander Block 		/*
7019766702efSAlexander Block 		 * We need to do some special handling in case the inode was
7020766702efSAlexander Block 		 * reported as changed with a changed generation number. This
7021766702efSAlexander Block 		 * means that the original inode was deleted and new inode
7022766702efSAlexander Block 		 * reused the same inum. So we have to treat the old inode as
7023766702efSAlexander Block 		 * deleted and the new one as new.
7024766702efSAlexander Block 		 */
702531db9f7cSAlexander Block 		if (sctx->cur_inode_new_gen) {
7026766702efSAlexander Block 			/*
7027766702efSAlexander Block 			 * First, process the inode as if it was deleted.
7028766702efSAlexander Block 			 */
70299b8be45fSBingJing Chang 			if (old_nlinks > 0) {
703031db9f7cSAlexander Block 				sctx->cur_inode_gen = right_gen;
70319555e1f1SDavid Sterba 				sctx->cur_inode_new = false;
70329555e1f1SDavid Sterba 				sctx->cur_inode_deleted = true;
703331db9f7cSAlexander Block 				sctx->cur_inode_size = btrfs_inode_size(
703431db9f7cSAlexander Block 						sctx->right_path->nodes[0], right_ii);
703531db9f7cSAlexander Block 				sctx->cur_inode_mode = btrfs_inode_mode(
703631db9f7cSAlexander Block 						sctx->right_path->nodes[0], right_ii);
703731db9f7cSAlexander Block 				ret = process_all_refs(sctx,
703831db9f7cSAlexander Block 						BTRFS_COMPARE_TREE_DELETED);
703931db9f7cSAlexander Block 				if (ret < 0)
704031db9f7cSAlexander Block 					goto out;
70419b8be45fSBingJing Chang 			}
704231db9f7cSAlexander Block 
7043766702efSAlexander Block 			/*
7044766702efSAlexander Block 			 * Now process the inode as if it was new.
7045766702efSAlexander Block 			 */
70469ed0a72eSBingJing Chang 			if (new_nlinks > 0) {
704731db9f7cSAlexander Block 				sctx->cur_inode_gen = left_gen;
70489555e1f1SDavid Sterba 				sctx->cur_inode_new = true;
70499555e1f1SDavid Sterba 				sctx->cur_inode_deleted = false;
705031db9f7cSAlexander Block 				sctx->cur_inode_size = btrfs_inode_size(
70519ed0a72eSBingJing Chang 						sctx->left_path->nodes[0],
70529ed0a72eSBingJing Chang 						left_ii);
705331db9f7cSAlexander Block 				sctx->cur_inode_mode = btrfs_inode_mode(
70549ed0a72eSBingJing Chang 						sctx->left_path->nodes[0],
70559ed0a72eSBingJing Chang 						left_ii);
7056644d1940SLiu Bo 				sctx->cur_inode_rdev = btrfs_inode_rdev(
70579ed0a72eSBingJing Chang 						sctx->left_path->nodes[0],
70589ed0a72eSBingJing Chang 						left_ii);
70591f4692daSAlexander Block 				ret = send_create_inode_if_needed(sctx);
706031db9f7cSAlexander Block 				if (ret < 0)
706131db9f7cSAlexander Block 					goto out;
706231db9f7cSAlexander Block 
706331db9f7cSAlexander Block 				ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
706431db9f7cSAlexander Block 				if (ret < 0)
706531db9f7cSAlexander Block 					goto out;
7066e479d9bbSAlexander Block 				/*
70679ed0a72eSBingJing Chang 				 * Advance send_progress now as we did not get
70689ed0a72eSBingJing Chang 				 * into process_recorded_refs_if_needed in the
70699ed0a72eSBingJing Chang 				 * new_gen case.
7070e479d9bbSAlexander Block 				 */
7071e479d9bbSAlexander Block 				sctx->send_progress = sctx->cur_ino + 1;
7072766702efSAlexander Block 
7073766702efSAlexander Block 				/*
70749ed0a72eSBingJing Chang 				 * Now process all extents and xattrs of the
70759ed0a72eSBingJing Chang 				 * inode as if they were all new.
7076766702efSAlexander Block 				 */
707731db9f7cSAlexander Block 				ret = process_all_extents(sctx);
707831db9f7cSAlexander Block 				if (ret < 0)
707931db9f7cSAlexander Block 					goto out;
708031db9f7cSAlexander Block 				ret = process_all_new_xattrs(sctx);
708131db9f7cSAlexander Block 				if (ret < 0)
708231db9f7cSAlexander Block 					goto out;
70839ed0a72eSBingJing Chang 			}
708431db9f7cSAlexander Block 		} else {
708531db9f7cSAlexander Block 			sctx->cur_inode_gen = left_gen;
70869555e1f1SDavid Sterba 			sctx->cur_inode_new = false;
70879555e1f1SDavid Sterba 			sctx->cur_inode_new_gen = false;
70889555e1f1SDavid Sterba 			sctx->cur_inode_deleted = false;
708931db9f7cSAlexander Block 			sctx->cur_inode_size = btrfs_inode_size(
709031db9f7cSAlexander Block 					sctx->left_path->nodes[0], left_ii);
709131db9f7cSAlexander Block 			sctx->cur_inode_mode = btrfs_inode_mode(
709231db9f7cSAlexander Block 					sctx->left_path->nodes[0], left_ii);
709331db9f7cSAlexander Block 		}
709431db9f7cSAlexander Block 	}
709531db9f7cSAlexander Block 
709631db9f7cSAlexander Block out:
709731db9f7cSAlexander Block 	return ret;
709831db9f7cSAlexander Block }
709931db9f7cSAlexander Block 
7100766702efSAlexander Block /*
7101766702efSAlexander Block  * We have to process new refs before deleted refs, but compare_trees gives us
7102766702efSAlexander Block  * the new and deleted refs mixed. To fix this, we record the new/deleted refs
7103766702efSAlexander Block  * first and later process them in process_recorded_refs.
7104766702efSAlexander Block  * For the cur_inode_new_gen case, we skip recording completely because
7105766702efSAlexander Block  * changed_inode did already initiate processing of refs. The reason for this is
7106766702efSAlexander Block  * that in this case, compare_tree actually compares the refs of 2 different
7107766702efSAlexander Block  * inodes. To fix this, process_all_refs is used in changed_inode to handle all
7108766702efSAlexander Block  * refs of the right tree as deleted and all refs of the left tree as new.
7109766702efSAlexander Block  */
changed_ref(struct send_ctx * sctx,enum btrfs_compare_tree_result result)711031db9f7cSAlexander Block static int changed_ref(struct send_ctx *sctx,
711131db9f7cSAlexander Block 		       enum btrfs_compare_tree_result result)
711231db9f7cSAlexander Block {
711331db9f7cSAlexander Block 	int ret = 0;
711431db9f7cSAlexander Block 
711595155585SFilipe Manana 	if (sctx->cur_ino != sctx->cmp_key->objectid) {
711695155585SFilipe Manana 		inconsistent_snapshot_error(sctx, result, "reference");
711795155585SFilipe Manana 		return -EIO;
711895155585SFilipe Manana 	}
711931db9f7cSAlexander Block 
712031db9f7cSAlexander Block 	if (!sctx->cur_inode_new_gen &&
712131db9f7cSAlexander Block 	    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
712231db9f7cSAlexander Block 		if (result == BTRFS_COMPARE_TREE_NEW)
712331db9f7cSAlexander Block 			ret = record_new_ref(sctx);
712431db9f7cSAlexander Block 		else if (result == BTRFS_COMPARE_TREE_DELETED)
712531db9f7cSAlexander Block 			ret = record_deleted_ref(sctx);
712631db9f7cSAlexander Block 		else if (result == BTRFS_COMPARE_TREE_CHANGED)
712731db9f7cSAlexander Block 			ret = record_changed_ref(sctx);
712831db9f7cSAlexander Block 	}
712931db9f7cSAlexander Block 
713031db9f7cSAlexander Block 	return ret;
713131db9f7cSAlexander Block }
713231db9f7cSAlexander Block 
7133766702efSAlexander Block /*
7134766702efSAlexander Block  * Process new/deleted/changed xattrs. We skip processing in the
7135766702efSAlexander Block  * cur_inode_new_gen case because changed_inode did already initiate processing
7136766702efSAlexander Block  * of xattrs. The reason is the same as in changed_ref
7137766702efSAlexander Block  */
changed_xattr(struct send_ctx * sctx,enum btrfs_compare_tree_result result)713831db9f7cSAlexander Block static int changed_xattr(struct send_ctx *sctx,
713931db9f7cSAlexander Block 			 enum btrfs_compare_tree_result result)
714031db9f7cSAlexander Block {
714131db9f7cSAlexander Block 	int ret = 0;
714231db9f7cSAlexander Block 
714395155585SFilipe Manana 	if (sctx->cur_ino != sctx->cmp_key->objectid) {
714495155585SFilipe Manana 		inconsistent_snapshot_error(sctx, result, "xattr");
714595155585SFilipe Manana 		return -EIO;
714695155585SFilipe Manana 	}
714731db9f7cSAlexander Block 
714831db9f7cSAlexander Block 	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
714931db9f7cSAlexander Block 		if (result == BTRFS_COMPARE_TREE_NEW)
715031db9f7cSAlexander Block 			ret = process_new_xattr(sctx);
715131db9f7cSAlexander Block 		else if (result == BTRFS_COMPARE_TREE_DELETED)
715231db9f7cSAlexander Block 			ret = process_deleted_xattr(sctx);
715331db9f7cSAlexander Block 		else if (result == BTRFS_COMPARE_TREE_CHANGED)
715431db9f7cSAlexander Block 			ret = process_changed_xattr(sctx);
715531db9f7cSAlexander Block 	}
715631db9f7cSAlexander Block 
715731db9f7cSAlexander Block 	return ret;
715831db9f7cSAlexander Block }
715931db9f7cSAlexander Block 
7160766702efSAlexander Block /*
7161766702efSAlexander Block  * Process new/deleted/changed extents. We skip processing in the
7162766702efSAlexander Block  * cur_inode_new_gen case because changed_inode did already initiate processing
7163766702efSAlexander Block  * of extents. The reason is the same as in changed_ref
7164766702efSAlexander Block  */
changed_extent(struct send_ctx * sctx,enum btrfs_compare_tree_result result)716531db9f7cSAlexander Block static int changed_extent(struct send_ctx *sctx,
716631db9f7cSAlexander Block 			  enum btrfs_compare_tree_result result)
716731db9f7cSAlexander Block {
716831db9f7cSAlexander Block 	int ret = 0;
716931db9f7cSAlexander Block 
7170d5e84fd8SFilipe Manana 	/*
7171b4f9a1a8SFilipe Manana 	 * We have found an extent item that changed without the inode item
7172b4f9a1a8SFilipe Manana 	 * having changed. This can happen either after relocation (where the
7173b4f9a1a8SFilipe Manana 	 * disk_bytenr of an extent item is replaced at
7174b4f9a1a8SFilipe Manana 	 * relocation.c:replace_file_extents()) or after deduplication into a
7175b4f9a1a8SFilipe Manana 	 * file in both the parent and send snapshots (where an extent item can
7176b4f9a1a8SFilipe Manana 	 * get modified or replaced with a new one). Note that deduplication
7177b4f9a1a8SFilipe Manana 	 * updates the inode item, but it only changes the iversion (sequence
7178b4f9a1a8SFilipe Manana 	 * field in the inode item) of the inode, so if a file is deduplicated
7179b4f9a1a8SFilipe Manana 	 * the same amount of times in both the parent and send snapshots, its
71801a9fd417SDavid Sterba 	 * iversion becomes the same in both snapshots, whence the inode item is
7181b4f9a1a8SFilipe Manana 	 * the same on both snapshots.
7182d5e84fd8SFilipe Manana 	 */
7183b4f9a1a8SFilipe Manana 	if (sctx->cur_ino != sctx->cmp_key->objectid)
7184d5e84fd8SFilipe Manana 		return 0;
718531db9f7cSAlexander Block 
718631db9f7cSAlexander Block 	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
718731db9f7cSAlexander Block 		if (result != BTRFS_COMPARE_TREE_DELETED)
718831db9f7cSAlexander Block 			ret = process_extent(sctx, sctx->left_path,
718931db9f7cSAlexander Block 					sctx->cmp_key);
719031db9f7cSAlexander Block 	}
719131db9f7cSAlexander Block 
719231db9f7cSAlexander Block 	return ret;
719331db9f7cSAlexander Block }
719431db9f7cSAlexander Block 
changed_verity(struct send_ctx * sctx,enum btrfs_compare_tree_result result)719538622010SBoris Burkov static int changed_verity(struct send_ctx *sctx, enum btrfs_compare_tree_result result)
719638622010SBoris Burkov {
719738622010SBoris Burkov 	int ret = 0;
719838622010SBoris Burkov 
719938622010SBoris Burkov 	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
720038622010SBoris Burkov 		if (result == BTRFS_COMPARE_TREE_NEW)
720138622010SBoris Burkov 			sctx->cur_inode_needs_verity = true;
720238622010SBoris Burkov 	}
720338622010SBoris Burkov 	return ret;
720438622010SBoris Burkov }
720538622010SBoris Burkov 
/*
 * Tell whether inode @dir has a different generation in the send root than
 * in the parent root.
 *
 * Returns 1 if the generations differ, 0 if they are equal. Any non-zero
 * value returned by get_inode_gen() is passed through to the caller.
 */
static int dir_changed(struct send_ctx *sctx, u64 dir)
{
	u64 send_gen;
	u64 parent_gen;
	int ret;

	/* Look up the send root first, then the parent root. */
	ret = get_inode_gen(sctx->send_root, dir, &send_gen);
	if (!ret)
		ret = get_inode_gen(sctx->parent_root, dir, &parent_gen);
	if (ret)
		return ret;

	return parent_gen != send_gen;
}
7221ba5e8f2eSJosef Bacik 
compare_refs(struct send_ctx * sctx,struct btrfs_path * path,struct btrfs_key * key)7222ba5e8f2eSJosef Bacik static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
7223ba5e8f2eSJosef Bacik 			struct btrfs_key *key)
7224ba5e8f2eSJosef Bacik {
7225ba5e8f2eSJosef Bacik 	struct btrfs_inode_extref *extref;
7226ba5e8f2eSJosef Bacik 	struct extent_buffer *leaf;
7227ba5e8f2eSJosef Bacik 	u64 dirid = 0, last_dirid = 0;
7228ba5e8f2eSJosef Bacik 	unsigned long ptr;
7229ba5e8f2eSJosef Bacik 	u32 item_size;
7230ba5e8f2eSJosef Bacik 	u32 cur_offset = 0;
7231ba5e8f2eSJosef Bacik 	int ref_name_len;
7232ba5e8f2eSJosef Bacik 	int ret = 0;
7233ba5e8f2eSJosef Bacik 
7234ba5e8f2eSJosef Bacik 	/* Easy case, just check this one dirid */
7235ba5e8f2eSJosef Bacik 	if (key->type == BTRFS_INODE_REF_KEY) {
7236ba5e8f2eSJosef Bacik 		dirid = key->offset;
7237ba5e8f2eSJosef Bacik 
7238ba5e8f2eSJosef Bacik 		ret = dir_changed(sctx, dirid);
7239ba5e8f2eSJosef Bacik 		goto out;
7240ba5e8f2eSJosef Bacik 	}
7241ba5e8f2eSJosef Bacik 
7242ba5e8f2eSJosef Bacik 	leaf = path->nodes[0];
72433212fa14SJosef Bacik 	item_size = btrfs_item_size(leaf, path->slots[0]);
7244ba5e8f2eSJosef Bacik 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
7245ba5e8f2eSJosef Bacik 	while (cur_offset < item_size) {
7246ba5e8f2eSJosef Bacik 		extref = (struct btrfs_inode_extref *)(ptr +
7247ba5e8f2eSJosef Bacik 						       cur_offset);
7248ba5e8f2eSJosef Bacik 		dirid = btrfs_inode_extref_parent(leaf, extref);
7249ba5e8f2eSJosef Bacik 		ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
7250ba5e8f2eSJosef Bacik 		cur_offset += ref_name_len + sizeof(*extref);
7251ba5e8f2eSJosef Bacik 		if (dirid == last_dirid)
7252ba5e8f2eSJosef Bacik 			continue;
7253ba5e8f2eSJosef Bacik 		ret = dir_changed(sctx, dirid);
7254ba5e8f2eSJosef Bacik 		if (ret)
7255ba5e8f2eSJosef Bacik 			break;
7256ba5e8f2eSJosef Bacik 		last_dirid = dirid;
7257ba5e8f2eSJosef Bacik 	}
7258ba5e8f2eSJosef Bacik out:
7259ba5e8f2eSJosef Bacik 	return ret;
7260ba5e8f2eSJosef Bacik }
7261ba5e8f2eSJosef Bacik 
7262766702efSAlexander Block /*
7263766702efSAlexander Block  * Updates compare related fields in sctx and simply forwards to the actual
7264766702efSAlexander Block  * changed_xxx functions.
7265766702efSAlexander Block  */
changed_cb(struct btrfs_path * left_path,struct btrfs_path * right_path,struct btrfs_key * key,enum btrfs_compare_tree_result result,struct send_ctx * sctx)7266ee8c494fSNikolay Borisov static int changed_cb(struct btrfs_path *left_path,
726731db9f7cSAlexander Block 		      struct btrfs_path *right_path,
726831db9f7cSAlexander Block 		      struct btrfs_key *key,
726931db9f7cSAlexander Block 		      enum btrfs_compare_tree_result result,
727088980383SRoman Anasal 		      struct send_ctx *sctx)
727131db9f7cSAlexander Block {
727231db9f7cSAlexander Block 	int ret = 0;
727331db9f7cSAlexander Block 
7274d96b3424SFilipe Manana 	/*
7275d96b3424SFilipe Manana 	 * We can not hold the commit root semaphore here. This is because in
7276d96b3424SFilipe Manana 	 * the case of sending and receiving to the same filesystem, using a
7277d96b3424SFilipe Manana 	 * pipe, could result in a deadlock:
7278d96b3424SFilipe Manana 	 *
7279d96b3424SFilipe Manana 	 * 1) The task running send blocks on the pipe because it's full;
7280d96b3424SFilipe Manana 	 *
7281d96b3424SFilipe Manana 	 * 2) The task running receive, which is the only consumer of the pipe,
7282d96b3424SFilipe Manana 	 *    is waiting for a transaction commit (for example due to a space
7283d96b3424SFilipe Manana 	 *    reservation when doing a write or triggering a transaction commit
7284d96b3424SFilipe Manana 	 *    when creating a subvolume);
7285d96b3424SFilipe Manana 	 *
7286d96b3424SFilipe Manana 	 * 3) The transaction is waiting to write lock the commit root semaphore,
7287d96b3424SFilipe Manana 	 *    but can not acquire it since it's being held at 1).
7288d96b3424SFilipe Manana 	 *
7289d96b3424SFilipe Manana 	 * Down this call chain we write to the pipe through kernel_write().
7290d96b3424SFilipe Manana 	 * The same type of problem can also happen when sending to a file that
7291d96b3424SFilipe Manana 	 * is stored in the same filesystem - when reserving space for a write
7292d96b3424SFilipe Manana 	 * into the file, we can trigger a transaction commit.
7293d96b3424SFilipe Manana 	 *
7294d96b3424SFilipe Manana 	 * Our caller has supplied us with clones of leaves from the send and
7295d96b3424SFilipe Manana 	 * parent roots, so we're safe here from a concurrent relocation and
7296d96b3424SFilipe Manana 	 * further reallocation of metadata extents while we are here. Below we
7297d96b3424SFilipe Manana 	 * also assert that the leaves are clones.
7298d96b3424SFilipe Manana 	 */
7299d96b3424SFilipe Manana 	lockdep_assert_not_held(&sctx->send_root->fs_info->commit_root_sem);
7300d96b3424SFilipe Manana 
7301d96b3424SFilipe Manana 	/*
7302d96b3424SFilipe Manana 	 * We always have a send root, so left_path is never NULL. We will not
7303d96b3424SFilipe Manana 	 * have a leaf when we have reached the end of the send root but have
7304d96b3424SFilipe Manana 	 * not yet reached the end of the parent root.
7305d96b3424SFilipe Manana 	 */
7306d96b3424SFilipe Manana 	if (left_path->nodes[0])
7307d96b3424SFilipe Manana 		ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
7308d96b3424SFilipe Manana 				&left_path->nodes[0]->bflags));
7309d96b3424SFilipe Manana 	/*
7310d96b3424SFilipe Manana 	 * When doing a full send we don't have a parent root, so right_path is
7311d96b3424SFilipe Manana 	 * NULL. When doing an incremental send, we may have reached the end of
7312d96b3424SFilipe Manana 	 * the parent root already, so we don't have a leaf at right_path.
7313d96b3424SFilipe Manana 	 */
7314d96b3424SFilipe Manana 	if (right_path && right_path->nodes[0])
7315d96b3424SFilipe Manana 		ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
7316d96b3424SFilipe Manana 				&right_path->nodes[0]->bflags));
7317d96b3424SFilipe Manana 
7318ba5e8f2eSJosef Bacik 	if (result == BTRFS_COMPARE_TREE_SAME) {
731916e7549fSJosef Bacik 		if (key->type == BTRFS_INODE_REF_KEY ||
732016e7549fSJosef Bacik 		    key->type == BTRFS_INODE_EXTREF_KEY) {
7321ba5e8f2eSJosef Bacik 			ret = compare_refs(sctx, left_path, key);
7322ba5e8f2eSJosef Bacik 			if (!ret)
7323ba5e8f2eSJosef Bacik 				return 0;
7324ba5e8f2eSJosef Bacik 			if (ret < 0)
7325ba5e8f2eSJosef Bacik 				return ret;
732616e7549fSJosef Bacik 		} else if (key->type == BTRFS_EXTENT_DATA_KEY) {
732716e7549fSJosef Bacik 			return maybe_send_hole(sctx, left_path, key);
732816e7549fSJosef Bacik 		} else {
732916e7549fSJosef Bacik 			return 0;
733016e7549fSJosef Bacik 		}
7331ba5e8f2eSJosef Bacik 		result = BTRFS_COMPARE_TREE_CHANGED;
7332ba5e8f2eSJosef Bacik 		ret = 0;
7333ba5e8f2eSJosef Bacik 	}
7334ba5e8f2eSJosef Bacik 
733531db9f7cSAlexander Block 	sctx->left_path = left_path;
733631db9f7cSAlexander Block 	sctx->right_path = right_path;
733731db9f7cSAlexander Block 	sctx->cmp_key = key;
733831db9f7cSAlexander Block 
733931db9f7cSAlexander Block 	ret = finish_inode_if_needed(sctx, 0);
734031db9f7cSAlexander Block 	if (ret < 0)
734131db9f7cSAlexander Block 		goto out;
734231db9f7cSAlexander Block 
73432981e225SAlexander Block 	/* Ignore non-FS objects */
73442981e225SAlexander Block 	if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
73452981e225SAlexander Block 	    key->objectid == BTRFS_FREE_SPACE_OBJECTID)
73462981e225SAlexander Block 		goto out;
73472981e225SAlexander Block 
734846b2f459SFilipe Manana 	if (key->type == BTRFS_INODE_ITEM_KEY) {
734931db9f7cSAlexander Block 		ret = changed_inode(sctx, result);
735046b2f459SFilipe Manana 	} else if (!sctx->ignore_cur_inode) {
735146b2f459SFilipe Manana 		if (key->type == BTRFS_INODE_REF_KEY ||
735296b5bd77SJan Schmidt 		    key->type == BTRFS_INODE_EXTREF_KEY)
735331db9f7cSAlexander Block 			ret = changed_ref(sctx, result);
735431db9f7cSAlexander Block 		else if (key->type == BTRFS_XATTR_ITEM_KEY)
735531db9f7cSAlexander Block 			ret = changed_xattr(sctx, result);
735631db9f7cSAlexander Block 		else if (key->type == BTRFS_EXTENT_DATA_KEY)
735731db9f7cSAlexander Block 			ret = changed_extent(sctx, result);
735838622010SBoris Burkov 		else if (key->type == BTRFS_VERITY_DESC_ITEM_KEY &&
735938622010SBoris Burkov 			 key->offset == 0)
736038622010SBoris Burkov 			ret = changed_verity(sctx, result);
736146b2f459SFilipe Manana 	}
736231db9f7cSAlexander Block 
736331db9f7cSAlexander Block out:
736431db9f7cSAlexander Block 	return ret;
736531db9f7cSAlexander Block }
736631db9f7cSAlexander Block 
search_key_again(const struct send_ctx * sctx,struct btrfs_root * root,struct btrfs_path * path,const struct btrfs_key * key)7367d96b3424SFilipe Manana static int search_key_again(const struct send_ctx *sctx,
7368d96b3424SFilipe Manana 			    struct btrfs_root *root,
7369d96b3424SFilipe Manana 			    struct btrfs_path *path,
7370d96b3424SFilipe Manana 			    const struct btrfs_key *key)
7371d96b3424SFilipe Manana {
7372d96b3424SFilipe Manana 	int ret;
7373d96b3424SFilipe Manana 
7374d96b3424SFilipe Manana 	if (!path->need_commit_sem)
7375d96b3424SFilipe Manana 		lockdep_assert_held_read(&root->fs_info->commit_root_sem);
7376d96b3424SFilipe Manana 
7377d96b3424SFilipe Manana 	/*
7378d96b3424SFilipe Manana 	 * Roots used for send operations are readonly and no one can add,
7379d96b3424SFilipe Manana 	 * update or remove keys from them, so we should be able to find our
7380d96b3424SFilipe Manana 	 * key again. The only exception is deduplication, which can operate on
7381d96b3424SFilipe Manana 	 * readonly roots and add, update or remove keys to/from them - but at
7382d96b3424SFilipe Manana 	 * the moment we don't allow it to run in parallel with send.
7383d96b3424SFilipe Manana 	 */
7384d96b3424SFilipe Manana 	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
7385d96b3424SFilipe Manana 	ASSERT(ret <= 0);
7386d96b3424SFilipe Manana 	if (ret > 0) {
7387d96b3424SFilipe Manana 		btrfs_print_tree(path->nodes[path->lowest_level], false);
7388d96b3424SFilipe Manana 		btrfs_err(root->fs_info,
7389d96b3424SFilipe Manana "send: key (%llu %u %llu) not found in %s root %llu, lowest_level %d, slot %d",
7390d96b3424SFilipe Manana 			  key->objectid, key->type, key->offset,
7391d96b3424SFilipe Manana 			  (root == sctx->parent_root ? "parent" : "send"),
7392d96b3424SFilipe Manana 			  root->root_key.objectid, path->lowest_level,
7393d96b3424SFilipe Manana 			  path->slots[path->lowest_level]);
7394d96b3424SFilipe Manana 		return -EUCLEAN;
7395d96b3424SFilipe Manana 	}
7396d96b3424SFilipe Manana 
7397d96b3424SFilipe Manana 	return ret;
7398d96b3424SFilipe Manana }
7399d96b3424SFilipe Manana 
full_send_tree(struct send_ctx * sctx)740031db9f7cSAlexander Block static int full_send_tree(struct send_ctx *sctx)
740131db9f7cSAlexander Block {
740231db9f7cSAlexander Block 	int ret;
740331db9f7cSAlexander Block 	struct btrfs_root *send_root = sctx->send_root;
740431db9f7cSAlexander Block 	struct btrfs_key key;
7405d96b3424SFilipe Manana 	struct btrfs_fs_info *fs_info = send_root->fs_info;
740631db9f7cSAlexander Block 	struct btrfs_path *path;
740731db9f7cSAlexander Block 
740831db9f7cSAlexander Block 	path = alloc_path_for_send();
740931db9f7cSAlexander Block 	if (!path)
741031db9f7cSAlexander Block 		return -ENOMEM;
7411ace75066SFilipe Manana 	path->reada = READA_FORWARD_ALWAYS;
741231db9f7cSAlexander Block 
741331db9f7cSAlexander Block 	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
741431db9f7cSAlexander Block 	key.type = BTRFS_INODE_ITEM_KEY;
741531db9f7cSAlexander Block 	key.offset = 0;
741631db9f7cSAlexander Block 
7417d96b3424SFilipe Manana 	down_read(&fs_info->commit_root_sem);
7418d96b3424SFilipe Manana 	sctx->last_reloc_trans = fs_info->last_reloc_trans;
7419d96b3424SFilipe Manana 	up_read(&fs_info->commit_root_sem);
7420d96b3424SFilipe Manana 
742131db9f7cSAlexander Block 	ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
742231db9f7cSAlexander Block 	if (ret < 0)
742331db9f7cSAlexander Block 		goto out;
742431db9f7cSAlexander Block 	if (ret)
742531db9f7cSAlexander Block 		goto out_finish;
742631db9f7cSAlexander Block 
742731db9f7cSAlexander Block 	while (1) {
7428d96b3424SFilipe Manana 		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
742931db9f7cSAlexander Block 
7430ca5d2ba1SFilipe Manana 		ret = changed_cb(path, NULL, &key,
7431ee8c494fSNikolay Borisov 				 BTRFS_COMPARE_TREE_NEW, sctx);
743231db9f7cSAlexander Block 		if (ret < 0)
743331db9f7cSAlexander Block 			goto out;
743431db9f7cSAlexander Block 
7435d96b3424SFilipe Manana 		down_read(&fs_info->commit_root_sem);
7436d96b3424SFilipe Manana 		if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
7437d96b3424SFilipe Manana 			sctx->last_reloc_trans = fs_info->last_reloc_trans;
7438d96b3424SFilipe Manana 			up_read(&fs_info->commit_root_sem);
7439d96b3424SFilipe Manana 			/*
7440d96b3424SFilipe Manana 			 * A transaction used for relocating a block group was
7441d96b3424SFilipe Manana 			 * committed or is about to finish its commit. Release
7442d96b3424SFilipe Manana 			 * our path (leaf) and restart the search, so that we
7443d96b3424SFilipe Manana 			 * avoid operating on any file extent items that are
7444d96b3424SFilipe Manana 			 * stale, with a disk_bytenr that reflects a pre
7445d96b3424SFilipe Manana 			 * relocation value. This way we avoid as much as
7446d96b3424SFilipe Manana 			 * possible to fallback to regular writes when checking
7447d96b3424SFilipe Manana 			 * if we can clone file ranges.
7448d96b3424SFilipe Manana 			 */
7449d96b3424SFilipe Manana 			btrfs_release_path(path);
7450d96b3424SFilipe Manana 			ret = search_key_again(sctx, send_root, path, &key);
7451d96b3424SFilipe Manana 			if (ret < 0)
7452d96b3424SFilipe Manana 				goto out;
7453d96b3424SFilipe Manana 		} else {
7454d96b3424SFilipe Manana 			up_read(&fs_info->commit_root_sem);
7455d96b3424SFilipe Manana 		}
7456d96b3424SFilipe Manana 
745731db9f7cSAlexander Block 		ret = btrfs_next_item(send_root, path);
745831db9f7cSAlexander Block 		if (ret < 0)
745931db9f7cSAlexander Block 			goto out;
746031db9f7cSAlexander Block 		if (ret) {
746131db9f7cSAlexander Block 			ret  = 0;
746231db9f7cSAlexander Block 			break;
746331db9f7cSAlexander Block 		}
746431db9f7cSAlexander Block 	}
746531db9f7cSAlexander Block 
746631db9f7cSAlexander Block out_finish:
746731db9f7cSAlexander Block 	ret = finish_inode_if_needed(sctx, 1);
746831db9f7cSAlexander Block 
746931db9f7cSAlexander Block out:
747031db9f7cSAlexander Block 	btrfs_free_path(path);
747131db9f7cSAlexander Block 	return ret;
747231db9f7cSAlexander Block }
747331db9f7cSAlexander Block 
replace_node_with_clone(struct btrfs_path * path,int level)7474d96b3424SFilipe Manana static int replace_node_with_clone(struct btrfs_path *path, int level)
7475d96b3424SFilipe Manana {
7476d96b3424SFilipe Manana 	struct extent_buffer *clone;
7477d96b3424SFilipe Manana 
7478d96b3424SFilipe Manana 	clone = btrfs_clone_extent_buffer(path->nodes[level]);
7479d96b3424SFilipe Manana 	if (!clone)
7480d96b3424SFilipe Manana 		return -ENOMEM;
7481d96b3424SFilipe Manana 
7482d96b3424SFilipe Manana 	free_extent_buffer(path->nodes[level]);
7483d96b3424SFilipe Manana 	path->nodes[level] = clone;
7484d96b3424SFilipe Manana 
7485d96b3424SFilipe Manana 	return 0;
7486d96b3424SFilipe Manana }
7487d96b3424SFilipe Manana 
/*
 * Descend one level in @path: read the child pointed to by the current slot
 * of the node at *level, store it at *level - 1 with slot 0 and decrement
 * *level. When the new level is 0 the leaf is replaced by a clone.
 *
 * Readahead is triggered for following siblings whose pointer generation is
 * greater than @reada_min_gen.
 *
 * Returns 0 on success, the error encoded in the pointer returned by
 * btrfs_read_node_slot(), or the result of replace_node_with_clone().
 */
static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen)
{
	const int cur_level = *level;
	struct extent_buffer *parent = path->nodes[cur_level];
	struct extent_buffer *child;
	int slot = path->slots[cur_level];
	const int nritems = btrfs_header_nritems(parent);
	u64 reada_budget;
	u64 reada_issued = 0;
	u32 nodesize;

	lockdep_assert_held_read(&parent->fs_info->commit_root_sem);
	ASSERT(*level != 0);

	child = btrfs_read_node_slot(parent, slot);
	if (IS_ERR(child))
		return PTR_ERR(child);

	nodesize = child->fs_info->nodesize;

	/*
	 * Start readahead for the leaves we are going to process next, so
	 * they are most likely in memory by the time we get to them and we
	 * do not block on disk IO. For nodes a single one is read ahead, as
	 * the time between processing two nodes is typically larger.
	 */
	reada_budget = (cur_level == 1 ? SZ_128K : nodesize);

	slot++;
	while (slot < nritems && reada_issued < reada_budget) {
		if (btrfs_node_ptr_generation(parent, slot) > reada_min_gen) {
			btrfs_readahead_node_child(parent, slot);
			reada_issued += nodesize;
		}
		slot++;
	}

	path->nodes[cur_level - 1] = child;
	path->slots[cur_level - 1] = 0;
	*level = cur_level - 1;

	if (*level == 0)
		return replace_node_with_clone(path, 0);

	return 0;
}
752818d0f5c6SDavid Sterba 
tree_move_next_or_upnext(struct btrfs_path * path,int * level,int root_level)752918d0f5c6SDavid Sterba static int tree_move_next_or_upnext(struct btrfs_path *path,
753018d0f5c6SDavid Sterba 				    int *level, int root_level)
753118d0f5c6SDavid Sterba {
753218d0f5c6SDavid Sterba 	int ret = 0;
753318d0f5c6SDavid Sterba 	int nritems;
753418d0f5c6SDavid Sterba 	nritems = btrfs_header_nritems(path->nodes[*level]);
753518d0f5c6SDavid Sterba 
753618d0f5c6SDavid Sterba 	path->slots[*level]++;
753718d0f5c6SDavid Sterba 
753818d0f5c6SDavid Sterba 	while (path->slots[*level] >= nritems) {
7539d96b3424SFilipe Manana 		if (*level == root_level) {
7540d96b3424SFilipe Manana 			path->slots[*level] = nritems - 1;
754118d0f5c6SDavid Sterba 			return -1;
7542d96b3424SFilipe Manana 		}
754318d0f5c6SDavid Sterba 
754418d0f5c6SDavid Sterba 		/* move upnext */
754518d0f5c6SDavid Sterba 		path->slots[*level] = 0;
754618d0f5c6SDavid Sterba 		free_extent_buffer(path->nodes[*level]);
754718d0f5c6SDavid Sterba 		path->nodes[*level] = NULL;
754818d0f5c6SDavid Sterba 		(*level)++;
754918d0f5c6SDavid Sterba 		path->slots[*level]++;
755018d0f5c6SDavid Sterba 
755118d0f5c6SDavid Sterba 		nritems = btrfs_header_nritems(path->nodes[*level]);
755218d0f5c6SDavid Sterba 		ret = 1;
755318d0f5c6SDavid Sterba 	}
755418d0f5c6SDavid Sterba 	return ret;
755518d0f5c6SDavid Sterba }
755618d0f5c6SDavid Sterba 
755718d0f5c6SDavid Sterba /*
755818d0f5c6SDavid Sterba  * Returns 1 if it had to move up and next. 0 is returned if it moved only next
755918d0f5c6SDavid Sterba  * or down.
756018d0f5c6SDavid Sterba  */
tree_advance(struct btrfs_path * path,int * level,int root_level,int allow_down,struct btrfs_key * key,u64 reada_min_gen)756118d0f5c6SDavid Sterba static int tree_advance(struct btrfs_path *path,
756218d0f5c6SDavid Sterba 			int *level, int root_level,
756318d0f5c6SDavid Sterba 			int allow_down,
75642ce73c63SFilipe Manana 			struct btrfs_key *key,
75652ce73c63SFilipe Manana 			u64 reada_min_gen)
756618d0f5c6SDavid Sterba {
756718d0f5c6SDavid Sterba 	int ret;
756818d0f5c6SDavid Sterba 
756918d0f5c6SDavid Sterba 	if (*level == 0 || !allow_down) {
757018d0f5c6SDavid Sterba 		ret = tree_move_next_or_upnext(path, level, root_level);
757118d0f5c6SDavid Sterba 	} else {
75722ce73c63SFilipe Manana 		ret = tree_move_down(path, level, reada_min_gen);
757318d0f5c6SDavid Sterba 	}
7574d96b3424SFilipe Manana 
7575d96b3424SFilipe Manana 	/*
7576d96b3424SFilipe Manana 	 * Even if we have reached the end of a tree, ret is -1, update the key
7577d96b3424SFilipe Manana 	 * anyway, so that in case we need to restart due to a block group
7578d96b3424SFilipe Manana 	 * relocation, we can assert that the last key of the root node still
7579d96b3424SFilipe Manana 	 * exists in the tree.
7580d96b3424SFilipe Manana 	 */
758118d0f5c6SDavid Sterba 	if (*level == 0)
758218d0f5c6SDavid Sterba 		btrfs_item_key_to_cpu(path->nodes[*level], key,
758318d0f5c6SDavid Sterba 				      path->slots[*level]);
758418d0f5c6SDavid Sterba 	else
758518d0f5c6SDavid Sterba 		btrfs_node_key_to_cpu(path->nodes[*level], key,
758618d0f5c6SDavid Sterba 				      path->slots[*level]);
7587d96b3424SFilipe Manana 
758818d0f5c6SDavid Sterba 	return ret;
758918d0f5c6SDavid Sterba }
759018d0f5c6SDavid Sterba 
tree_compare_item(struct btrfs_path * left_path,struct btrfs_path * right_path,char * tmp_buf)759118d0f5c6SDavid Sterba static int tree_compare_item(struct btrfs_path *left_path,
759218d0f5c6SDavid Sterba 			     struct btrfs_path *right_path,
759318d0f5c6SDavid Sterba 			     char *tmp_buf)
759418d0f5c6SDavid Sterba {
759518d0f5c6SDavid Sterba 	int cmp;
759618d0f5c6SDavid Sterba 	int len1, len2;
759718d0f5c6SDavid Sterba 	unsigned long off1, off2;
759818d0f5c6SDavid Sterba 
75993212fa14SJosef Bacik 	len1 = btrfs_item_size(left_path->nodes[0], left_path->slots[0]);
76003212fa14SJosef Bacik 	len2 = btrfs_item_size(right_path->nodes[0], right_path->slots[0]);
760118d0f5c6SDavid Sterba 	if (len1 != len2)
760218d0f5c6SDavid Sterba 		return 1;
760318d0f5c6SDavid Sterba 
760418d0f5c6SDavid Sterba 	off1 = btrfs_item_ptr_offset(left_path->nodes[0], left_path->slots[0]);
760518d0f5c6SDavid Sterba 	off2 = btrfs_item_ptr_offset(right_path->nodes[0],
760618d0f5c6SDavid Sterba 				right_path->slots[0]);
760718d0f5c6SDavid Sterba 
760818d0f5c6SDavid Sterba 	read_extent_buffer(left_path->nodes[0], tmp_buf, off1, len1);
760918d0f5c6SDavid Sterba 
761018d0f5c6SDavid Sterba 	cmp = memcmp_extent_buffer(right_path->nodes[0], tmp_buf, off2, len1);
761118d0f5c6SDavid Sterba 	if (cmp)
761218d0f5c6SDavid Sterba 		return 1;
761318d0f5c6SDavid Sterba 	return 0;
761418d0f5c6SDavid Sterba }
761518d0f5c6SDavid Sterba 
761618d0f5c6SDavid Sterba /*
7617d96b3424SFilipe Manana  * A transaction used for relocating a block group was committed or is about to
7618d96b3424SFilipe Manana  * finish its commit. Release our paths and restart the search, so that we are
7619d96b3424SFilipe Manana  * not using stale extent buffers:
7620d96b3424SFilipe Manana  *
7621d96b3424SFilipe Manana  * 1) For levels > 0, we are only holding references of extent buffers, without
7622d96b3424SFilipe Manana  *    any locks on them, which does not prevent them from having been relocated
7623d96b3424SFilipe Manana  *    and reallocated after the last time we released the commit root semaphore.
7624d96b3424SFilipe Manana  *    The exception are the root nodes, for which we always have a clone, see
7625d96b3424SFilipe Manana  *    the comment at btrfs_compare_trees();
7626d96b3424SFilipe Manana  *
7627d96b3424SFilipe Manana  * 2) For leaves, level 0, we are holding copies (clones) of extent buffers, so
7628d96b3424SFilipe Manana  *    we are safe from the concurrent relocation and reallocation. However they
7629d96b3424SFilipe Manana  *    can have file extent items with a pre relocation disk_bytenr value, so we
7630d96b3424SFilipe Manana  *    restart the start from the current commit roots and clone the new leaves so
7631d96b3424SFilipe Manana  *    that we get the post relocation disk_bytenr values. Not doing so, could
7632d96b3424SFilipe Manana  *    make us clone the wrong data in case there are new extents using the old
7633d96b3424SFilipe Manana  *    disk_bytenr that happen to be shared.
7634d96b3424SFilipe Manana  */
restart_after_relocation(struct btrfs_path * left_path,struct btrfs_path * right_path,const struct btrfs_key * left_key,const struct btrfs_key * right_key,int left_level,int right_level,const struct send_ctx * sctx)7635d96b3424SFilipe Manana static int restart_after_relocation(struct btrfs_path *left_path,
7636d96b3424SFilipe Manana 				    struct btrfs_path *right_path,
7637d96b3424SFilipe Manana 				    const struct btrfs_key *left_key,
7638d96b3424SFilipe Manana 				    const struct btrfs_key *right_key,
7639d96b3424SFilipe Manana 				    int left_level,
7640d96b3424SFilipe Manana 				    int right_level,
7641d96b3424SFilipe Manana 				    const struct send_ctx *sctx)
7642d96b3424SFilipe Manana {
7643d96b3424SFilipe Manana 	int root_level;
7644d96b3424SFilipe Manana 	int ret;
7645d96b3424SFilipe Manana 
7646d96b3424SFilipe Manana 	lockdep_assert_held_read(&sctx->send_root->fs_info->commit_root_sem);
7647d96b3424SFilipe Manana 
7648d96b3424SFilipe Manana 	btrfs_release_path(left_path);
7649d96b3424SFilipe Manana 	btrfs_release_path(right_path);
7650d96b3424SFilipe Manana 
7651d96b3424SFilipe Manana 	/*
7652d96b3424SFilipe Manana 	 * Since keys can not be added or removed to/from our roots because they
7653d96b3424SFilipe Manana 	 * are readonly and we do not allow deduplication to run in parallel
7654d96b3424SFilipe Manana 	 * (which can add, remove or change keys), the layout of the trees should
7655d96b3424SFilipe Manana 	 * not change.
7656d96b3424SFilipe Manana 	 */
7657d96b3424SFilipe Manana 	left_path->lowest_level = left_level;
7658d96b3424SFilipe Manana 	ret = search_key_again(sctx, sctx->send_root, left_path, left_key);
7659d96b3424SFilipe Manana 	if (ret < 0)
7660d96b3424SFilipe Manana 		return ret;
7661d96b3424SFilipe Manana 
7662d96b3424SFilipe Manana 	right_path->lowest_level = right_level;
7663d96b3424SFilipe Manana 	ret = search_key_again(sctx, sctx->parent_root, right_path, right_key);
7664d96b3424SFilipe Manana 	if (ret < 0)
7665d96b3424SFilipe Manana 		return ret;
7666d96b3424SFilipe Manana 
7667d96b3424SFilipe Manana 	/*
7668d96b3424SFilipe Manana 	 * If the lowest level nodes are leaves, clone them so that they can be
7669d96b3424SFilipe Manana 	 * safely used by changed_cb() while not under the protection of the
7670d96b3424SFilipe Manana 	 * commit root semaphore, even if relocation and reallocation happens in
7671d96b3424SFilipe Manana 	 * parallel.
7672d96b3424SFilipe Manana 	 */
7673d96b3424SFilipe Manana 	if (left_level == 0) {
7674d96b3424SFilipe Manana 		ret = replace_node_with_clone(left_path, 0);
7675d96b3424SFilipe Manana 		if (ret < 0)
7676d96b3424SFilipe Manana 			return ret;
7677d96b3424SFilipe Manana 	}
7678d96b3424SFilipe Manana 
7679d96b3424SFilipe Manana 	if (right_level == 0) {
7680d96b3424SFilipe Manana 		ret = replace_node_with_clone(right_path, 0);
7681d96b3424SFilipe Manana 		if (ret < 0)
7682d96b3424SFilipe Manana 			return ret;
7683d96b3424SFilipe Manana 	}
7684d96b3424SFilipe Manana 
7685d96b3424SFilipe Manana 	/*
7686d96b3424SFilipe Manana 	 * Now clone the root nodes (unless they happen to be the leaves we have
7687d96b3424SFilipe Manana 	 * already cloned). This is to protect against concurrent snapshotting of
7688d96b3424SFilipe Manana 	 * the send and parent roots (see the comment at btrfs_compare_trees()).
7689d96b3424SFilipe Manana 	 */
7690d96b3424SFilipe Manana 	root_level = btrfs_header_level(sctx->send_root->commit_root);
7691d96b3424SFilipe Manana 	if (root_level > 0) {
7692d96b3424SFilipe Manana 		ret = replace_node_with_clone(left_path, root_level);
7693d96b3424SFilipe Manana 		if (ret < 0)
7694d96b3424SFilipe Manana 			return ret;
7695d96b3424SFilipe Manana 	}
7696d96b3424SFilipe Manana 
7697d96b3424SFilipe Manana 	root_level = btrfs_header_level(sctx->parent_root->commit_root);
7698d96b3424SFilipe Manana 	if (root_level > 0) {
7699d96b3424SFilipe Manana 		ret = replace_node_with_clone(right_path, root_level);
7700d96b3424SFilipe Manana 		if (ret < 0)
7701d96b3424SFilipe Manana 			return ret;
7702d96b3424SFilipe Manana 	}
7703d96b3424SFilipe Manana 
7704d96b3424SFilipe Manana 	return 0;
7705d96b3424SFilipe Manana }
7706d96b3424SFilipe Manana 
7707d96b3424SFilipe Manana /*
770818d0f5c6SDavid Sterba  * This function compares two trees and calls the provided callback for
770918d0f5c6SDavid Sterba  * every changed/new/deleted item it finds.
771018d0f5c6SDavid Sterba  * If shared tree blocks are encountered, whole subtrees are skipped, making
771118d0f5c6SDavid Sterba  * the compare pretty fast on snapshotted subvolumes.
771218d0f5c6SDavid Sterba  *
771318d0f5c6SDavid Sterba  * This currently works on commit roots only. As commit roots are read only,
771418d0f5c6SDavid Sterba  * we don't do any locking. The commit roots are protected with transactions.
771518d0f5c6SDavid Sterba  * Transactions are ended and rejoined when a commit is tried in between.
771618d0f5c6SDavid Sterba  *
771718d0f5c6SDavid Sterba  * This function checks for modifications done to the trees while comparing.
771818d0f5c6SDavid Sterba  * If it detects a change, it aborts immediately.
771918d0f5c6SDavid Sterba  */
btrfs_compare_trees(struct btrfs_root * left_root,struct btrfs_root * right_root,struct send_ctx * sctx)772018d0f5c6SDavid Sterba static int btrfs_compare_trees(struct btrfs_root *left_root,
772188980383SRoman Anasal 			struct btrfs_root *right_root, struct send_ctx *sctx)
772218d0f5c6SDavid Sterba {
772318d0f5c6SDavid Sterba 	struct btrfs_fs_info *fs_info = left_root->fs_info;
772418d0f5c6SDavid Sterba 	int ret;
772518d0f5c6SDavid Sterba 	int cmp;
772618d0f5c6SDavid Sterba 	struct btrfs_path *left_path = NULL;
772718d0f5c6SDavid Sterba 	struct btrfs_path *right_path = NULL;
772818d0f5c6SDavid Sterba 	struct btrfs_key left_key;
772918d0f5c6SDavid Sterba 	struct btrfs_key right_key;
773018d0f5c6SDavid Sterba 	char *tmp_buf = NULL;
773118d0f5c6SDavid Sterba 	int left_root_level;
773218d0f5c6SDavid Sterba 	int right_root_level;
773318d0f5c6SDavid Sterba 	int left_level;
773418d0f5c6SDavid Sterba 	int right_level;
7735d96b3424SFilipe Manana 	int left_end_reached = 0;
7736d96b3424SFilipe Manana 	int right_end_reached = 0;
7737d96b3424SFilipe Manana 	int advance_left = 0;
7738d96b3424SFilipe Manana 	int advance_right = 0;
773918d0f5c6SDavid Sterba 	u64 left_blockptr;
774018d0f5c6SDavid Sterba 	u64 right_blockptr;
774118d0f5c6SDavid Sterba 	u64 left_gen;
774218d0f5c6SDavid Sterba 	u64 right_gen;
77432ce73c63SFilipe Manana 	u64 reada_min_gen;
774418d0f5c6SDavid Sterba 
774518d0f5c6SDavid Sterba 	left_path = btrfs_alloc_path();
774618d0f5c6SDavid Sterba 	if (!left_path) {
774718d0f5c6SDavid Sterba 		ret = -ENOMEM;
774818d0f5c6SDavid Sterba 		goto out;
774918d0f5c6SDavid Sterba 	}
775018d0f5c6SDavid Sterba 	right_path = btrfs_alloc_path();
775118d0f5c6SDavid Sterba 	if (!right_path) {
775218d0f5c6SDavid Sterba 		ret = -ENOMEM;
775318d0f5c6SDavid Sterba 		goto out;
775418d0f5c6SDavid Sterba 	}
775518d0f5c6SDavid Sterba 
775618d0f5c6SDavid Sterba 	tmp_buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
775718d0f5c6SDavid Sterba 	if (!tmp_buf) {
775818d0f5c6SDavid Sterba 		ret = -ENOMEM;
775918d0f5c6SDavid Sterba 		goto out;
776018d0f5c6SDavid Sterba 	}
776118d0f5c6SDavid Sterba 
776218d0f5c6SDavid Sterba 	left_path->search_commit_root = 1;
776318d0f5c6SDavid Sterba 	left_path->skip_locking = 1;
776418d0f5c6SDavid Sterba 	right_path->search_commit_root = 1;
776518d0f5c6SDavid Sterba 	right_path->skip_locking = 1;
776618d0f5c6SDavid Sterba 
776718d0f5c6SDavid Sterba 	/*
776818d0f5c6SDavid Sterba 	 * Strategy: Go to the first items of both trees. Then do
776918d0f5c6SDavid Sterba 	 *
777018d0f5c6SDavid Sterba 	 * If both trees are at level 0
777118d0f5c6SDavid Sterba 	 *   Compare keys of current items
777218d0f5c6SDavid Sterba 	 *     If left < right treat left item as new, advance left tree
777318d0f5c6SDavid Sterba 	 *       and repeat
777418d0f5c6SDavid Sterba 	 *     If left > right treat right item as deleted, advance right tree
777518d0f5c6SDavid Sterba 	 *       and repeat
777618d0f5c6SDavid Sterba 	 *     If left == right do deep compare of items, treat as changed if
777718d0f5c6SDavid Sterba 	 *       needed, advance both trees and repeat
777818d0f5c6SDavid Sterba 	 * If both trees are at the same level but not at level 0
777918d0f5c6SDavid Sterba 	 *   Compare keys of current nodes/leafs
778018d0f5c6SDavid Sterba 	 *     If left < right advance left tree and repeat
778118d0f5c6SDavid Sterba 	 *     If left > right advance right tree and repeat
778218d0f5c6SDavid Sterba 	 *     If left == right compare blockptrs of the next nodes/leafs
778318d0f5c6SDavid Sterba 	 *       If they match advance both trees but stay at the same level
778418d0f5c6SDavid Sterba 	 *         and repeat
778518d0f5c6SDavid Sterba 	 *       If they don't match advance both trees while allowing to go
778618d0f5c6SDavid Sterba 	 *         deeper and repeat
778718d0f5c6SDavid Sterba 	 * If tree levels are different
778818d0f5c6SDavid Sterba 	 *   Advance the tree that needs it and repeat
778918d0f5c6SDavid Sterba 	 *
779018d0f5c6SDavid Sterba 	 * Advancing a tree means:
779118d0f5c6SDavid Sterba 	 *   If we are at level 0, try to go to the next slot. If that's not
779218d0f5c6SDavid Sterba 	 *   possible, go one level up and repeat. Stop when we found a level
779318d0f5c6SDavid Sterba 	 *   where we could go to the next slot. We may at this point be on a
779418d0f5c6SDavid Sterba 	 *   node or a leaf.
779518d0f5c6SDavid Sterba 	 *
779618d0f5c6SDavid Sterba 	 *   If we are not at level 0 and not on shared tree blocks, go one
779718d0f5c6SDavid Sterba 	 *   level deeper.
779818d0f5c6SDavid Sterba 	 *
779918d0f5c6SDavid Sterba 	 *   If we are not at level 0 and on shared tree blocks, go one slot to
780018d0f5c6SDavid Sterba 	 *   the right if possible or go up and right.
780118d0f5c6SDavid Sterba 	 */
780218d0f5c6SDavid Sterba 
780318d0f5c6SDavid Sterba 	down_read(&fs_info->commit_root_sem);
780418d0f5c6SDavid Sterba 	left_level = btrfs_header_level(left_root->commit_root);
780518d0f5c6SDavid Sterba 	left_root_level = left_level;
7806d96b3424SFilipe Manana 	/*
7807d96b3424SFilipe Manana 	 * We clone the root node of the send and parent roots to prevent races
7808d96b3424SFilipe Manana 	 * with snapshot creation of these roots. Snapshot creation COWs the
7809d96b3424SFilipe Manana 	 * root node of a tree, so after the transaction is committed the old
7810d96b3424SFilipe Manana 	 * extent can be reallocated while this send operation is still ongoing.
7811d96b3424SFilipe Manana 	 * So we clone them, under the commit root semaphore, to be race free.
7812d96b3424SFilipe Manana 	 */
781318d0f5c6SDavid Sterba 	left_path->nodes[left_level] =
781418d0f5c6SDavid Sterba 			btrfs_clone_extent_buffer(left_root->commit_root);
781518d0f5c6SDavid Sterba 	if (!left_path->nodes[left_level]) {
781618d0f5c6SDavid Sterba 		ret = -ENOMEM;
7817d96b3424SFilipe Manana 		goto out_unlock;
781818d0f5c6SDavid Sterba 	}
781918d0f5c6SDavid Sterba 
782018d0f5c6SDavid Sterba 	right_level = btrfs_header_level(right_root->commit_root);
782118d0f5c6SDavid Sterba 	right_root_level = right_level;
782218d0f5c6SDavid Sterba 	right_path->nodes[right_level] =
782318d0f5c6SDavid Sterba 			btrfs_clone_extent_buffer(right_root->commit_root);
782418d0f5c6SDavid Sterba 	if (!right_path->nodes[right_level]) {
782518d0f5c6SDavid Sterba 		ret = -ENOMEM;
7826d96b3424SFilipe Manana 		goto out_unlock;
782718d0f5c6SDavid Sterba 	}
78282ce73c63SFilipe Manana 	/*
78292ce73c63SFilipe Manana 	 * Our right root is the parent root, while the left root is the "send"
78302ce73c63SFilipe Manana 	 * root. We know that all new nodes/leaves in the left root must have
78312ce73c63SFilipe Manana 	 * a generation greater than the right root's generation, so we trigger
78322ce73c63SFilipe Manana 	 * readahead for those nodes and leaves of the left root, as we know we
78332ce73c63SFilipe Manana 	 * will need to read them at some point.
78342ce73c63SFilipe Manana 	 */
78352ce73c63SFilipe Manana 	reada_min_gen = btrfs_header_generation(right_root->commit_root);
783618d0f5c6SDavid Sterba 
783718d0f5c6SDavid Sterba 	if (left_level == 0)
783818d0f5c6SDavid Sterba 		btrfs_item_key_to_cpu(left_path->nodes[left_level],
783918d0f5c6SDavid Sterba 				&left_key, left_path->slots[left_level]);
784018d0f5c6SDavid Sterba 	else
784118d0f5c6SDavid Sterba 		btrfs_node_key_to_cpu(left_path->nodes[left_level],
784218d0f5c6SDavid Sterba 				&left_key, left_path->slots[left_level]);
784318d0f5c6SDavid Sterba 	if (right_level == 0)
784418d0f5c6SDavid Sterba 		btrfs_item_key_to_cpu(right_path->nodes[right_level],
784518d0f5c6SDavid Sterba 				&right_key, right_path->slots[right_level]);
784618d0f5c6SDavid Sterba 	else
784718d0f5c6SDavid Sterba 		btrfs_node_key_to_cpu(right_path->nodes[right_level],
784818d0f5c6SDavid Sterba 				&right_key, right_path->slots[right_level]);
784918d0f5c6SDavid Sterba 
7850d96b3424SFilipe Manana 	sctx->last_reloc_trans = fs_info->last_reloc_trans;
785118d0f5c6SDavid Sterba 
785218d0f5c6SDavid Sterba 	while (1) {
7853d96b3424SFilipe Manana 		if (need_resched() ||
7854d96b3424SFilipe Manana 		    rwsem_is_contended(&fs_info->commit_root_sem)) {
7855d96b3424SFilipe Manana 			up_read(&fs_info->commit_root_sem);
78566af112b1SNikolay Borisov 			cond_resched();
7857d96b3424SFilipe Manana 			down_read(&fs_info->commit_root_sem);
7858d96b3424SFilipe Manana 		}
7859d96b3424SFilipe Manana 
7860d96b3424SFilipe Manana 		if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
7861d96b3424SFilipe Manana 			ret = restart_after_relocation(left_path, right_path,
7862d96b3424SFilipe Manana 						       &left_key, &right_key,
7863d96b3424SFilipe Manana 						       left_level, right_level,
7864d96b3424SFilipe Manana 						       sctx);
7865d96b3424SFilipe Manana 			if (ret < 0)
7866d96b3424SFilipe Manana 				goto out_unlock;
7867d96b3424SFilipe Manana 			sctx->last_reloc_trans = fs_info->last_reloc_trans;
7868d96b3424SFilipe Manana 		}
7869d96b3424SFilipe Manana 
787018d0f5c6SDavid Sterba 		if (advance_left && !left_end_reached) {
787118d0f5c6SDavid Sterba 			ret = tree_advance(left_path, &left_level,
787218d0f5c6SDavid Sterba 					left_root_level,
787318d0f5c6SDavid Sterba 					advance_left != ADVANCE_ONLY_NEXT,
78742ce73c63SFilipe Manana 					&left_key, reada_min_gen);
787518d0f5c6SDavid Sterba 			if (ret == -1)
787618d0f5c6SDavid Sterba 				left_end_reached = ADVANCE;
787718d0f5c6SDavid Sterba 			else if (ret < 0)
7878d96b3424SFilipe Manana 				goto out_unlock;
787918d0f5c6SDavid Sterba 			advance_left = 0;
788018d0f5c6SDavid Sterba 		}
788118d0f5c6SDavid Sterba 		if (advance_right && !right_end_reached) {
788218d0f5c6SDavid Sterba 			ret = tree_advance(right_path, &right_level,
788318d0f5c6SDavid Sterba 					right_root_level,
788418d0f5c6SDavid Sterba 					advance_right != ADVANCE_ONLY_NEXT,
78852ce73c63SFilipe Manana 					&right_key, reada_min_gen);
788618d0f5c6SDavid Sterba 			if (ret == -1)
788718d0f5c6SDavid Sterba 				right_end_reached = ADVANCE;
788818d0f5c6SDavid Sterba 			else if (ret < 0)
7889d96b3424SFilipe Manana 				goto out_unlock;
789018d0f5c6SDavid Sterba 			advance_right = 0;
789118d0f5c6SDavid Sterba 		}
789218d0f5c6SDavid Sterba 
789318d0f5c6SDavid Sterba 		if (left_end_reached && right_end_reached) {
789418d0f5c6SDavid Sterba 			ret = 0;
7895d96b3424SFilipe Manana 			goto out_unlock;
789618d0f5c6SDavid Sterba 		} else if (left_end_reached) {
789718d0f5c6SDavid Sterba 			if (right_level == 0) {
7898d96b3424SFilipe Manana 				up_read(&fs_info->commit_root_sem);
789918d0f5c6SDavid Sterba 				ret = changed_cb(left_path, right_path,
790018d0f5c6SDavid Sterba 						&right_key,
790118d0f5c6SDavid Sterba 						BTRFS_COMPARE_TREE_DELETED,
790288980383SRoman Anasal 						sctx);
790318d0f5c6SDavid Sterba 				if (ret < 0)
790418d0f5c6SDavid Sterba 					goto out;
7905d96b3424SFilipe Manana 				down_read(&fs_info->commit_root_sem);
790618d0f5c6SDavid Sterba 			}
790718d0f5c6SDavid Sterba 			advance_right = ADVANCE;
790818d0f5c6SDavid Sterba 			continue;
790918d0f5c6SDavid Sterba 		} else if (right_end_reached) {
791018d0f5c6SDavid Sterba 			if (left_level == 0) {
7911d96b3424SFilipe Manana 				up_read(&fs_info->commit_root_sem);
791218d0f5c6SDavid Sterba 				ret = changed_cb(left_path, right_path,
791318d0f5c6SDavid Sterba 						&left_key,
791418d0f5c6SDavid Sterba 						BTRFS_COMPARE_TREE_NEW,
791588980383SRoman Anasal 						sctx);
791618d0f5c6SDavid Sterba 				if (ret < 0)
791718d0f5c6SDavid Sterba 					goto out;
7918d96b3424SFilipe Manana 				down_read(&fs_info->commit_root_sem);
791918d0f5c6SDavid Sterba 			}
792018d0f5c6SDavid Sterba 			advance_left = ADVANCE;
792118d0f5c6SDavid Sterba 			continue;
792218d0f5c6SDavid Sterba 		}
792318d0f5c6SDavid Sterba 
792418d0f5c6SDavid Sterba 		if (left_level == 0 && right_level == 0) {
7925d96b3424SFilipe Manana 			up_read(&fs_info->commit_root_sem);
792618d0f5c6SDavid Sterba 			cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
792718d0f5c6SDavid Sterba 			if (cmp < 0) {
792818d0f5c6SDavid Sterba 				ret = changed_cb(left_path, right_path,
792918d0f5c6SDavid Sterba 						&left_key,
793018d0f5c6SDavid Sterba 						BTRFS_COMPARE_TREE_NEW,
793188980383SRoman Anasal 						sctx);
793218d0f5c6SDavid Sterba 				advance_left = ADVANCE;
793318d0f5c6SDavid Sterba 			} else if (cmp > 0) {
793418d0f5c6SDavid Sterba 				ret = changed_cb(left_path, right_path,
793518d0f5c6SDavid Sterba 						&right_key,
793618d0f5c6SDavid Sterba 						BTRFS_COMPARE_TREE_DELETED,
793788980383SRoman Anasal 						sctx);
793818d0f5c6SDavid Sterba 				advance_right = ADVANCE;
793918d0f5c6SDavid Sterba 			} else {
794018d0f5c6SDavid Sterba 				enum btrfs_compare_tree_result result;
794118d0f5c6SDavid Sterba 
794218d0f5c6SDavid Sterba 				WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
794318d0f5c6SDavid Sterba 				ret = tree_compare_item(left_path, right_path,
794418d0f5c6SDavid Sterba 							tmp_buf);
794518d0f5c6SDavid Sterba 				if (ret)
794618d0f5c6SDavid Sterba 					result = BTRFS_COMPARE_TREE_CHANGED;
794718d0f5c6SDavid Sterba 				else
794818d0f5c6SDavid Sterba 					result = BTRFS_COMPARE_TREE_SAME;
794918d0f5c6SDavid Sterba 				ret = changed_cb(left_path, right_path,
795088980383SRoman Anasal 						 &left_key, result, sctx);
795118d0f5c6SDavid Sterba 				advance_left = ADVANCE;
795218d0f5c6SDavid Sterba 				advance_right = ADVANCE;
795318d0f5c6SDavid Sterba 			}
7954d96b3424SFilipe Manana 
7955d96b3424SFilipe Manana 			if (ret < 0)
7956d96b3424SFilipe Manana 				goto out;
7957d96b3424SFilipe Manana 			down_read(&fs_info->commit_root_sem);
795818d0f5c6SDavid Sterba 		} else if (left_level == right_level) {
795918d0f5c6SDavid Sterba 			cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
796018d0f5c6SDavid Sterba 			if (cmp < 0) {
796118d0f5c6SDavid Sterba 				advance_left = ADVANCE;
796218d0f5c6SDavid Sterba 			} else if (cmp > 0) {
796318d0f5c6SDavid Sterba 				advance_right = ADVANCE;
796418d0f5c6SDavid Sterba 			} else {
796518d0f5c6SDavid Sterba 				left_blockptr = btrfs_node_blockptr(
796618d0f5c6SDavid Sterba 						left_path->nodes[left_level],
796718d0f5c6SDavid Sterba 						left_path->slots[left_level]);
796818d0f5c6SDavid Sterba 				right_blockptr = btrfs_node_blockptr(
796918d0f5c6SDavid Sterba 						right_path->nodes[right_level],
797018d0f5c6SDavid Sterba 						right_path->slots[right_level]);
797118d0f5c6SDavid Sterba 				left_gen = btrfs_node_ptr_generation(
797218d0f5c6SDavid Sterba 						left_path->nodes[left_level],
797318d0f5c6SDavid Sterba 						left_path->slots[left_level]);
797418d0f5c6SDavid Sterba 				right_gen = btrfs_node_ptr_generation(
797518d0f5c6SDavid Sterba 						right_path->nodes[right_level],
797618d0f5c6SDavid Sterba 						right_path->slots[right_level]);
797718d0f5c6SDavid Sterba 				if (left_blockptr == right_blockptr &&
797818d0f5c6SDavid Sterba 				    left_gen == right_gen) {
797918d0f5c6SDavid Sterba 					/*
798018d0f5c6SDavid Sterba 					 * As we're on a shared block, don't
798118d0f5c6SDavid Sterba 					 * allow to go deeper.
798218d0f5c6SDavid Sterba 					 */
798318d0f5c6SDavid Sterba 					advance_left = ADVANCE_ONLY_NEXT;
798418d0f5c6SDavid Sterba 					advance_right = ADVANCE_ONLY_NEXT;
798518d0f5c6SDavid Sterba 				} else {
798618d0f5c6SDavid Sterba 					advance_left = ADVANCE;
798718d0f5c6SDavid Sterba 					advance_right = ADVANCE;
798818d0f5c6SDavid Sterba 				}
798918d0f5c6SDavid Sterba 			}
799018d0f5c6SDavid Sterba 		} else if (left_level < right_level) {
799118d0f5c6SDavid Sterba 			advance_right = ADVANCE;
799218d0f5c6SDavid Sterba 		} else {
799318d0f5c6SDavid Sterba 			advance_left = ADVANCE;
799418d0f5c6SDavid Sterba 		}
799518d0f5c6SDavid Sterba 	}
799618d0f5c6SDavid Sterba 
7997d96b3424SFilipe Manana out_unlock:
7998d96b3424SFilipe Manana 	up_read(&fs_info->commit_root_sem);
799918d0f5c6SDavid Sterba out:
800018d0f5c6SDavid Sterba 	btrfs_free_path(left_path);
800118d0f5c6SDavid Sterba 	btrfs_free_path(right_path);
800218d0f5c6SDavid Sterba 	kvfree(tmp_buf);
800318d0f5c6SDavid Sterba 	return ret;
800418d0f5c6SDavid Sterba }
800518d0f5c6SDavid Sterba 
send_subvol(struct send_ctx * sctx)800631db9f7cSAlexander Block static int send_subvol(struct send_ctx *sctx)
800731db9f7cSAlexander Block {
800831db9f7cSAlexander Block 	int ret;
800931db9f7cSAlexander Block 
8010c2c71324SStefan Behrens 	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
801131db9f7cSAlexander Block 		ret = send_header(sctx);
801231db9f7cSAlexander Block 		if (ret < 0)
801331db9f7cSAlexander Block 			goto out;
8014c2c71324SStefan Behrens 	}
801531db9f7cSAlexander Block 
801631db9f7cSAlexander Block 	ret = send_subvol_begin(sctx);
801731db9f7cSAlexander Block 	if (ret < 0)
801831db9f7cSAlexander Block 		goto out;
801931db9f7cSAlexander Block 
802031db9f7cSAlexander Block 	if (sctx->parent_root) {
80211b51d6fcSDavid Sterba 		ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, sctx);
802231db9f7cSAlexander Block 		if (ret < 0)
802331db9f7cSAlexander Block 			goto out;
802431db9f7cSAlexander Block 		ret = finish_inode_if_needed(sctx, 1);
802531db9f7cSAlexander Block 		if (ret < 0)
802631db9f7cSAlexander Block 			goto out;
802731db9f7cSAlexander Block 	} else {
802831db9f7cSAlexander Block 		ret = full_send_tree(sctx);
802931db9f7cSAlexander Block 		if (ret < 0)
803031db9f7cSAlexander Block 			goto out;
803131db9f7cSAlexander Block 	}
803231db9f7cSAlexander Block 
803331db9f7cSAlexander Block out:
803431db9f7cSAlexander Block 	free_recorded_refs(sctx);
803531db9f7cSAlexander Block 	return ret;
803631db9f7cSAlexander Block }
803731db9f7cSAlexander Block 
8038e5fa8f86SFilipe Manana /*
8039e5fa8f86SFilipe Manana  * If orphan cleanup did remove any orphans from a root, it means the tree
8040e5fa8f86SFilipe Manana  * was modified and therefore the commit root is not the same as the current
8041e5fa8f86SFilipe Manana  * root anymore. This is a problem, because send uses the commit root and
8042e5fa8f86SFilipe Manana  * therefore can see inode items that don't exist in the current root anymore,
8043e5fa8f86SFilipe Manana  * and for example make calls to btrfs_iget, which will do tree lookups based
8044e5fa8f86SFilipe Manana  * on the current root and not on the commit root. Those lookups will fail,
8045e5fa8f86SFilipe Manana  * returning a -ESTALE error, and making send fail with that error. So make
8046e5fa8f86SFilipe Manana  * sure a send does not see any orphans we have just removed, and that it will
8047e5fa8f86SFilipe Manana  * see the same inodes regardless of whether a transaction commit happened
8048e5fa8f86SFilipe Manana  * before it started (meaning that the commit root will be the same as the
8049e5fa8f86SFilipe Manana  * current root) or not.
8050e5fa8f86SFilipe Manana  */
ensure_commit_roots_uptodate(struct send_ctx * sctx)8051e5fa8f86SFilipe Manana static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
8052e5fa8f86SFilipe Manana {
8053e5fa8f86SFilipe Manana 	int i;
8054e5fa8f86SFilipe Manana 	struct btrfs_trans_handle *trans = NULL;
8055e5fa8f86SFilipe Manana 
8056e5fa8f86SFilipe Manana again:
8057e5fa8f86SFilipe Manana 	if (sctx->parent_root &&
8058e5fa8f86SFilipe Manana 	    sctx->parent_root->node != sctx->parent_root->commit_root)
8059e5fa8f86SFilipe Manana 		goto commit_trans;
8060e5fa8f86SFilipe Manana 
8061e5fa8f86SFilipe Manana 	for (i = 0; i < sctx->clone_roots_cnt; i++)
8062e5fa8f86SFilipe Manana 		if (sctx->clone_roots[i].root->node !=
8063e5fa8f86SFilipe Manana 		    sctx->clone_roots[i].root->commit_root)
8064e5fa8f86SFilipe Manana 			goto commit_trans;
8065e5fa8f86SFilipe Manana 
8066e5fa8f86SFilipe Manana 	if (trans)
80673a45bb20SJeff Mahoney 		return btrfs_end_transaction(trans);
8068e5fa8f86SFilipe Manana 
8069e5fa8f86SFilipe Manana 	return 0;
8070e5fa8f86SFilipe Manana 
8071e5fa8f86SFilipe Manana commit_trans:
8072e5fa8f86SFilipe Manana 	/* Use any root, all fs roots will get their commit roots updated. */
8073e5fa8f86SFilipe Manana 	if (!trans) {
8074e5fa8f86SFilipe Manana 		trans = btrfs_join_transaction(sctx->send_root);
8075e5fa8f86SFilipe Manana 		if (IS_ERR(trans))
8076e5fa8f86SFilipe Manana 			return PTR_ERR(trans);
8077e5fa8f86SFilipe Manana 		goto again;
8078e5fa8f86SFilipe Manana 	}
8079e5fa8f86SFilipe Manana 
80803a45bb20SJeff Mahoney 	return btrfs_commit_transaction(trans);
8081e5fa8f86SFilipe Manana }
8082e5fa8f86SFilipe Manana 
80839f89d5deSFilipe Manana /*
80849f89d5deSFilipe Manana  * Make sure any existing dellaloc is flushed for any root used by a send
80859f89d5deSFilipe Manana  * operation so that we do not miss any data and we do not race with writeback
80869f89d5deSFilipe Manana  * finishing and changing a tree while send is using the tree. This could
80879f89d5deSFilipe Manana  * happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and
80889f89d5deSFilipe Manana  * a send operation then uses the subvolume.
80899f89d5deSFilipe Manana  * After flushing delalloc ensure_commit_roots_uptodate() must be called.
80909f89d5deSFilipe Manana  */
flush_delalloc_roots(struct send_ctx * sctx)80919f89d5deSFilipe Manana static int flush_delalloc_roots(struct send_ctx *sctx)
80929f89d5deSFilipe Manana {
80939f89d5deSFilipe Manana 	struct btrfs_root *root = sctx->parent_root;
80949f89d5deSFilipe Manana 	int ret;
80959f89d5deSFilipe Manana 	int i;
80969f89d5deSFilipe Manana 
80979f89d5deSFilipe Manana 	if (root) {
8098f9baa501SFilipe Manana 		ret = btrfs_start_delalloc_snapshot(root, false);
80999f89d5deSFilipe Manana 		if (ret)
81009f89d5deSFilipe Manana 			return ret;
81019f89d5deSFilipe Manana 		btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
81029f89d5deSFilipe Manana 	}
81039f89d5deSFilipe Manana 
81049f89d5deSFilipe Manana 	for (i = 0; i < sctx->clone_roots_cnt; i++) {
81059f89d5deSFilipe Manana 		root = sctx->clone_roots[i].root;
8106f9baa501SFilipe Manana 		ret = btrfs_start_delalloc_snapshot(root, false);
81079f89d5deSFilipe Manana 		if (ret)
81089f89d5deSFilipe Manana 			return ret;
81099f89d5deSFilipe Manana 		btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
81109f89d5deSFilipe Manana 	}
81119f89d5deSFilipe Manana 
81129f89d5deSFilipe Manana 	return 0;
81139f89d5deSFilipe Manana }
81149f89d5deSFilipe Manana 
btrfs_root_dec_send_in_progress(struct btrfs_root * root)811566ef7d65SDavid Sterba static void btrfs_root_dec_send_in_progress(struct btrfs_root* root)
811666ef7d65SDavid Sterba {
811766ef7d65SDavid Sterba 	spin_lock(&root->root_item_lock);
811866ef7d65SDavid Sterba 	root->send_in_progress--;
811966ef7d65SDavid Sterba 	/*
812066ef7d65SDavid Sterba 	 * Not much left to do, we don't know why it's unbalanced and
812166ef7d65SDavid Sterba 	 * can't blindly reset it to 0.
812266ef7d65SDavid Sterba 	 */
812366ef7d65SDavid Sterba 	if (root->send_in_progress < 0)
812466ef7d65SDavid Sterba 		btrfs_err(root->fs_info,
8125f5686e3aSColin Ian King 			  "send_in_progress unbalanced %d root %llu",
812666ef7d65SDavid Sterba 			  root->send_in_progress, root->root_key.objectid);
812766ef7d65SDavid Sterba 	spin_unlock(&root->root_item_lock);
812866ef7d65SDavid Sterba }
812966ef7d65SDavid Sterba 
dedupe_in_progress_warn(const struct btrfs_root * root)813062d54f3aSFilipe Manana static void dedupe_in_progress_warn(const struct btrfs_root *root)
813162d54f3aSFilipe Manana {
813262d54f3aSFilipe Manana 	btrfs_warn_rl(root->fs_info,
813362d54f3aSFilipe Manana "cannot use root %llu for send while deduplications on it are in progress (%d in progress)",
813462d54f3aSFilipe Manana 		      root->root_key.objectid, root->dedupe_in_progress);
813562d54f3aSFilipe Manana }
813662d54f3aSFilipe Manana 
btrfs_ioctl_send(struct inode * inode,struct btrfs_ioctl_send_args * arg)81379ad12305SSahil Kang long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
813831db9f7cSAlexander Block {
813931db9f7cSAlexander Block 	int ret = 0;
81409ad12305SSahil Kang 	struct btrfs_root *send_root = BTRFS_I(inode)->root;
81410b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = send_root->fs_info;
814231db9f7cSAlexander Block 	struct btrfs_root *clone_root;
814331db9f7cSAlexander Block 	struct send_ctx *sctx = NULL;
814431db9f7cSAlexander Block 	u32 i;
814531db9f7cSAlexander Block 	u64 *clone_sources_tmp = NULL;
81462c686537SDavid Sterba 	int clone_sources_to_rollback = 0;
8147bae12df9SDenis Efremov 	size_t alloc_size;
8148896c14f9SWang Shilong 	int sort_clone_roots = 0;
81493e49363bSFilipe Manana 	struct btrfs_lru_cache_entry *entry;
81503e49363bSFilipe Manana 	struct btrfs_lru_cache_entry *tmp;
815131db9f7cSAlexander Block 
815231db9f7cSAlexander Block 	if (!capable(CAP_SYS_ADMIN))
815331db9f7cSAlexander Block 		return -EPERM;
815431db9f7cSAlexander Block 
8155139f807aSJosef Bacik 	/*
81562c686537SDavid Sterba 	 * The subvolume must remain read-only during send, protect against
8157521e0546SDavid Sterba 	 * making it RW. This also protects against deletion.
81582c686537SDavid Sterba 	 */
81592c686537SDavid Sterba 	spin_lock(&send_root->root_item_lock);
816062d54f3aSFilipe Manana 	if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) {
816162d54f3aSFilipe Manana 		dedupe_in_progress_warn(send_root);
816262d54f3aSFilipe Manana 		spin_unlock(&send_root->root_item_lock);
816362d54f3aSFilipe Manana 		return -EAGAIN;
816462d54f3aSFilipe Manana 	}
81652c686537SDavid Sterba 	send_root->send_in_progress++;
81662c686537SDavid Sterba 	spin_unlock(&send_root->root_item_lock);
81672c686537SDavid Sterba 
81682c686537SDavid Sterba 	/*
81692c686537SDavid Sterba 	 * Userspace tools do the checks and warn the user if it's
81702c686537SDavid Sterba 	 * not RO.
81712c686537SDavid Sterba 	 */
81722c686537SDavid Sterba 	if (!btrfs_root_readonly(send_root)) {
81732c686537SDavid Sterba 		ret = -EPERM;
81742c686537SDavid Sterba 		goto out;
81752c686537SDavid Sterba 	}
81762c686537SDavid Sterba 
8177457ae726SDan Carpenter 	/*
8178457ae726SDan Carpenter 	 * Check that we don't overflow at later allocations, we request
8179457ae726SDan Carpenter 	 * clone_sources_count + 1 items, and compare to unsigned long inside
818033e17b3fSDavid Sterba 	 * access_ok. Also set an upper limit for allocation size so this can't
818133e17b3fSDavid Sterba 	 * easily exhaust memory. Max number of clone sources is about 200K.
8182457ae726SDan Carpenter 	 */
818333e17b3fSDavid Sterba 	if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) {
8184f5ecec3cSDan Carpenter 		ret = -EINVAL;
8185f5ecec3cSDan Carpenter 		goto out;
8186f5ecec3cSDan Carpenter 	}
8187f5ecec3cSDan Carpenter 
8188c2c71324SStefan Behrens 	if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
81897efadbcbSDavid Sterba 		ret = -EOPNOTSUPP;
8190cb95e7bfSMark Fasheh 		goto out;
8191cb95e7bfSMark Fasheh 	}
8192cb95e7bfSMark Fasheh 
8193e780b0d1SDavid Sterba 	sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL);
819431db9f7cSAlexander Block 	if (!sctx) {
819531db9f7cSAlexander Block 		ret = -ENOMEM;
819631db9f7cSAlexander Block 		goto out;
819731db9f7cSAlexander Block 	}
819831db9f7cSAlexander Block 
819931db9f7cSAlexander Block 	INIT_LIST_HEAD(&sctx->new_refs);
820031db9f7cSAlexander Block 	INIT_LIST_HEAD(&sctx->deleted_refs);
820131db9f7cSAlexander Block 
8202c48545deSFilipe Manana 	btrfs_lru_cache_init(&sctx->name_cache, SEND_MAX_NAME_CACHE_SIZE);
820390b90d4aSFilipe Manana 	btrfs_lru_cache_init(&sctx->backref_cache, SEND_MAX_BACKREF_CACHE_SIZE);
8204e8a7f49dSFilipe Manana 	btrfs_lru_cache_init(&sctx->dir_created_cache,
8205e8a7f49dSFilipe Manana 			     SEND_MAX_DIR_CREATED_CACHE_SIZE);
82063e49363bSFilipe Manana 	/*
82073e49363bSFilipe Manana 	 * This cache is periodically trimmed to a fixed size elsewhere, see
82083e49363bSFilipe Manana 	 * cache_dir_utimes() and trim_dir_utimes_cache().
82093e49363bSFilipe Manana 	 */
82103e49363bSFilipe Manana 	btrfs_lru_cache_init(&sctx->dir_utimes_cache, 0);
821166d04209SFilipe Manana 
8212d307d2f3SFilipe Manana 	sctx->pending_dir_moves = RB_ROOT;
8213d307d2f3SFilipe Manana 	sctx->waiting_dir_moves = RB_ROOT;
8214d307d2f3SFilipe Manana 	sctx->orphan_dirs = RB_ROOT;
8215d307d2f3SFilipe Manana 	sctx->rbtree_new_refs = RB_ROOT;
8216d307d2f3SFilipe Manana 	sctx->rbtree_deleted_refs = RB_ROOT;
8217d307d2f3SFilipe Manana 
8218cb95e7bfSMark Fasheh 	sctx->flags = arg->flags;
8219cb95e7bfSMark Fasheh 
8220e77fbf99SDavid Sterba 	if (arg->flags & BTRFS_SEND_FLAG_VERSION) {
8221e77fbf99SDavid Sterba 		if (arg->version > BTRFS_SEND_STREAM_VERSION) {
8222e77fbf99SDavid Sterba 			ret = -EPROTO;
8223e77fbf99SDavid Sterba 			goto out;
8224e77fbf99SDavid Sterba 		}
8225e77fbf99SDavid Sterba 		/* Zero means "use the highest version" */
8226e77fbf99SDavid Sterba 		sctx->proto = arg->version ?: BTRFS_SEND_STREAM_VERSION;
8227e77fbf99SDavid Sterba 	} else {
8228e77fbf99SDavid Sterba 		sctx->proto = 1;
8229e77fbf99SDavid Sterba 	}
8230d6815592SOmar Sandoval 	if ((arg->flags & BTRFS_SEND_FLAG_COMPRESSED) && sctx->proto < 2) {
8231d6815592SOmar Sandoval 		ret = -EINVAL;
8232d6815592SOmar Sandoval 		goto out;
8233d6815592SOmar Sandoval 	}
8234e77fbf99SDavid Sterba 
823531db9f7cSAlexander Block 	sctx->send_filp = fget(arg->send_fd);
8236da2dbbb7SJann Horn 	if (!sctx->send_filp || !(sctx->send_filp->f_mode & FMODE_WRITE)) {
8237ecc7ada7STsutomu Itoh 		ret = -EBADF;
823831db9f7cSAlexander Block 		goto out;
823931db9f7cSAlexander Block 	}
824031db9f7cSAlexander Block 
824131db9f7cSAlexander Block 	sctx->send_root = send_root;
8242521e0546SDavid Sterba 	/*
8243521e0546SDavid Sterba 	 * Unlikely but possible, if the subvolume is marked for deletion but
8244521e0546SDavid Sterba 	 * is slow to remove the directory entry, send can still be started
8245521e0546SDavid Sterba 	 */
8246521e0546SDavid Sterba 	if (btrfs_root_dead(sctx->send_root)) {
8247521e0546SDavid Sterba 		ret = -EPERM;
8248521e0546SDavid Sterba 		goto out;
8249521e0546SDavid Sterba 	}
8250521e0546SDavid Sterba 
825131db9f7cSAlexander Block 	sctx->clone_roots_cnt = arg->clone_sources_count;
825231db9f7cSAlexander Block 
8253a4b333f2SOmar Sandoval 	if (sctx->proto >= 2) {
8254a4b333f2SOmar Sandoval 		u32 send_buf_num_pages;
8255356bbbb6SOmar Sandoval 
8256875c627cSWang Yugui 		sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V2;
8257a4b333f2SOmar Sandoval 		sctx->send_buf = vmalloc(sctx->send_max_size);
8258a4b333f2SOmar Sandoval 		if (!sctx->send_buf) {
8259a4b333f2SOmar Sandoval 			ret = -ENOMEM;
8260a4b333f2SOmar Sandoval 			goto out;
8261a4b333f2SOmar Sandoval 		}
8262a4b333f2SOmar Sandoval 		send_buf_num_pages = sctx->send_max_size >> PAGE_SHIFT;
8263a4b333f2SOmar Sandoval 		sctx->send_buf_pages = kcalloc(send_buf_num_pages,
8264a4b333f2SOmar Sandoval 					       sizeof(*sctx->send_buf_pages),
8265a4b333f2SOmar Sandoval 					       GFP_KERNEL);
8266a4b333f2SOmar Sandoval 		if (!sctx->send_buf_pages) {
8267a4b333f2SOmar Sandoval 			ret = -ENOMEM;
8268a4b333f2SOmar Sandoval 			goto out;
8269a4b333f2SOmar Sandoval 		}
8270a4b333f2SOmar Sandoval 		for (i = 0; i < send_buf_num_pages; i++) {
8271a4b333f2SOmar Sandoval 			sctx->send_buf_pages[i] =
8272a4b333f2SOmar Sandoval 				vmalloc_to_page(sctx->send_buf + (i << PAGE_SHIFT));
8273a4b333f2SOmar Sandoval 		}
8274a4b333f2SOmar Sandoval 	} else {
8275a4b333f2SOmar Sandoval 		sctx->send_max_size = BTRFS_SEND_BUF_SIZE_V1;
8276752ade68SMichal Hocko 		sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
8277a4b333f2SOmar Sandoval 	}
827831db9f7cSAlexander Block 	if (!sctx->send_buf) {
827931db9f7cSAlexander Block 		ret = -ENOMEM;
828031db9f7cSAlexander Block 		goto out;
828131db9f7cSAlexander Block 	}
828231db9f7cSAlexander Block 
82830b76a4f7SDmitry Antipov 	sctx->clone_roots = kvcalloc(arg->clone_sources_count + 1,
82840b76a4f7SDmitry Antipov 				     sizeof(*sctx->clone_roots),
8285bae12df9SDenis Efremov 				     GFP_KERNEL);
828631db9f7cSAlexander Block 	if (!sctx->clone_roots) {
828731db9f7cSAlexander Block 		ret = -ENOMEM;
828831db9f7cSAlexander Block 		goto out;
828931db9f7cSAlexander Block 	}
829031db9f7cSAlexander Block 
8291bae12df9SDenis Efremov 	alloc_size = array_size(sizeof(*arg->clone_sources),
8292bae12df9SDenis Efremov 				arg->clone_sources_count);
8293e55d1153SDavid Sterba 
829431db9f7cSAlexander Block 	if (arg->clone_sources_count) {
8295752ade68SMichal Hocko 		clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL);
829631db9f7cSAlexander Block 		if (!clone_sources_tmp) {
829731db9f7cSAlexander Block 			ret = -ENOMEM;
829831db9f7cSAlexander Block 			goto out;
829931db9f7cSAlexander Block 		}
830031db9f7cSAlexander Block 
830131db9f7cSAlexander Block 		ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
8302e55d1153SDavid Sterba 				alloc_size);
830331db9f7cSAlexander Block 		if (ret) {
830431db9f7cSAlexander Block 			ret = -EFAULT;
830531db9f7cSAlexander Block 			goto out;
830631db9f7cSAlexander Block 		}
830731db9f7cSAlexander Block 
830831db9f7cSAlexander Block 		for (i = 0; i < arg->clone_sources_count; i++) {
830956e9357aSDavid Sterba 			clone_root = btrfs_get_fs_root(fs_info,
831056e9357aSDavid Sterba 						clone_sources_tmp[i], true);
831131db9f7cSAlexander Block 			if (IS_ERR(clone_root)) {
831231db9f7cSAlexander Block 				ret = PTR_ERR(clone_root);
831331db9f7cSAlexander Block 				goto out;
831431db9f7cSAlexander Block 			}
83152c686537SDavid Sterba 			spin_lock(&clone_root->root_item_lock);
83165cc2b17eSFilipe Manana 			if (!btrfs_root_readonly(clone_root) ||
83175cc2b17eSFilipe Manana 			    btrfs_root_dead(clone_root)) {
83182c686537SDavid Sterba 				spin_unlock(&clone_root->root_item_lock);
831900246528SJosef Bacik 				btrfs_put_root(clone_root);
83202c686537SDavid Sterba 				ret = -EPERM;
83212c686537SDavid Sterba 				goto out;
83222c686537SDavid Sterba 			}
832362d54f3aSFilipe Manana 			if (clone_root->dedupe_in_progress) {
832462d54f3aSFilipe Manana 				dedupe_in_progress_warn(clone_root);
832562d54f3aSFilipe Manana 				spin_unlock(&clone_root->root_item_lock);
832600246528SJosef Bacik 				btrfs_put_root(clone_root);
832762d54f3aSFilipe Manana 				ret = -EAGAIN;
832862d54f3aSFilipe Manana 				goto out;
832962d54f3aSFilipe Manana 			}
83302f1f465aSFilipe Manana 			clone_root->send_in_progress++;
83312c686537SDavid Sterba 			spin_unlock(&clone_root->root_item_lock);
833218f687d5SWang Shilong 
833331db9f7cSAlexander Block 			sctx->clone_roots[i].root = clone_root;
83342f1f465aSFilipe Manana 			clone_sources_to_rollback = i + 1;
833531db9f7cSAlexander Block 		}
83362f91306aSDavid Sterba 		kvfree(clone_sources_tmp);
833731db9f7cSAlexander Block 		clone_sources_tmp = NULL;
833831db9f7cSAlexander Block 	}
833931db9f7cSAlexander Block 
834031db9f7cSAlexander Block 	if (arg->parent_root) {
834156e9357aSDavid Sterba 		sctx->parent_root = btrfs_get_fs_root(fs_info, arg->parent_root,
834256e9357aSDavid Sterba 						      true);
8343b1b19596SStefan Behrens 		if (IS_ERR(sctx->parent_root)) {
8344b1b19596SStefan Behrens 			ret = PTR_ERR(sctx->parent_root);
834531db9f7cSAlexander Block 			goto out;
834631db9f7cSAlexander Block 		}
834718f687d5SWang Shilong 
83482c686537SDavid Sterba 		spin_lock(&sctx->parent_root->root_item_lock);
83492c686537SDavid Sterba 		sctx->parent_root->send_in_progress++;
8350521e0546SDavid Sterba 		if (!btrfs_root_readonly(sctx->parent_root) ||
8351521e0546SDavid Sterba 				btrfs_root_dead(sctx->parent_root)) {
83522c686537SDavid Sterba 			spin_unlock(&sctx->parent_root->root_item_lock);
83532c686537SDavid Sterba 			ret = -EPERM;
83542c686537SDavid Sterba 			goto out;
83552c686537SDavid Sterba 		}
835662d54f3aSFilipe Manana 		if (sctx->parent_root->dedupe_in_progress) {
835762d54f3aSFilipe Manana 			dedupe_in_progress_warn(sctx->parent_root);
835862d54f3aSFilipe Manana 			spin_unlock(&sctx->parent_root->root_item_lock);
835962d54f3aSFilipe Manana 			ret = -EAGAIN;
836062d54f3aSFilipe Manana 			goto out;
836162d54f3aSFilipe Manana 		}
83622c686537SDavid Sterba 		spin_unlock(&sctx->parent_root->root_item_lock);
836331db9f7cSAlexander Block 	}
836431db9f7cSAlexander Block 
836531db9f7cSAlexander Block 	/*
836631db9f7cSAlexander Block 	 * Clones from send_root are allowed, but only if the clone source
836731db9f7cSAlexander Block 	 * is behind the current send position. This is checked while searching
836831db9f7cSAlexander Block 	 * for possible clone sources.
836931db9f7cSAlexander Block 	 */
83706f9a3da5SJosef Bacik 	sctx->clone_roots[sctx->clone_roots_cnt++].root =
837100246528SJosef Bacik 		btrfs_grab_root(sctx->send_root);
837231db9f7cSAlexander Block 
837331db9f7cSAlexander Block 	/* We do a bsearch later */
837431db9f7cSAlexander Block 	sort(sctx->clone_roots, sctx->clone_roots_cnt,
837531db9f7cSAlexander Block 			sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
837631db9f7cSAlexander Block 			NULL);
8377896c14f9SWang Shilong 	sort_clone_roots = 1;
837831db9f7cSAlexander Block 
83799f89d5deSFilipe Manana 	ret = flush_delalloc_roots(sctx);
83809f89d5deSFilipe Manana 	if (ret)
83819f89d5deSFilipe Manana 		goto out;
83829f89d5deSFilipe Manana 
8383e5fa8f86SFilipe Manana 	ret = ensure_commit_roots_uptodate(sctx);
8384e5fa8f86SFilipe Manana 	if (ret)
8385e5fa8f86SFilipe Manana 		goto out;
8386e5fa8f86SFilipe Manana 
838731db9f7cSAlexander Block 	ret = send_subvol(sctx);
838831db9f7cSAlexander Block 	if (ret < 0)
838931db9f7cSAlexander Block 		goto out;
839031db9f7cSAlexander Block 
83913e49363bSFilipe Manana 	btrfs_lru_cache_for_each_entry_safe(&sctx->dir_utimes_cache, entry, tmp) {
83923e49363bSFilipe Manana 		ret = send_utimes(sctx, entry->key, entry->gen);
83933e49363bSFilipe Manana 		if (ret < 0)
83943e49363bSFilipe Manana 			goto out;
83953e49363bSFilipe Manana 		btrfs_lru_cache_remove(&sctx->dir_utimes_cache, entry);
83963e49363bSFilipe Manana 	}
83973e49363bSFilipe Manana 
8398c2c71324SStefan Behrens 	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
839931db9f7cSAlexander Block 		ret = begin_cmd(sctx, BTRFS_SEND_C_END);
840031db9f7cSAlexander Block 		if (ret < 0)
840131db9f7cSAlexander Block 			goto out;
840231db9f7cSAlexander Block 		ret = send_cmd(sctx);
840331db9f7cSAlexander Block 		if (ret < 0)
840431db9f7cSAlexander Block 			goto out;
8405c2c71324SStefan Behrens 	}
840631db9f7cSAlexander Block 
840731db9f7cSAlexander Block out:
84089f03740aSFilipe David Borba Manana 	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
84099f03740aSFilipe David Borba Manana 	while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
84109f03740aSFilipe David Borba Manana 		struct rb_node *n;
84119f03740aSFilipe David Borba Manana 		struct pending_dir_move *pm;
84129f03740aSFilipe David Borba Manana 
84139f03740aSFilipe David Borba Manana 		n = rb_first(&sctx->pending_dir_moves);
84149f03740aSFilipe David Borba Manana 		pm = rb_entry(n, struct pending_dir_move, node);
84159f03740aSFilipe David Borba Manana 		while (!list_empty(&pm->list)) {
84169f03740aSFilipe David Borba Manana 			struct pending_dir_move *pm2;
84179f03740aSFilipe David Borba Manana 
84189f03740aSFilipe David Borba Manana 			pm2 = list_first_entry(&pm->list,
84199f03740aSFilipe David Borba Manana 					       struct pending_dir_move, list);
84209f03740aSFilipe David Borba Manana 			free_pending_move(sctx, pm2);
84219f03740aSFilipe David Borba Manana 		}
84229f03740aSFilipe David Borba Manana 		free_pending_move(sctx, pm);
84239f03740aSFilipe David Borba Manana 	}
84249f03740aSFilipe David Borba Manana 
84259f03740aSFilipe David Borba Manana 	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
84269f03740aSFilipe David Borba Manana 	while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
84279f03740aSFilipe David Borba Manana 		struct rb_node *n;
84289f03740aSFilipe David Borba Manana 		struct waiting_dir_move *dm;
84299f03740aSFilipe David Borba Manana 
84309f03740aSFilipe David Borba Manana 		n = rb_first(&sctx->waiting_dir_moves);
84319f03740aSFilipe David Borba Manana 		dm = rb_entry(n, struct waiting_dir_move, node);
84329f03740aSFilipe David Borba Manana 		rb_erase(&dm->node, &sctx->waiting_dir_moves);
84339f03740aSFilipe David Borba Manana 		kfree(dm);
84349f03740aSFilipe David Borba Manana 	}
84359f03740aSFilipe David Borba Manana 
84369dc44214SFilipe Manana 	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs));
84379dc44214SFilipe Manana 	while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) {
84389dc44214SFilipe Manana 		struct rb_node *n;
84399dc44214SFilipe Manana 		struct orphan_dir_info *odi;
84409dc44214SFilipe Manana 
84419dc44214SFilipe Manana 		n = rb_first(&sctx->orphan_dirs);
84429dc44214SFilipe Manana 		odi = rb_entry(n, struct orphan_dir_info, node);
84439dc44214SFilipe Manana 		free_orphan_dir_info(sctx, odi);
84449dc44214SFilipe Manana 	}
84459dc44214SFilipe Manana 
8446896c14f9SWang Shilong 	if (sort_clone_roots) {
84476f9a3da5SJosef Bacik 		for (i = 0; i < sctx->clone_roots_cnt; i++) {
8448896c14f9SWang Shilong 			btrfs_root_dec_send_in_progress(
8449896c14f9SWang Shilong 					sctx->clone_roots[i].root);
845000246528SJosef Bacik 			btrfs_put_root(sctx->clone_roots[i].root);
84516f9a3da5SJosef Bacik 		}
8452896c14f9SWang Shilong 	} else {
84536f9a3da5SJosef Bacik 		for (i = 0; sctx && i < clone_sources_to_rollback; i++) {
8454896c14f9SWang Shilong 			btrfs_root_dec_send_in_progress(
8455896c14f9SWang Shilong 					sctx->clone_roots[i].root);
845600246528SJosef Bacik 			btrfs_put_root(sctx->clone_roots[i].root);
84576f9a3da5SJosef Bacik 		}
8458896c14f9SWang Shilong 
8459896c14f9SWang Shilong 		btrfs_root_dec_send_in_progress(send_root);
8460896c14f9SWang Shilong 	}
84616f9a3da5SJosef Bacik 	if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) {
846266ef7d65SDavid Sterba 		btrfs_root_dec_send_in_progress(sctx->parent_root);
846300246528SJosef Bacik 		btrfs_put_root(sctx->parent_root);
84646f9a3da5SJosef Bacik 	}
84652c686537SDavid Sterba 
84662f91306aSDavid Sterba 	kvfree(clone_sources_tmp);
846731db9f7cSAlexander Block 
846831db9f7cSAlexander Block 	if (sctx) {
846931db9f7cSAlexander Block 		if (sctx->send_filp)
847031db9f7cSAlexander Block 			fput(sctx->send_filp);
847131db9f7cSAlexander Block 
8472c03d01f3SDavid Sterba 		kvfree(sctx->clone_roots);
8473a4b333f2SOmar Sandoval 		kfree(sctx->send_buf_pages);
84746ff48ce0SDavid Sterba 		kvfree(sctx->send_buf);
847538622010SBoris Burkov 		kvfree(sctx->verity_descriptor);
847631db9f7cSAlexander Block 
8477152555b3SFilipe Manana 		close_current_inode(sctx);
8478521b6803SFilipe Manana 
8479c48545deSFilipe Manana 		btrfs_lru_cache_clear(&sctx->name_cache);
848090b90d4aSFilipe Manana 		btrfs_lru_cache_clear(&sctx->backref_cache);
8481e8a7f49dSFilipe Manana 		btrfs_lru_cache_clear(&sctx->dir_created_cache);
84823e49363bSFilipe Manana 		btrfs_lru_cache_clear(&sctx->dir_utimes_cache);
848366d04209SFilipe Manana 
848431db9f7cSAlexander Block 		kfree(sctx);
848531db9f7cSAlexander Block 	}
848631db9f7cSAlexander Block 
848731db9f7cSAlexander Block 	return ret;
848831db9f7cSAlexander Block }
8489