xref: /openbmc/linux/fs/btrfs/send.c (revision 77d84ff8)
1 /*
2  * Copyright (C) 2012 Alexander Block.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/bsearch.h>
20 #include <linux/fs.h>
21 #include <linux/file.h>
22 #include <linux/sort.h>
23 #include <linux/mount.h>
24 #include <linux/xattr.h>
25 #include <linux/posix_acl_xattr.h>
26 #include <linux/radix-tree.h>
27 #include <linux/crc32c.h>
28 #include <linux/vmalloc.h>
29 #include <linux/string.h>
30 
31 #include "send.h"
32 #include "backref.h"
33 #include "locking.h"
34 #include "disk-io.h"
35 #include "btrfs_inode.h"
36 #include "transaction.h"
37 
38 static int g_verbose = 0;
39 
40 #define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__)
41 
42 /*
43  * A fs_path is a helper to dynamically build path names with unknown size.
44  * It reallocates the internal buffer on demand.
45  * It allows fast adding of path elements on the right side (normal path) and
46  * fast adding to the left side (reversed path). A reversed path can also be
47  * unreversed if needed.
48  */
49 struct fs_path {
50 	union {
51 		struct {
52 			char *start;
53 			char *end;
54 			char *prepared;
55 
56 			char *buf;
57 			int buf_len;
58 			unsigned int reversed:1;
59 			unsigned int virtual_mem:1;
60 			char inline_buf[];
61 		};
62 		char pad[PAGE_SIZE];
63 	};
64 };
65 #define FS_PATH_INLINE_SIZE \
66 	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
67 
68 
69 /* reused for each extent */
70 struct clone_root {
71 	struct btrfs_root *root;
72 	u64 ino;
73 	u64 offset;
74 
75 	u64 found_refs;
76 };
77 
78 #define SEND_CTX_MAX_NAME_CACHE_SIZE 128
79 #define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
80 
81 struct send_ctx {
82 	struct file *send_filp;
83 	loff_t send_off;
84 	char *send_buf;
85 	u32 send_size;
86 	u32 send_max_size;
87 	u64 total_send_size;
88 	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
89 	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */
90 
91 	struct vfsmount *mnt;
92 
93 	struct btrfs_root *send_root;
94 	struct btrfs_root *parent_root;
95 	struct clone_root *clone_roots;
96 	int clone_roots_cnt;
97 
98 	/* current state of the compare_tree call */
99 	struct btrfs_path *left_path;
100 	struct btrfs_path *right_path;
101 	struct btrfs_key *cmp_key;
102 
103 	/*
104 	 * infos of the currently processed inode. In case of deleted inodes,
105 	 * these are the values from the deleted inode.
106 	 */
107 	u64 cur_ino;
108 	u64 cur_inode_gen;
109 	int cur_inode_new;
110 	int cur_inode_new_gen;
111 	int cur_inode_deleted;
112 	u64 cur_inode_size;
113 	u64 cur_inode_mode;
114 
115 	u64 send_progress;
116 
117 	struct list_head new_refs;
118 	struct list_head deleted_refs;
119 
120 	struct radix_tree_root name_cache;
121 	struct list_head name_cache_list;
122 	int name_cache_size;
123 
124 	char *read_buf;
125 };
126 
127 struct name_cache_entry {
128 	struct list_head list;
129 	/*
130 	 * radix_tree has only 32bit entries but we need to handle 64bit inums.
131 	 * We use the lower 32bit of the 64bit inum to store it in the tree. If
132 	 * more then one inum would fall into the same entry, we use radix_list
133 	 * to store the additional entries. radix_list is also used to store
134 	 * entries where two entries have the same inum but different
135 	 * generations.
136 	 */
137 	struct list_head radix_list;
138 	u64 ino;
139 	u64 gen;
140 	u64 parent_ino;
141 	u64 parent_gen;
142 	int ret;
143 	int need_later_update;
144 	int name_len;
145 	char name[];
146 };
147 
148 static void fs_path_reset(struct fs_path *p)
149 {
150 	if (p->reversed) {
151 		p->start = p->buf + p->buf_len - 1;
152 		p->end = p->start;
153 		*p->start = 0;
154 	} else {
155 		p->start = p->buf;
156 		p->end = p->start;
157 		*p->start = 0;
158 	}
159 }
160 
161 static struct fs_path *fs_path_alloc(void)
162 {
163 	struct fs_path *p;
164 
165 	p = kmalloc(sizeof(*p), GFP_NOFS);
166 	if (!p)
167 		return NULL;
168 	p->reversed = 0;
169 	p->virtual_mem = 0;
170 	p->buf = p->inline_buf;
171 	p->buf_len = FS_PATH_INLINE_SIZE;
172 	fs_path_reset(p);
173 	return p;
174 }
175 
176 static struct fs_path *fs_path_alloc_reversed(void)
177 {
178 	struct fs_path *p;
179 
180 	p = fs_path_alloc();
181 	if (!p)
182 		return NULL;
183 	p->reversed = 1;
184 	fs_path_reset(p);
185 	return p;
186 }
187 
188 static void fs_path_free(struct fs_path *p)
189 {
190 	if (!p)
191 		return;
192 	if (p->buf != p->inline_buf) {
193 		if (p->virtual_mem)
194 			vfree(p->buf);
195 		else
196 			kfree(p->buf);
197 	}
198 	kfree(p);
199 }
200 
201 static int fs_path_len(struct fs_path *p)
202 {
203 	return p->end - p->start;
204 }
205 
206 static int fs_path_ensure_buf(struct fs_path *p, int len)
207 {
208 	char *tmp_buf;
209 	int path_len;
210 	int old_buf_len;
211 
212 	len++;
213 
214 	if (p->buf_len >= len)
215 		return 0;
216 
217 	path_len = p->end - p->start;
218 	old_buf_len = p->buf_len;
219 	len = PAGE_ALIGN(len);
220 
221 	if (p->buf == p->inline_buf) {
222 		tmp_buf = kmalloc(len, GFP_NOFS | __GFP_NOWARN);
223 		if (!tmp_buf) {
224 			tmp_buf = vmalloc(len);
225 			if (!tmp_buf)
226 				return -ENOMEM;
227 			p->virtual_mem = 1;
228 		}
229 		memcpy(tmp_buf, p->buf, p->buf_len);
230 		p->buf = tmp_buf;
231 		p->buf_len = len;
232 	} else {
233 		if (p->virtual_mem) {
234 			tmp_buf = vmalloc(len);
235 			if (!tmp_buf)
236 				return -ENOMEM;
237 			memcpy(tmp_buf, p->buf, p->buf_len);
238 			vfree(p->buf);
239 		} else {
240 			tmp_buf = krealloc(p->buf, len, GFP_NOFS);
241 			if (!tmp_buf) {
242 				tmp_buf = vmalloc(len);
243 				if (!tmp_buf)
244 					return -ENOMEM;
245 				memcpy(tmp_buf, p->buf, p->buf_len);
246 				kfree(p->buf);
247 				p->virtual_mem = 1;
248 			}
249 		}
250 		p->buf = tmp_buf;
251 		p->buf_len = len;
252 	}
253 	if (p->reversed) {
254 		tmp_buf = p->buf + old_buf_len - path_len - 1;
255 		p->end = p->buf + p->buf_len - 1;
256 		p->start = p->end - path_len;
257 		memmove(p->start, tmp_buf, path_len + 1);
258 	} else {
259 		p->start = p->buf;
260 		p->end = p->start + path_len;
261 	}
262 	return 0;
263 }
264 
265 static int fs_path_prepare_for_add(struct fs_path *p, int name_len)
266 {
267 	int ret;
268 	int new_len;
269 
270 	new_len = p->end - p->start + name_len;
271 	if (p->start != p->end)
272 		new_len++;
273 	ret = fs_path_ensure_buf(p, new_len);
274 	if (ret < 0)
275 		goto out;
276 
277 	if (p->reversed) {
278 		if (p->start != p->end)
279 			*--p->start = '/';
280 		p->start -= name_len;
281 		p->prepared = p->start;
282 	} else {
283 		if (p->start != p->end)
284 			*p->end++ = '/';
285 		p->prepared = p->end;
286 		p->end += name_len;
287 		*p->end = 0;
288 	}
289 
290 out:
291 	return ret;
292 }
293 
294 static int fs_path_add(struct fs_path *p, const char *name, int name_len)
295 {
296 	int ret;
297 
298 	ret = fs_path_prepare_for_add(p, name_len);
299 	if (ret < 0)
300 		goto out;
301 	memcpy(p->prepared, name, name_len);
302 	p->prepared = NULL;
303 
304 out:
305 	return ret;
306 }
307 
308 static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
309 {
310 	int ret;
311 
312 	ret = fs_path_prepare_for_add(p, p2->end - p2->start);
313 	if (ret < 0)
314 		goto out;
315 	memcpy(p->prepared, p2->start, p2->end - p2->start);
316 	p->prepared = NULL;
317 
318 out:
319 	return ret;
320 }
321 
322 static int fs_path_add_from_extent_buffer(struct fs_path *p,
323 					  struct extent_buffer *eb,
324 					  unsigned long off, int len)
325 {
326 	int ret;
327 
328 	ret = fs_path_prepare_for_add(p, len);
329 	if (ret < 0)
330 		goto out;
331 
332 	read_extent_buffer(eb, p->prepared, off, len);
333 	p->prepared = NULL;
334 
335 out:
336 	return ret;
337 }
338 
339 #if 0
340 static void fs_path_remove(struct fs_path *p)
341 {
342 	BUG_ON(p->reversed);
343 	while (p->start != p->end && *p->end != '/')
344 		p->end--;
345 	*p->end = 0;
346 }
347 #endif
348 
349 static int fs_path_copy(struct fs_path *p, struct fs_path *from)
350 {
351 	int ret;
352 
353 	p->reversed = from->reversed;
354 	fs_path_reset(p);
355 
356 	ret = fs_path_add_path(p, from);
357 
358 	return ret;
359 }
360 
361 
362 static void fs_path_unreverse(struct fs_path *p)
363 {
364 	char *tmp;
365 	int len;
366 
367 	if (!p->reversed)
368 		return;
369 
370 	tmp = p->start;
371 	len = p->end - p->start;
372 	p->start = p->buf;
373 	p->end = p->start + len;
374 	memmove(p->start, tmp, len + 1);
375 	p->reversed = 0;
376 }
377 
378 static struct btrfs_path *alloc_path_for_send(void)
379 {
380 	struct btrfs_path *path;
381 
382 	path = btrfs_alloc_path();
383 	if (!path)
384 		return NULL;
385 	path->search_commit_root = 1;
386 	path->skip_locking = 1;
387 	return path;
388 }
389 
390 static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
391 {
392 	int ret;
393 	mm_segment_t old_fs;
394 	u32 pos = 0;
395 
396 	old_fs = get_fs();
397 	set_fs(KERNEL_DS);
398 
399 	while (pos < len) {
400 		ret = vfs_write(filp, (char *)buf + pos, len - pos, off);
401 		/* TODO handle that correctly */
402 		/*if (ret == -ERESTARTSYS) {
403 			continue;
404 		}*/
405 		if (ret < 0)
406 			goto out;
407 		if (ret == 0) {
408 			ret = -EIO;
409 			goto out;
410 		}
411 		pos += ret;
412 	}
413 
414 	ret = 0;
415 
416 out:
417 	set_fs(old_fs);
418 	return ret;
419 }
420 
421 static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
422 {
423 	struct btrfs_tlv_header *hdr;
424 	int total_len = sizeof(*hdr) + len;
425 	int left = sctx->send_max_size - sctx->send_size;
426 
427 	if (unlikely(left < total_len))
428 		return -EOVERFLOW;
429 
430 	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
431 	hdr->tlv_type = cpu_to_le16(attr);
432 	hdr->tlv_len = cpu_to_le16(len);
433 	memcpy(hdr + 1, data, len);
434 	sctx->send_size += total_len;
435 
436 	return 0;
437 }
438 
439 #if 0
440 static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value)
441 {
442 	return tlv_put(sctx, attr, &value, sizeof(value));
443 }
444 
445 static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value)
446 {
447 	__le16 tmp = cpu_to_le16(value);
448 	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
449 }
450 
451 static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value)
452 {
453 	__le32 tmp = cpu_to_le32(value);
454 	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
455 }
456 #endif
457 
458 static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value)
459 {
460 	__le64 tmp = cpu_to_le64(value);
461 	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
462 }
463 
464 static int tlv_put_string(struct send_ctx *sctx, u16 attr,
465 			  const char *str, int len)
466 {
467 	if (len == -1)
468 		len = strlen(str);
469 	return tlv_put(sctx, attr, str, len);
470 }
471 
472 static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
473 			const u8 *uuid)
474 {
475 	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
476 }
477 
478 #if 0
479 static int tlv_put_timespec(struct send_ctx *sctx, u16 attr,
480 			    struct timespec *ts)
481 {
482 	struct btrfs_timespec bts;
483 	bts.sec = cpu_to_le64(ts->tv_sec);
484 	bts.nsec = cpu_to_le32(ts->tv_nsec);
485 	return tlv_put(sctx, attr, &bts, sizeof(bts));
486 }
487 #endif
488 
489 static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
490 				  struct extent_buffer *eb,
491 				  struct btrfs_timespec *ts)
492 {
493 	struct btrfs_timespec bts;
494 	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
495 	return tlv_put(sctx, attr, &bts, sizeof(bts));
496 }
497 
498 
499 #define TLV_PUT(sctx, attrtype, attrlen, data) \
500 	do { \
501 		ret = tlv_put(sctx, attrtype, attrlen, data); \
502 		if (ret < 0) \
503 			goto tlv_put_failure; \
504 	} while (0)
505 
506 #define TLV_PUT_INT(sctx, attrtype, bits, value) \
507 	do { \
508 		ret = tlv_put_u##bits(sctx, attrtype, value); \
509 		if (ret < 0) \
510 			goto tlv_put_failure; \
511 	} while (0)
512 
513 #define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
514 #define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
515 #define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
516 #define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
517 #define TLV_PUT_STRING(sctx, attrtype, str, len) \
518 	do { \
519 		ret = tlv_put_string(sctx, attrtype, str, len); \
520 		if (ret < 0) \
521 			goto tlv_put_failure; \
522 	} while (0)
523 #define TLV_PUT_PATH(sctx, attrtype, p) \
524 	do { \
525 		ret = tlv_put_string(sctx, attrtype, p->start, \
526 			p->end - p->start); \
527 		if (ret < 0) \
528 			goto tlv_put_failure; \
529 	} while(0)
530 #define TLV_PUT_UUID(sctx, attrtype, uuid) \
531 	do { \
532 		ret = tlv_put_uuid(sctx, attrtype, uuid); \
533 		if (ret < 0) \
534 			goto tlv_put_failure; \
535 	} while (0)
536 #define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \
537 	do { \
538 		ret = tlv_put_timespec(sctx, attrtype, ts); \
539 		if (ret < 0) \
540 			goto tlv_put_failure; \
541 	} while (0)
542 #define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
543 	do { \
544 		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
545 		if (ret < 0) \
546 			goto tlv_put_failure; \
547 	} while (0)
548 
549 static int send_header(struct send_ctx *sctx)
550 {
551 	struct btrfs_stream_header hdr;
552 
553 	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
554 	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
555 
556 	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
557 					&sctx->send_off);
558 }
559 
560 /*
561  * For each command/item we want to send to userspace, we call this function.
562  */
563 static int begin_cmd(struct send_ctx *sctx, int cmd)
564 {
565 	struct btrfs_cmd_header *hdr;
566 
567 	if (WARN_ON(!sctx->send_buf))
568 		return -EINVAL;
569 
570 	BUG_ON(sctx->send_size);
571 
572 	sctx->send_size += sizeof(*hdr);
573 	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
574 	hdr->cmd = cpu_to_le16(cmd);
575 
576 	return 0;
577 }
578 
579 static int send_cmd(struct send_ctx *sctx)
580 {
581 	int ret;
582 	struct btrfs_cmd_header *hdr;
583 	u32 crc;
584 
585 	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
586 	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
587 	hdr->crc = 0;
588 
589 	crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
590 	hdr->crc = cpu_to_le32(crc);
591 
592 	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
593 					&sctx->send_off);
594 
595 	sctx->total_send_size += sctx->send_size;
596 	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
597 	sctx->send_size = 0;
598 
599 	return ret;
600 }
601 
602 /*
603  * Sends a move instruction to user space
604  */
605 static int send_rename(struct send_ctx *sctx,
606 		     struct fs_path *from, struct fs_path *to)
607 {
608 	int ret;
609 
610 verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);
611 
612 	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
613 	if (ret < 0)
614 		goto out;
615 
616 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
617 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
618 
619 	ret = send_cmd(sctx);
620 
621 tlv_put_failure:
622 out:
623 	return ret;
624 }
625 
626 /*
627  * Sends a link instruction to user space
628  */
629 static int send_link(struct send_ctx *sctx,
630 		     struct fs_path *path, struct fs_path *lnk)
631 {
632 	int ret;
633 
634 verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);
635 
636 	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
637 	if (ret < 0)
638 		goto out;
639 
640 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
641 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
642 
643 	ret = send_cmd(sctx);
644 
645 tlv_put_failure:
646 out:
647 	return ret;
648 }
649 
650 /*
651  * Sends an unlink instruction to user space
652  */
653 static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
654 {
655 	int ret;
656 
657 verbose_printk("btrfs: send_unlink %s\n", path->start);
658 
659 	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
660 	if (ret < 0)
661 		goto out;
662 
663 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
664 
665 	ret = send_cmd(sctx);
666 
667 tlv_put_failure:
668 out:
669 	return ret;
670 }
671 
672 /*
673  * Sends a rmdir instruction to user space
674  */
675 static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
676 {
677 	int ret;
678 
679 verbose_printk("btrfs: send_rmdir %s\n", path->start);
680 
681 	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
682 	if (ret < 0)
683 		goto out;
684 
685 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
686 
687 	ret = send_cmd(sctx);
688 
689 tlv_put_failure:
690 out:
691 	return ret;
692 }
693 
694 /*
695  * Helper function to retrieve some fields from an inode item.
696  */
697 static int get_inode_info(struct btrfs_root *root,
698 			  u64 ino, u64 *size, u64 *gen,
699 			  u64 *mode, u64 *uid, u64 *gid,
700 			  u64 *rdev)
701 {
702 	int ret;
703 	struct btrfs_inode_item *ii;
704 	struct btrfs_key key;
705 	struct btrfs_path *path;
706 
707 	path = alloc_path_for_send();
708 	if (!path)
709 		return -ENOMEM;
710 
711 	key.objectid = ino;
712 	key.type = BTRFS_INODE_ITEM_KEY;
713 	key.offset = 0;
714 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
715 	if (ret < 0)
716 		goto out;
717 	if (ret) {
718 		ret = -ENOENT;
719 		goto out;
720 	}
721 
722 	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
723 			struct btrfs_inode_item);
724 	if (size)
725 		*size = btrfs_inode_size(path->nodes[0], ii);
726 	if (gen)
727 		*gen = btrfs_inode_generation(path->nodes[0], ii);
728 	if (mode)
729 		*mode = btrfs_inode_mode(path->nodes[0], ii);
730 	if (uid)
731 		*uid = btrfs_inode_uid(path->nodes[0], ii);
732 	if (gid)
733 		*gid = btrfs_inode_gid(path->nodes[0], ii);
734 	if (rdev)
735 		*rdev = btrfs_inode_rdev(path->nodes[0], ii);
736 
737 out:
738 	btrfs_free_path(path);
739 	return ret;
740 }
741 
742 typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
743 				   struct fs_path *p,
744 				   void *ctx);
745 
746 /*
747  * Helper function to iterate the entries in ONE btrfs_inode_ref or
748  * btrfs_inode_extref.
749  * The iterate callback may return a non zero value to stop iteration. This can
750  * be a negative value for error codes or 1 to simply stop it.
751  *
752  * path must point to the INODE_REF or INODE_EXTREF when called.
753  */
754 static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
755 			     struct btrfs_key *found_key, int resolve,
756 			     iterate_inode_ref_t iterate, void *ctx)
757 {
758 	struct extent_buffer *eb = path->nodes[0];
759 	struct btrfs_item *item;
760 	struct btrfs_inode_ref *iref;
761 	struct btrfs_inode_extref *extref;
762 	struct btrfs_path *tmp_path;
763 	struct fs_path *p;
764 	u32 cur = 0;
765 	u32 total;
766 	int slot = path->slots[0];
767 	u32 name_len;
768 	char *start;
769 	int ret = 0;
770 	int num = 0;
771 	int index;
772 	u64 dir;
773 	unsigned long name_off;
774 	unsigned long elem_size;
775 	unsigned long ptr;
776 
777 	p = fs_path_alloc_reversed();
778 	if (!p)
779 		return -ENOMEM;
780 
781 	tmp_path = alloc_path_for_send();
782 	if (!tmp_path) {
783 		fs_path_free(p);
784 		return -ENOMEM;
785 	}
786 
787 
788 	if (found_key->type == BTRFS_INODE_REF_KEY) {
789 		ptr = (unsigned long)btrfs_item_ptr(eb, slot,
790 						    struct btrfs_inode_ref);
791 		item = btrfs_item_nr(slot);
792 		total = btrfs_item_size(eb, item);
793 		elem_size = sizeof(*iref);
794 	} else {
795 		ptr = btrfs_item_ptr_offset(eb, slot);
796 		total = btrfs_item_size_nr(eb, slot);
797 		elem_size = sizeof(*extref);
798 	}
799 
800 	while (cur < total) {
801 		fs_path_reset(p);
802 
803 		if (found_key->type == BTRFS_INODE_REF_KEY) {
804 			iref = (struct btrfs_inode_ref *)(ptr + cur);
805 			name_len = btrfs_inode_ref_name_len(eb, iref);
806 			name_off = (unsigned long)(iref + 1);
807 			index = btrfs_inode_ref_index(eb, iref);
808 			dir = found_key->offset;
809 		} else {
810 			extref = (struct btrfs_inode_extref *)(ptr + cur);
811 			name_len = btrfs_inode_extref_name_len(eb, extref);
812 			name_off = (unsigned long)&extref->name;
813 			index = btrfs_inode_extref_index(eb, extref);
814 			dir = btrfs_inode_extref_parent(eb, extref);
815 		}
816 
817 		if (resolve) {
818 			start = btrfs_ref_to_path(root, tmp_path, name_len,
819 						  name_off, eb, dir,
820 						  p->buf, p->buf_len);
821 			if (IS_ERR(start)) {
822 				ret = PTR_ERR(start);
823 				goto out;
824 			}
825 			if (start < p->buf) {
826 				/* overflow , try again with larger buffer */
827 				ret = fs_path_ensure_buf(p,
828 						p->buf_len + p->buf - start);
829 				if (ret < 0)
830 					goto out;
831 				start = btrfs_ref_to_path(root, tmp_path,
832 							  name_len, name_off,
833 							  eb, dir,
834 							  p->buf, p->buf_len);
835 				if (IS_ERR(start)) {
836 					ret = PTR_ERR(start);
837 					goto out;
838 				}
839 				BUG_ON(start < p->buf);
840 			}
841 			p->start = start;
842 		} else {
843 			ret = fs_path_add_from_extent_buffer(p, eb, name_off,
844 							     name_len);
845 			if (ret < 0)
846 				goto out;
847 		}
848 
849 		cur += elem_size + name_len;
850 		ret = iterate(num, dir, index, p, ctx);
851 		if (ret)
852 			goto out;
853 		num++;
854 	}
855 
856 out:
857 	btrfs_free_path(tmp_path);
858 	fs_path_free(p);
859 	return ret;
860 }
861 
862 typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
863 				  const char *name, int name_len,
864 				  const char *data, int data_len,
865 				  u8 type, void *ctx);
866 
867 /*
868  * Helper function to iterate the entries in ONE btrfs_dir_item.
869  * The iterate callback may return a non zero value to stop iteration. This can
870  * be a negative value for error codes or 1 to simply stop it.
871  *
872  * path must point to the dir item when called.
873  */
874 static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
875 			    struct btrfs_key *found_key,
876 			    iterate_dir_item_t iterate, void *ctx)
877 {
878 	int ret = 0;
879 	struct extent_buffer *eb;
880 	struct btrfs_item *item;
881 	struct btrfs_dir_item *di;
882 	struct btrfs_key di_key;
883 	char *buf = NULL;
884 	char *buf2 = NULL;
885 	int buf_len;
886 	int buf_virtual = 0;
887 	u32 name_len;
888 	u32 data_len;
889 	u32 cur;
890 	u32 len;
891 	u32 total;
892 	int slot;
893 	int num;
894 	u8 type;
895 
896 	buf_len = PAGE_SIZE;
897 	buf = kmalloc(buf_len, GFP_NOFS);
898 	if (!buf) {
899 		ret = -ENOMEM;
900 		goto out;
901 	}
902 
903 	eb = path->nodes[0];
904 	slot = path->slots[0];
905 	item = btrfs_item_nr(slot);
906 	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
907 	cur = 0;
908 	len = 0;
909 	total = btrfs_item_size(eb, item);
910 
911 	num = 0;
912 	while (cur < total) {
913 		name_len = btrfs_dir_name_len(eb, di);
914 		data_len = btrfs_dir_data_len(eb, di);
915 		type = btrfs_dir_type(eb, di);
916 		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
917 
918 		if (name_len + data_len > buf_len) {
919 			buf_len = PAGE_ALIGN(name_len + data_len);
920 			if (buf_virtual) {
921 				buf2 = vmalloc(buf_len);
922 				if (!buf2) {
923 					ret = -ENOMEM;
924 					goto out;
925 				}
926 				vfree(buf);
927 			} else {
928 				buf2 = krealloc(buf, buf_len, GFP_NOFS);
929 				if (!buf2) {
930 					buf2 = vmalloc(buf_len);
931 					if (!buf2) {
932 						ret = -ENOMEM;
933 						goto out;
934 					}
935 					kfree(buf);
936 					buf_virtual = 1;
937 				}
938 			}
939 
940 			buf = buf2;
941 			buf2 = NULL;
942 		}
943 
944 		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
945 				name_len + data_len);
946 
947 		len = sizeof(*di) + name_len + data_len;
948 		di = (struct btrfs_dir_item *)((char *)di + len);
949 		cur += len;
950 
951 		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
952 				data_len, type, ctx);
953 		if (ret < 0)
954 			goto out;
955 		if (ret) {
956 			ret = 0;
957 			goto out;
958 		}
959 
960 		num++;
961 	}
962 
963 out:
964 	if (buf_virtual)
965 		vfree(buf);
966 	else
967 		kfree(buf);
968 	return ret;
969 }
970 
971 static int __copy_first_ref(int num, u64 dir, int index,
972 			    struct fs_path *p, void *ctx)
973 {
974 	int ret;
975 	struct fs_path *pt = ctx;
976 
977 	ret = fs_path_copy(pt, p);
978 	if (ret < 0)
979 		return ret;
980 
981 	/* we want the first only */
982 	return 1;
983 }
984 
985 /*
986  * Retrieve the first path of an inode. If an inode has more then one
987  * ref/hardlink, this is ignored.
988  */
989 static int get_inode_path(struct btrfs_root *root,
990 			  u64 ino, struct fs_path *path)
991 {
992 	int ret;
993 	struct btrfs_key key, found_key;
994 	struct btrfs_path *p;
995 
996 	p = alloc_path_for_send();
997 	if (!p)
998 		return -ENOMEM;
999 
1000 	fs_path_reset(path);
1001 
1002 	key.objectid = ino;
1003 	key.type = BTRFS_INODE_REF_KEY;
1004 	key.offset = 0;
1005 
1006 	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
1007 	if (ret < 0)
1008 		goto out;
1009 	if (ret) {
1010 		ret = 1;
1011 		goto out;
1012 	}
1013 	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
1014 	if (found_key.objectid != ino ||
1015 	    (found_key.type != BTRFS_INODE_REF_KEY &&
1016 	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1017 		ret = -ENOENT;
1018 		goto out;
1019 	}
1020 
1021 	ret = iterate_inode_ref(root, p, &found_key, 1,
1022 				__copy_first_ref, path);
1023 	if (ret < 0)
1024 		goto out;
1025 	ret = 0;
1026 
1027 out:
1028 	btrfs_free_path(p);
1029 	return ret;
1030 }
1031 
1032 struct backref_ctx {
1033 	struct send_ctx *sctx;
1034 
1035 	/* number of total found references */
1036 	u64 found;
1037 
1038 	/*
1039 	 * used for clones found in send_root. clones found behind cur_objectid
1040 	 * and cur_offset are not considered as allowed clones.
1041 	 */
1042 	u64 cur_objectid;
1043 	u64 cur_offset;
1044 
1045 	/* may be truncated in case it's the last extent in a file */
1046 	u64 extent_len;
1047 
1048 	/* Just to check for bugs in backref resolving */
1049 	int found_itself;
1050 };
1051 
1052 static int __clone_root_cmp_bsearch(const void *key, const void *elt)
1053 {
1054 	u64 root = (u64)(uintptr_t)key;
1055 	struct clone_root *cr = (struct clone_root *)elt;
1056 
1057 	if (root < cr->root->objectid)
1058 		return -1;
1059 	if (root > cr->root->objectid)
1060 		return 1;
1061 	return 0;
1062 }
1063 
1064 static int __clone_root_cmp_sort(const void *e1, const void *e2)
1065 {
1066 	struct clone_root *cr1 = (struct clone_root *)e1;
1067 	struct clone_root *cr2 = (struct clone_root *)e2;
1068 
1069 	if (cr1->root->objectid < cr2->root->objectid)
1070 		return -1;
1071 	if (cr1->root->objectid > cr2->root->objectid)
1072 		return 1;
1073 	return 0;
1074 }
1075 
1076 /*
1077  * Called for every backref that is found for the current extent.
1078  * Results are collected in sctx->clone_roots->ino/offset/found_refs
1079  */
1080 static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
1081 {
1082 	struct backref_ctx *bctx = ctx_;
1083 	struct clone_root *found;
1084 	int ret;
1085 	u64 i_size;
1086 
1087 	/* First check if the root is in the list of accepted clone sources */
1088 	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
1089 			bctx->sctx->clone_roots_cnt,
1090 			sizeof(struct clone_root),
1091 			__clone_root_cmp_bsearch);
1092 	if (!found)
1093 		return 0;
1094 
1095 	if (found->root == bctx->sctx->send_root &&
1096 	    ino == bctx->cur_objectid &&
1097 	    offset == bctx->cur_offset) {
1098 		bctx->found_itself = 1;
1099 	}
1100 
1101 	/*
1102 	 * There are inodes that have extents that lie behind its i_size. Don't
1103 	 * accept clones from these extents.
1104 	 */
1105 	ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL,
1106 			NULL);
1107 	if (ret < 0)
1108 		return ret;
1109 
1110 	if (offset + bctx->extent_len > i_size)
1111 		return 0;
1112 
1113 	/*
1114 	 * Make sure we don't consider clones from send_root that are
1115 	 * behind the current inode/offset.
1116 	 */
1117 	if (found->root == bctx->sctx->send_root) {
1118 		/*
1119 		 * TODO for the moment we don't accept clones from the inode
1120 		 * that is currently send. We may change this when
1121 		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
1122 		 * file.
1123 		 */
1124 		if (ino >= bctx->cur_objectid)
1125 			return 0;
1126 #if 0
1127 		if (ino > bctx->cur_objectid)
1128 			return 0;
1129 		if (offset + bctx->extent_len > bctx->cur_offset)
1130 			return 0;
1131 #endif
1132 	}
1133 
1134 	bctx->found++;
1135 	found->found_refs++;
1136 	if (ino < found->ino) {
1137 		found->ino = ino;
1138 		found->offset = offset;
1139 	} else if (found->ino == ino) {
1140 		/*
1141 		 * same extent found more then once in the same file.
1142 		 */
1143 		if (found->offset > offset + bctx->extent_len)
1144 			found->offset = offset;
1145 	}
1146 
1147 	return 0;
1148 }
1149 
1150 /*
1151  * Given an inode, offset and extent item, it finds a good clone for a clone
1152  * instruction. Returns -ENOENT when none could be found. The function makes
1153  * sure that the returned clone is usable at the point where sending is at the
1154  * moment. This means, that no clones are accepted which lie behind the current
1155  * inode+offset.
1156  *
1157  * path must point to the extent item when called.
1158  */
1159 static int find_extent_clone(struct send_ctx *sctx,
1160 			     struct btrfs_path *path,
1161 			     u64 ino, u64 data_offset,
1162 			     u64 ino_size,
1163 			     struct clone_root **found)
1164 {
1165 	int ret;
1166 	int extent_type;
1167 	u64 logical;
1168 	u64 disk_byte;
1169 	u64 num_bytes;
1170 	u64 extent_item_pos;
1171 	u64 flags = 0;
1172 	struct btrfs_file_extent_item *fi;
1173 	struct extent_buffer *eb = path->nodes[0];
1174 	struct backref_ctx *backref_ctx = NULL;
1175 	struct clone_root *cur_clone_root;
1176 	struct btrfs_key found_key;
1177 	struct btrfs_path *tmp_path;
1178 	int compressed;
1179 	u32 i;
1180 
1181 	tmp_path = alloc_path_for_send();
1182 	if (!tmp_path)
1183 		return -ENOMEM;
1184 
1185 	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
1186 	if (!backref_ctx) {
1187 		ret = -ENOMEM;
1188 		goto out;
1189 	}
1190 
1191 	if (data_offset >= ino_size) {
1192 		/*
1193 		 * There may be extents that lie behind the file's size.
1194 		 * I at least had this in combination with snapshotting while
1195 		 * writing large files.
1196 		 */
1197 		ret = 0;
1198 		goto out;
1199 	}
1200 
1201 	fi = btrfs_item_ptr(eb, path->slots[0],
1202 			struct btrfs_file_extent_item);
1203 	extent_type = btrfs_file_extent_type(eb, fi);
1204 	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1205 		ret = -ENOENT;
1206 		goto out;
1207 	}
1208 	compressed = btrfs_file_extent_compression(eb, fi);
1209 
1210 	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1211 	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
1212 	if (disk_byte == 0) {
1213 		ret = -ENOENT;
1214 		goto out;
1215 	}
1216 	logical = disk_byte + btrfs_file_extent_offset(eb, fi);
1217 
1218 	ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
1219 				  &found_key, &flags);
1220 	btrfs_release_path(tmp_path);
1221 
1222 	if (ret < 0)
1223 		goto out;
1224 	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1225 		ret = -EIO;
1226 		goto out;
1227 	}
1228 
1229 	/*
1230 	 * Setup the clone roots.
1231 	 */
1232 	for (i = 0; i < sctx->clone_roots_cnt; i++) {
1233 		cur_clone_root = sctx->clone_roots + i;
1234 		cur_clone_root->ino = (u64)-1;
1235 		cur_clone_root->offset = 0;
1236 		cur_clone_root->found_refs = 0;
1237 	}
1238 
1239 	backref_ctx->sctx = sctx;
1240 	backref_ctx->found = 0;
1241 	backref_ctx->cur_objectid = ino;
1242 	backref_ctx->cur_offset = data_offset;
1243 	backref_ctx->found_itself = 0;
1244 	backref_ctx->extent_len = num_bytes;
1245 
1246 	/*
1247 	 * The last extent of a file may be too large due to page alignment.
1248 	 * We need to adjust extent_len in this case so that the checks in
1249 	 * __iterate_backrefs work.
1250 	 */
1251 	if (data_offset + num_bytes >= ino_size)
1252 		backref_ctx->extent_len = ino_size - data_offset;
1253 
1254 	/*
1255 	 * Now collect all backrefs.
1256 	 */
1257 	if (compressed == BTRFS_COMPRESS_NONE)
1258 		extent_item_pos = logical - found_key.objectid;
1259 	else
1260 		extent_item_pos = 0;
1261 
1262 	extent_item_pos = logical - found_key.objectid;
1263 	ret = iterate_extent_inodes(sctx->send_root->fs_info,
1264 					found_key.objectid, extent_item_pos, 1,
1265 					__iterate_backrefs, backref_ctx);
1266 
1267 	if (ret < 0)
1268 		goto out;
1269 
1270 	if (!backref_ctx->found_itself) {
1271 		/* found a bug in backref code? */
1272 		ret = -EIO;
1273 		printk(KERN_ERR "btrfs: ERROR did not find backref in "
1274 				"send_root. inode=%llu, offset=%llu, "
1275 				"disk_byte=%llu found extent=%llu\n",
1276 				ino, data_offset, disk_byte, found_key.objectid);
1277 		goto out;
1278 	}
1279 
1280 verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
1281 		"ino=%llu, "
1282 		"num_bytes=%llu, logical=%llu\n",
1283 		data_offset, ino, num_bytes, logical);
1284 
1285 	if (!backref_ctx->found)
1286 		verbose_printk("btrfs:    no clones found\n");
1287 
1288 	cur_clone_root = NULL;
1289 	for (i = 0; i < sctx->clone_roots_cnt; i++) {
1290 		if (sctx->clone_roots[i].found_refs) {
1291 			if (!cur_clone_root)
1292 				cur_clone_root = sctx->clone_roots + i;
1293 			else if (sctx->clone_roots[i].root == sctx->send_root)
1294 				/* prefer clones from send_root over others */
1295 				cur_clone_root = sctx->clone_roots + i;
1296 		}
1297 
1298 	}
1299 
1300 	if (cur_clone_root) {
1301 		*found = cur_clone_root;
1302 		ret = 0;
1303 	} else {
1304 		ret = -ENOENT;
1305 	}
1306 
1307 out:
1308 	btrfs_free_path(tmp_path);
1309 	kfree(backref_ctx);
1310 	return ret;
1311 }
1312 
1313 static int read_symlink(struct btrfs_root *root,
1314 			u64 ino,
1315 			struct fs_path *dest)
1316 {
1317 	int ret;
1318 	struct btrfs_path *path;
1319 	struct btrfs_key key;
1320 	struct btrfs_file_extent_item *ei;
1321 	u8 type;
1322 	u8 compression;
1323 	unsigned long off;
1324 	int len;
1325 
1326 	path = alloc_path_for_send();
1327 	if (!path)
1328 		return -ENOMEM;
1329 
1330 	key.objectid = ino;
1331 	key.type = BTRFS_EXTENT_DATA_KEY;
1332 	key.offset = 0;
1333 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1334 	if (ret < 0)
1335 		goto out;
1336 	BUG_ON(ret);
1337 
1338 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1339 			struct btrfs_file_extent_item);
1340 	type = btrfs_file_extent_type(path->nodes[0], ei);
1341 	compression = btrfs_file_extent_compression(path->nodes[0], ei);
1342 	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
1343 	BUG_ON(compression);
1344 
1345 	off = btrfs_file_extent_inline_start(ei);
1346 	len = btrfs_file_extent_inline_len(path->nodes[0], ei);
1347 
1348 	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
1349 
1350 out:
1351 	btrfs_free_path(path);
1352 	return ret;
1353 }
1354 
1355 /*
1356  * Helper function to generate a file name that is unique in the root of
1357  * send_root and parent_root. This is used to generate names for orphan inodes.
1358  */
1359 static int gen_unique_name(struct send_ctx *sctx,
1360 			   u64 ino, u64 gen,
1361 			   struct fs_path *dest)
1362 {
1363 	int ret = 0;
1364 	struct btrfs_path *path;
1365 	struct btrfs_dir_item *di;
1366 	char tmp[64];
1367 	int len;
1368 	u64 idx = 0;
1369 
1370 	path = alloc_path_for_send();
1371 	if (!path)
1372 		return -ENOMEM;
1373 
1374 	while (1) {
1375 		len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu",
1376 				ino, gen, idx);
1377 		if (len >= sizeof(tmp)) {
1378 			/* should really not happen */
1379 			ret = -EOVERFLOW;
1380 			goto out;
1381 		}
1382 
1383 		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
1384 				path, BTRFS_FIRST_FREE_OBJECTID,
1385 				tmp, strlen(tmp), 0);
1386 		btrfs_release_path(path);
1387 		if (IS_ERR(di)) {
1388 			ret = PTR_ERR(di);
1389 			goto out;
1390 		}
1391 		if (di) {
1392 			/* not unique, try again */
1393 			idx++;
1394 			continue;
1395 		}
1396 
1397 		if (!sctx->parent_root) {
1398 			/* unique */
1399 			ret = 0;
1400 			break;
1401 		}
1402 
1403 		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
1404 				path, BTRFS_FIRST_FREE_OBJECTID,
1405 				tmp, strlen(tmp), 0);
1406 		btrfs_release_path(path);
1407 		if (IS_ERR(di)) {
1408 			ret = PTR_ERR(di);
1409 			goto out;
1410 		}
1411 		if (di) {
1412 			/* not unique, try again */
1413 			idx++;
1414 			continue;
1415 		}
1416 		/* unique */
1417 		break;
1418 	}
1419 
1420 	ret = fs_path_add(dest, tmp, strlen(tmp));
1421 
1422 out:
1423 	btrfs_free_path(path);
1424 	return ret;
1425 }
1426 
1427 enum inode_state {
1428 	inode_state_no_change,
1429 	inode_state_will_create,
1430 	inode_state_did_create,
1431 	inode_state_will_delete,
1432 	inode_state_did_delete,
1433 };
1434 
1435 static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
1436 {
1437 	int ret;
1438 	int left_ret;
1439 	int right_ret;
1440 	u64 left_gen;
1441 	u64 right_gen;
1442 
1443 	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
1444 			NULL, NULL);
1445 	if (ret < 0 && ret != -ENOENT)
1446 		goto out;
1447 	left_ret = ret;
1448 
1449 	if (!sctx->parent_root) {
1450 		right_ret = -ENOENT;
1451 	} else {
1452 		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
1453 				NULL, NULL, NULL, NULL);
1454 		if (ret < 0 && ret != -ENOENT)
1455 			goto out;
1456 		right_ret = ret;
1457 	}
1458 
1459 	if (!left_ret && !right_ret) {
1460 		if (left_gen == gen && right_gen == gen) {
1461 			ret = inode_state_no_change;
1462 		} else if (left_gen == gen) {
1463 			if (ino < sctx->send_progress)
1464 				ret = inode_state_did_create;
1465 			else
1466 				ret = inode_state_will_create;
1467 		} else if (right_gen == gen) {
1468 			if (ino < sctx->send_progress)
1469 				ret = inode_state_did_delete;
1470 			else
1471 				ret = inode_state_will_delete;
1472 		} else  {
1473 			ret = -ENOENT;
1474 		}
1475 	} else if (!left_ret) {
1476 		if (left_gen == gen) {
1477 			if (ino < sctx->send_progress)
1478 				ret = inode_state_did_create;
1479 			else
1480 				ret = inode_state_will_create;
1481 		} else {
1482 			ret = -ENOENT;
1483 		}
1484 	} else if (!right_ret) {
1485 		if (right_gen == gen) {
1486 			if (ino < sctx->send_progress)
1487 				ret = inode_state_did_delete;
1488 			else
1489 				ret = inode_state_will_delete;
1490 		} else {
1491 			ret = -ENOENT;
1492 		}
1493 	} else {
1494 		ret = -ENOENT;
1495 	}
1496 
1497 out:
1498 	return ret;
1499 }
1500 
1501 static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
1502 {
1503 	int ret;
1504 
1505 	ret = get_cur_inode_state(sctx, ino, gen);
1506 	if (ret < 0)
1507 		goto out;
1508 
1509 	if (ret == inode_state_no_change ||
1510 	    ret == inode_state_did_create ||
1511 	    ret == inode_state_will_delete)
1512 		ret = 1;
1513 	else
1514 		ret = 0;
1515 
1516 out:
1517 	return ret;
1518 }
1519 
1520 /*
1521  * Helper function to lookup a dir item in a dir.
1522  */
1523 static int lookup_dir_item_inode(struct btrfs_root *root,
1524 				 u64 dir, const char *name, int name_len,
1525 				 u64 *found_inode,
1526 				 u8 *found_type)
1527 {
1528 	int ret = 0;
1529 	struct btrfs_dir_item *di;
1530 	struct btrfs_key key;
1531 	struct btrfs_path *path;
1532 
1533 	path = alloc_path_for_send();
1534 	if (!path)
1535 		return -ENOMEM;
1536 
1537 	di = btrfs_lookup_dir_item(NULL, root, path,
1538 			dir, name, name_len, 0);
1539 	if (!di) {
1540 		ret = -ENOENT;
1541 		goto out;
1542 	}
1543 	if (IS_ERR(di)) {
1544 		ret = PTR_ERR(di);
1545 		goto out;
1546 	}
1547 	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
1548 	*found_inode = key.objectid;
1549 	*found_type = btrfs_dir_type(path->nodes[0], di);
1550 
1551 out:
1552 	btrfs_free_path(path);
1553 	return ret;
1554 }
1555 
1556 /*
1557  * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
1558  * generation of the parent dir and the name of the dir entry.
1559  */
1560 static int get_first_ref(struct btrfs_root *root, u64 ino,
1561 			 u64 *dir, u64 *dir_gen, struct fs_path *name)
1562 {
1563 	int ret;
1564 	struct btrfs_key key;
1565 	struct btrfs_key found_key;
1566 	struct btrfs_path *path;
1567 	int len;
1568 	u64 parent_dir;
1569 
1570 	path = alloc_path_for_send();
1571 	if (!path)
1572 		return -ENOMEM;
1573 
1574 	key.objectid = ino;
1575 	key.type = BTRFS_INODE_REF_KEY;
1576 	key.offset = 0;
1577 
1578 	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
1579 	if (ret < 0)
1580 		goto out;
1581 	if (!ret)
1582 		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1583 				path->slots[0]);
1584 	if (ret || found_key.objectid != ino ||
1585 	    (found_key.type != BTRFS_INODE_REF_KEY &&
1586 	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
1587 		ret = -ENOENT;
1588 		goto out;
1589 	}
1590 
1591 	if (key.type == BTRFS_INODE_REF_KEY) {
1592 		struct btrfs_inode_ref *iref;
1593 		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1594 				      struct btrfs_inode_ref);
1595 		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
1596 		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1597 						     (unsigned long)(iref + 1),
1598 						     len);
1599 		parent_dir = found_key.offset;
1600 	} else {
1601 		struct btrfs_inode_extref *extref;
1602 		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1603 					struct btrfs_inode_extref);
1604 		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
1605 		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1606 					(unsigned long)&extref->name, len);
1607 		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
1608 	}
1609 	if (ret < 0)
1610 		goto out;
1611 	btrfs_release_path(path);
1612 
1613 	ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL,
1614 			NULL, NULL);
1615 	if (ret < 0)
1616 		goto out;
1617 
1618 	*dir = parent_dir;
1619 
1620 out:
1621 	btrfs_free_path(path);
1622 	return ret;
1623 }
1624 
1625 static int is_first_ref(struct btrfs_root *root,
1626 			u64 ino, u64 dir,
1627 			const char *name, int name_len)
1628 {
1629 	int ret;
1630 	struct fs_path *tmp_name;
1631 	u64 tmp_dir;
1632 	u64 tmp_dir_gen;
1633 
1634 	tmp_name = fs_path_alloc();
1635 	if (!tmp_name)
1636 		return -ENOMEM;
1637 
1638 	ret = get_first_ref(root, ino, &tmp_dir, &tmp_dir_gen, tmp_name);
1639 	if (ret < 0)
1640 		goto out;
1641 
1642 	if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
1643 		ret = 0;
1644 		goto out;
1645 	}
1646 
1647 	ret = !memcmp(tmp_name->start, name, name_len);
1648 
1649 out:
1650 	fs_path_free(tmp_name);
1651 	return ret;
1652 }
1653 
1654 /*
1655  * Used by process_recorded_refs to determine if a new ref would overwrite an
1656  * already existing ref. In case it detects an overwrite, it returns the
1657  * inode/gen in who_ino/who_gen.
1658  * When an overwrite is detected, process_recorded_refs does proper orphanizing
1659  * to make sure later references to the overwritten inode are possible.
1660  * Orphanizing is however only required for the first ref of an inode.
1661  * process_recorded_refs does an additional is_first_ref check to see if
1662  * orphanizing is really required.
1663  */
1664 static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1665 			      const char *name, int name_len,
1666 			      u64 *who_ino, u64 *who_gen)
1667 {
1668 	int ret = 0;
1669 	u64 gen;
1670 	u64 other_inode = 0;
1671 	u8 other_type = 0;
1672 
1673 	if (!sctx->parent_root)
1674 		goto out;
1675 
1676 	ret = is_inode_existent(sctx, dir, dir_gen);
1677 	if (ret <= 0)
1678 		goto out;
1679 
1680 	/*
1681 	 * If we have a parent root we need to verify that the parent dir was
1682 	 * not delted and then re-created, if it was then we have no overwrite
1683 	 * and we can just unlink this entry.
1684 	 */
1685 	if (sctx->parent_root) {
1686 		ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
1687 				     NULL, NULL, NULL);
1688 		if (ret < 0 && ret != -ENOENT)
1689 			goto out;
1690 		if (ret) {
1691 			ret = 0;
1692 			goto out;
1693 		}
1694 		if (gen != dir_gen)
1695 			goto out;
1696 	}
1697 
1698 	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
1699 			&other_inode, &other_type);
1700 	if (ret < 0 && ret != -ENOENT)
1701 		goto out;
1702 	if (ret) {
1703 		ret = 0;
1704 		goto out;
1705 	}
1706 
1707 	/*
1708 	 * Check if the overwritten ref was already processed. If yes, the ref
1709 	 * was already unlinked/moved, so we can safely assume that we will not
1710 	 * overwrite anything at this point in time.
1711 	 */
1712 	if (other_inode > sctx->send_progress) {
1713 		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
1714 				who_gen, NULL, NULL, NULL, NULL);
1715 		if (ret < 0)
1716 			goto out;
1717 
1718 		ret = 1;
1719 		*who_ino = other_inode;
1720 	} else {
1721 		ret = 0;
1722 	}
1723 
1724 out:
1725 	return ret;
1726 }
1727 
1728 /*
1729  * Checks if the ref was overwritten by an already processed inode. This is
1730  * used by __get_cur_name_and_parent to find out if the ref was orphanized and
1731  * thus the orphan name needs be used.
1732  * process_recorded_refs also uses it to avoid unlinking of refs that were
1733  * overwritten.
1734  */
1735 static int did_overwrite_ref(struct send_ctx *sctx,
1736 			    u64 dir, u64 dir_gen,
1737 			    u64 ino, u64 ino_gen,
1738 			    const char *name, int name_len)
1739 {
1740 	int ret = 0;
1741 	u64 gen;
1742 	u64 ow_inode;
1743 	u8 other_type;
1744 
1745 	if (!sctx->parent_root)
1746 		goto out;
1747 
1748 	ret = is_inode_existent(sctx, dir, dir_gen);
1749 	if (ret <= 0)
1750 		goto out;
1751 
1752 	/* check if the ref was overwritten by another ref */
1753 	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
1754 			&ow_inode, &other_type);
1755 	if (ret < 0 && ret != -ENOENT)
1756 		goto out;
1757 	if (ret) {
1758 		/* was never and will never be overwritten */
1759 		ret = 0;
1760 		goto out;
1761 	}
1762 
1763 	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
1764 			NULL, NULL);
1765 	if (ret < 0)
1766 		goto out;
1767 
1768 	if (ow_inode == ino && gen == ino_gen) {
1769 		ret = 0;
1770 		goto out;
1771 	}
1772 
1773 	/* we know that it is or will be overwritten. check this now */
1774 	if (ow_inode < sctx->send_progress)
1775 		ret = 1;
1776 	else
1777 		ret = 0;
1778 
1779 out:
1780 	return ret;
1781 }
1782 
1783 /*
1784  * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
1785  * that got overwritten. This is used by process_recorded_refs to determine
1786  * if it has to use the path as returned by get_cur_path or the orphan name.
1787  */
1788 static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
1789 {
1790 	int ret = 0;
1791 	struct fs_path *name = NULL;
1792 	u64 dir;
1793 	u64 dir_gen;
1794 
1795 	if (!sctx->parent_root)
1796 		goto out;
1797 
1798 	name = fs_path_alloc();
1799 	if (!name)
1800 		return -ENOMEM;
1801 
1802 	ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
1803 	if (ret < 0)
1804 		goto out;
1805 
1806 	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
1807 			name->start, fs_path_len(name));
1808 
1809 out:
1810 	fs_path_free(name);
1811 	return ret;
1812 }
1813 
1814 /*
1815  * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
1816  * so we need to do some special handling in case we have clashes. This function
1817  * takes care of this with the help of name_cache_entry::radix_list.
1818  * In case of error, nce is kfreed.
1819  */
1820 static int name_cache_insert(struct send_ctx *sctx,
1821 			     struct name_cache_entry *nce)
1822 {
1823 	int ret = 0;
1824 	struct list_head *nce_head;
1825 
1826 	nce_head = radix_tree_lookup(&sctx->name_cache,
1827 			(unsigned long)nce->ino);
1828 	if (!nce_head) {
1829 		nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
1830 		if (!nce_head) {
1831 			kfree(nce);
1832 			return -ENOMEM;
1833 		}
1834 		INIT_LIST_HEAD(nce_head);
1835 
1836 		ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
1837 		if (ret < 0) {
1838 			kfree(nce_head);
1839 			kfree(nce);
1840 			return ret;
1841 		}
1842 	}
1843 	list_add_tail(&nce->radix_list, nce_head);
1844 	list_add_tail(&nce->list, &sctx->name_cache_list);
1845 	sctx->name_cache_size++;
1846 
1847 	return ret;
1848 }
1849 
1850 static void name_cache_delete(struct send_ctx *sctx,
1851 			      struct name_cache_entry *nce)
1852 {
1853 	struct list_head *nce_head;
1854 
1855 	nce_head = radix_tree_lookup(&sctx->name_cache,
1856 			(unsigned long)nce->ino);
1857 	BUG_ON(!nce_head);
1858 
1859 	list_del(&nce->radix_list);
1860 	list_del(&nce->list);
1861 	sctx->name_cache_size--;
1862 
1863 	if (list_empty(nce_head)) {
1864 		radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
1865 		kfree(nce_head);
1866 	}
1867 }
1868 
1869 static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
1870 						    u64 ino, u64 gen)
1871 {
1872 	struct list_head *nce_head;
1873 	struct name_cache_entry *cur;
1874 
1875 	nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
1876 	if (!nce_head)
1877 		return NULL;
1878 
1879 	list_for_each_entry(cur, nce_head, radix_list) {
1880 		if (cur->ino == ino && cur->gen == gen)
1881 			return cur;
1882 	}
1883 	return NULL;
1884 }
1885 
1886 /*
1887  * Removes the entry from the list and adds it back to the end. This marks the
1888  * entry as recently used so that name_cache_clean_unused does not remove it.
1889  */
1890 static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
1891 {
1892 	list_del(&nce->list);
1893 	list_add_tail(&nce->list, &sctx->name_cache_list);
1894 }
1895 
1896 /*
1897  * Remove some entries from the beginning of name_cache_list.
1898  */
1899 static void name_cache_clean_unused(struct send_ctx *sctx)
1900 {
1901 	struct name_cache_entry *nce;
1902 
1903 	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
1904 		return;
1905 
1906 	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
1907 		nce = list_entry(sctx->name_cache_list.next,
1908 				struct name_cache_entry, list);
1909 		name_cache_delete(sctx, nce);
1910 		kfree(nce);
1911 	}
1912 }
1913 
1914 static void name_cache_free(struct send_ctx *sctx)
1915 {
1916 	struct name_cache_entry *nce;
1917 
1918 	while (!list_empty(&sctx->name_cache_list)) {
1919 		nce = list_entry(sctx->name_cache_list.next,
1920 				struct name_cache_entry, list);
1921 		name_cache_delete(sctx, nce);
1922 		kfree(nce);
1923 	}
1924 }
1925 
1926 /*
1927  * Used by get_cur_path for each ref up to the root.
1928  * Returns 0 if it succeeded.
1929  * Returns 1 if the inode is not existent or got overwritten. In that case, the
1930  * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
1931  * is returned, parent_ino/parent_gen are not guaranteed to be valid.
1932  * Returns <0 in case of error.
1933  */
1934 static int __get_cur_name_and_parent(struct send_ctx *sctx,
1935 				     u64 ino, u64 gen,
1936 				     u64 *parent_ino,
1937 				     u64 *parent_gen,
1938 				     struct fs_path *dest)
1939 {
1940 	int ret;
1941 	int nce_ret;
1942 	struct btrfs_path *path = NULL;
1943 	struct name_cache_entry *nce = NULL;
1944 
1945 	/*
1946 	 * First check if we already did a call to this function with the same
1947 	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
1948 	 * return the cached result.
1949 	 */
1950 	nce = name_cache_search(sctx, ino, gen);
1951 	if (nce) {
1952 		if (ino < sctx->send_progress && nce->need_later_update) {
1953 			name_cache_delete(sctx, nce);
1954 			kfree(nce);
1955 			nce = NULL;
1956 		} else {
1957 			name_cache_used(sctx, nce);
1958 			*parent_ino = nce->parent_ino;
1959 			*parent_gen = nce->parent_gen;
1960 			ret = fs_path_add(dest, nce->name, nce->name_len);
1961 			if (ret < 0)
1962 				goto out;
1963 			ret = nce->ret;
1964 			goto out;
1965 		}
1966 	}
1967 
1968 	path = alloc_path_for_send();
1969 	if (!path)
1970 		return -ENOMEM;
1971 
1972 	/*
1973 	 * If the inode is not existent yet, add the orphan name and return 1.
1974 	 * This should only happen for the parent dir that we determine in
1975 	 * __record_new_ref
1976 	 */
1977 	ret = is_inode_existent(sctx, ino, gen);
1978 	if (ret < 0)
1979 		goto out;
1980 
1981 	if (!ret) {
1982 		ret = gen_unique_name(sctx, ino, gen, dest);
1983 		if (ret < 0)
1984 			goto out;
1985 		ret = 1;
1986 		goto out_cache;
1987 	}
1988 
1989 	/*
1990 	 * Depending on whether the inode was already processed or not, use
1991 	 * send_root or parent_root for ref lookup.
1992 	 */
1993 	if (ino < sctx->send_progress)
1994 		ret = get_first_ref(sctx->send_root, ino,
1995 				    parent_ino, parent_gen, dest);
1996 	else
1997 		ret = get_first_ref(sctx->parent_root, ino,
1998 				    parent_ino, parent_gen, dest);
1999 	if (ret < 0)
2000 		goto out;
2001 
2002 	/*
2003 	 * Check if the ref was overwritten by an inode's ref that was processed
2004 	 * earlier. If yes, treat as orphan and return 1.
2005 	 */
2006 	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
2007 			dest->start, dest->end - dest->start);
2008 	if (ret < 0)
2009 		goto out;
2010 	if (ret) {
2011 		fs_path_reset(dest);
2012 		ret = gen_unique_name(sctx, ino, gen, dest);
2013 		if (ret < 0)
2014 			goto out;
2015 		ret = 1;
2016 	}
2017 
2018 out_cache:
2019 	/*
2020 	 * Store the result of the lookup in the name cache.
2021 	 */
2022 	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
2023 	if (!nce) {
2024 		ret = -ENOMEM;
2025 		goto out;
2026 	}
2027 
2028 	nce->ino = ino;
2029 	nce->gen = gen;
2030 	nce->parent_ino = *parent_ino;
2031 	nce->parent_gen = *parent_gen;
2032 	nce->name_len = fs_path_len(dest);
2033 	nce->ret = ret;
2034 	strcpy(nce->name, dest->start);
2035 
2036 	if (ino < sctx->send_progress)
2037 		nce->need_later_update = 0;
2038 	else
2039 		nce->need_later_update = 1;
2040 
2041 	nce_ret = name_cache_insert(sctx, nce);
2042 	if (nce_ret < 0)
2043 		ret = nce_ret;
2044 	name_cache_clean_unused(sctx);
2045 
2046 out:
2047 	btrfs_free_path(path);
2048 	return ret;
2049 }
2050 
2051 /*
2052  * Magic happens here. This function returns the first ref to an inode as it
2053  * would look like while receiving the stream at this point in time.
2054  * We walk the path up to the root. For every inode in between, we check if it
2055  * was already processed/sent. If yes, we continue with the parent as found
2056  * in send_root. If not, we continue with the parent as found in parent_root.
2057  * If we encounter an inode that was deleted at this point in time, we use the
2058  * inodes "orphan" name instead of the real name and stop. Same with new inodes
2059  * that were not created yet and overwritten inodes/refs.
2060  *
2061  * When do we have have orphan inodes:
2062  * 1. When an inode is freshly created and thus no valid refs are available yet
2063  * 2. When a directory lost all it's refs (deleted) but still has dir items
2064  *    inside which were not processed yet (pending for move/delete). If anyone
2065  *    tried to get the path to the dir items, it would get a path inside that
2066  *    orphan directory.
2067  * 3. When an inode is moved around or gets new links, it may overwrite the ref
2068  *    of an unprocessed inode. If in that case the first ref would be
2069  *    overwritten, the overwritten inode gets "orphanized". Later when we
2070  *    process this overwritten inode, it is restored at a new place by moving
2071  *    the orphan inode.
2072  *
2073  * sctx->send_progress tells this function at which point in time receiving
2074  * would be.
2075  */
2076 static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
2077 			struct fs_path *dest)
2078 {
2079 	int ret = 0;
2080 	struct fs_path *name = NULL;
2081 	u64 parent_inode = 0;
2082 	u64 parent_gen = 0;
2083 	int stop = 0;
2084 
2085 	name = fs_path_alloc();
2086 	if (!name) {
2087 		ret = -ENOMEM;
2088 		goto out;
2089 	}
2090 
2091 	dest->reversed = 1;
2092 	fs_path_reset(dest);
2093 
2094 	while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
2095 		fs_path_reset(name);
2096 
2097 		ret = __get_cur_name_and_parent(sctx, ino, gen,
2098 				&parent_inode, &parent_gen, name);
2099 		if (ret < 0)
2100 			goto out;
2101 		if (ret)
2102 			stop = 1;
2103 
2104 		ret = fs_path_add_path(dest, name);
2105 		if (ret < 0)
2106 			goto out;
2107 
2108 		ino = parent_inode;
2109 		gen = parent_gen;
2110 	}
2111 
2112 out:
2113 	fs_path_free(name);
2114 	if (!ret)
2115 		fs_path_unreverse(dest);
2116 	return ret;
2117 }
2118 
2119 /*
2120  * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
2121  */
2122 static int send_subvol_begin(struct send_ctx *sctx)
2123 {
2124 	int ret;
2125 	struct btrfs_root *send_root = sctx->send_root;
2126 	struct btrfs_root *parent_root = sctx->parent_root;
2127 	struct btrfs_path *path;
2128 	struct btrfs_key key;
2129 	struct btrfs_root_ref *ref;
2130 	struct extent_buffer *leaf;
2131 	char *name = NULL;
2132 	int namelen;
2133 
2134 	path = alloc_path_for_send();
2135 	if (!path)
2136 		return -ENOMEM;
2137 
2138 	name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS);
2139 	if (!name) {
2140 		btrfs_free_path(path);
2141 		return -ENOMEM;
2142 	}
2143 
2144 	key.objectid = send_root->objectid;
2145 	key.type = BTRFS_ROOT_BACKREF_KEY;
2146 	key.offset = 0;
2147 
2148 	ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
2149 				&key, path, 1, 0);
2150 	if (ret < 0)
2151 		goto out;
2152 	if (ret) {
2153 		ret = -ENOENT;
2154 		goto out;
2155 	}
2156 
2157 	leaf = path->nodes[0];
2158 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2159 	if (key.type != BTRFS_ROOT_BACKREF_KEY ||
2160 	    key.objectid != send_root->objectid) {
2161 		ret = -ENOENT;
2162 		goto out;
2163 	}
2164 	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
2165 	namelen = btrfs_root_ref_name_len(leaf, ref);
2166 	read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
2167 	btrfs_release_path(path);
2168 
2169 	if (parent_root) {
2170 		ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
2171 		if (ret < 0)
2172 			goto out;
2173 	} else {
2174 		ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
2175 		if (ret < 0)
2176 			goto out;
2177 	}
2178 
2179 	TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
2180 	TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
2181 			sctx->send_root->root_item.uuid);
2182 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
2183 			sctx->send_root->root_item.ctransid);
2184 	if (parent_root) {
2185 		TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
2186 				sctx->parent_root->root_item.uuid);
2187 		TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
2188 				sctx->parent_root->root_item.ctransid);
2189 	}
2190 
2191 	ret = send_cmd(sctx);
2192 
2193 tlv_put_failure:
2194 out:
2195 	btrfs_free_path(path);
2196 	kfree(name);
2197 	return ret;
2198 }
2199 
2200 static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
2201 {
2202 	int ret = 0;
2203 	struct fs_path *p;
2204 
2205 verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size);
2206 
2207 	p = fs_path_alloc();
2208 	if (!p)
2209 		return -ENOMEM;
2210 
2211 	ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
2212 	if (ret < 0)
2213 		goto out;
2214 
2215 	ret = get_cur_path(sctx, ino, gen, p);
2216 	if (ret < 0)
2217 		goto out;
2218 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2219 	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
2220 
2221 	ret = send_cmd(sctx);
2222 
2223 tlv_put_failure:
2224 out:
2225 	fs_path_free(p);
2226 	return ret;
2227 }
2228 
2229 static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
2230 {
2231 	int ret = 0;
2232 	struct fs_path *p;
2233 
2234 verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode);
2235 
2236 	p = fs_path_alloc();
2237 	if (!p)
2238 		return -ENOMEM;
2239 
2240 	ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
2241 	if (ret < 0)
2242 		goto out;
2243 
2244 	ret = get_cur_path(sctx, ino, gen, p);
2245 	if (ret < 0)
2246 		goto out;
2247 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2248 	TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
2249 
2250 	ret = send_cmd(sctx);
2251 
2252 tlv_put_failure:
2253 out:
2254 	fs_path_free(p);
2255 	return ret;
2256 }
2257 
2258 static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
2259 {
2260 	int ret = 0;
2261 	struct fs_path *p;
2262 
2263 verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid);
2264 
2265 	p = fs_path_alloc();
2266 	if (!p)
2267 		return -ENOMEM;
2268 
2269 	ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
2270 	if (ret < 0)
2271 		goto out;
2272 
2273 	ret = get_cur_path(sctx, ino, gen, p);
2274 	if (ret < 0)
2275 		goto out;
2276 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2277 	TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
2278 	TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
2279 
2280 	ret = send_cmd(sctx);
2281 
2282 tlv_put_failure:
2283 out:
2284 	fs_path_free(p);
2285 	return ret;
2286 }
2287 
2288 static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
2289 {
2290 	int ret = 0;
2291 	struct fs_path *p = NULL;
2292 	struct btrfs_inode_item *ii;
2293 	struct btrfs_path *path = NULL;
2294 	struct extent_buffer *eb;
2295 	struct btrfs_key key;
2296 	int slot;
2297 
2298 verbose_printk("btrfs: send_utimes %llu\n", ino);
2299 
2300 	p = fs_path_alloc();
2301 	if (!p)
2302 		return -ENOMEM;
2303 
2304 	path = alloc_path_for_send();
2305 	if (!path) {
2306 		ret = -ENOMEM;
2307 		goto out;
2308 	}
2309 
2310 	key.objectid = ino;
2311 	key.type = BTRFS_INODE_ITEM_KEY;
2312 	key.offset = 0;
2313 	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2314 	if (ret < 0)
2315 		goto out;
2316 
2317 	eb = path->nodes[0];
2318 	slot = path->slots[0];
2319 	ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
2320 
2321 	ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
2322 	if (ret < 0)
2323 		goto out;
2324 
2325 	ret = get_cur_path(sctx, ino, gen, p);
2326 	if (ret < 0)
2327 		goto out;
2328 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2329 	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb,
2330 			btrfs_inode_atime(ii));
2331 	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb,
2332 			btrfs_inode_mtime(ii));
2333 	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb,
2334 			btrfs_inode_ctime(ii));
2335 	/* TODO Add otime support when the otime patches get into upstream */
2336 
2337 	ret = send_cmd(sctx);
2338 
2339 tlv_put_failure:
2340 out:
2341 	fs_path_free(p);
2342 	btrfs_free_path(path);
2343 	return ret;
2344 }
2345 
2346 /*
2347  * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
2348  * a valid path yet because we did not process the refs yet. So, the inode
2349  * is created as orphan.
2350  */
2351 static int send_create_inode(struct send_ctx *sctx, u64 ino)
2352 {
2353 	int ret = 0;
2354 	struct fs_path *p;
2355 	int cmd;
2356 	u64 gen;
2357 	u64 mode;
2358 	u64 rdev;
2359 
2360 verbose_printk("btrfs: send_create_inode %llu\n", ino);
2361 
2362 	p = fs_path_alloc();
2363 	if (!p)
2364 		return -ENOMEM;
2365 
2366 	ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL,
2367 			NULL, &rdev);
2368 	if (ret < 0)
2369 		goto out;
2370 
2371 	if (S_ISREG(mode)) {
2372 		cmd = BTRFS_SEND_C_MKFILE;
2373 	} else if (S_ISDIR(mode)) {
2374 		cmd = BTRFS_SEND_C_MKDIR;
2375 	} else if (S_ISLNK(mode)) {
2376 		cmd = BTRFS_SEND_C_SYMLINK;
2377 	} else if (S_ISCHR(mode) || S_ISBLK(mode)) {
2378 		cmd = BTRFS_SEND_C_MKNOD;
2379 	} else if (S_ISFIFO(mode)) {
2380 		cmd = BTRFS_SEND_C_MKFIFO;
2381 	} else if (S_ISSOCK(mode)) {
2382 		cmd = BTRFS_SEND_C_MKSOCK;
2383 	} else {
2384 		printk(KERN_WARNING "btrfs: unexpected inode type %o",
2385 				(int)(mode & S_IFMT));
2386 		ret = -ENOTSUPP;
2387 		goto out;
2388 	}
2389 
2390 	ret = begin_cmd(sctx, cmd);
2391 	if (ret < 0)
2392 		goto out;
2393 
2394 	ret = gen_unique_name(sctx, ino, gen, p);
2395 	if (ret < 0)
2396 		goto out;
2397 
2398 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2399 	TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino);
2400 
2401 	if (S_ISLNK(mode)) {
2402 		fs_path_reset(p);
2403 		ret = read_symlink(sctx->send_root, ino, p);
2404 		if (ret < 0)
2405 			goto out;
2406 		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
2407 	} else if (S_ISCHR(mode) || S_ISBLK(mode) ||
2408 		   S_ISFIFO(mode) || S_ISSOCK(mode)) {
2409 		TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
2410 		TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
2411 	}
2412 
2413 	ret = send_cmd(sctx);
2414 	if (ret < 0)
2415 		goto out;
2416 
2417 
2418 tlv_put_failure:
2419 out:
2420 	fs_path_free(p);
2421 	return ret;
2422 }
2423 
2424 /*
2425  * We need some special handling for inodes that get processed before the parent
2426  * directory got created. See process_recorded_refs for details.
2427  * This function does the check if we already created the dir out of order.
2428  */
2429 static int did_create_dir(struct send_ctx *sctx, u64 dir)
2430 {
2431 	int ret = 0;
2432 	struct btrfs_path *path = NULL;
2433 	struct btrfs_key key;
2434 	struct btrfs_key found_key;
2435 	struct btrfs_key di_key;
2436 	struct extent_buffer *eb;
2437 	struct btrfs_dir_item *di;
2438 	int slot;
2439 
2440 	path = alloc_path_for_send();
2441 	if (!path) {
2442 		ret = -ENOMEM;
2443 		goto out;
2444 	}
2445 
2446 	key.objectid = dir;
2447 	key.type = BTRFS_DIR_INDEX_KEY;
2448 	key.offset = 0;
2449 	while (1) {
2450 		ret = btrfs_search_slot_for_read(sctx->send_root, &key, path,
2451 				1, 0);
2452 		if (ret < 0)
2453 			goto out;
2454 		if (!ret) {
2455 			eb = path->nodes[0];
2456 			slot = path->slots[0];
2457 			btrfs_item_key_to_cpu(eb, &found_key, slot);
2458 		}
2459 		if (ret || found_key.objectid != key.objectid ||
2460 		    found_key.type != key.type) {
2461 			ret = 0;
2462 			goto out;
2463 		}
2464 
2465 		di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
2466 		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
2467 
2468 		if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
2469 		    di_key.objectid < sctx->send_progress) {
2470 			ret = 1;
2471 			goto out;
2472 		}
2473 
2474 		key.offset = found_key.offset + 1;
2475 		btrfs_release_path(path);
2476 	}
2477 
2478 out:
2479 	btrfs_free_path(path);
2480 	return ret;
2481 }
2482 
2483 /*
2484  * Only creates the inode if it is:
2485  * 1. Not a directory
2486  * 2. Or a directory which was not created already due to out of order
2487  *    directories. See did_create_dir and process_recorded_refs for details.
2488  */
2489 static int send_create_inode_if_needed(struct send_ctx *sctx)
2490 {
2491 	int ret;
2492 
2493 	if (S_ISDIR(sctx->cur_inode_mode)) {
2494 		ret = did_create_dir(sctx, sctx->cur_ino);
2495 		if (ret < 0)
2496 			goto out;
2497 		if (ret) {
2498 			ret = 0;
2499 			goto out;
2500 		}
2501 	}
2502 
2503 	ret = send_create_inode(sctx, sctx->cur_ino);
2504 	if (ret < 0)
2505 		goto out;
2506 
2507 out:
2508 	return ret;
2509 }
2510 
2511 struct recorded_ref {
2512 	struct list_head list;
2513 	char *dir_path;
2514 	char *name;
2515 	struct fs_path *full_path;
2516 	u64 dir;
2517 	u64 dir_gen;
2518 	int dir_path_len;
2519 	int name_len;
2520 };
2521 
2522 /*
2523  * We need to process new refs before deleted refs, but compare_tree gives us
2524  * everything mixed. So we first record all refs and later process them.
2525  * This function is a helper to record one ref.
2526  */
2527 static int record_ref(struct list_head *head, u64 dir,
2528 		      u64 dir_gen, struct fs_path *path)
2529 {
2530 	struct recorded_ref *ref;
2531 
2532 	ref = kmalloc(sizeof(*ref), GFP_NOFS);
2533 	if (!ref)
2534 		return -ENOMEM;
2535 
2536 	ref->dir = dir;
2537 	ref->dir_gen = dir_gen;
2538 	ref->full_path = path;
2539 
2540 	ref->name = (char *)kbasename(ref->full_path->start);
2541 	ref->name_len = ref->full_path->end - ref->name;
2542 	ref->dir_path = ref->full_path->start;
2543 	if (ref->name == ref->full_path->start)
2544 		ref->dir_path_len = 0;
2545 	else
2546 		ref->dir_path_len = ref->full_path->end -
2547 				ref->full_path->start - 1 - ref->name_len;
2548 
2549 	list_add_tail(&ref->list, head);
2550 	return 0;
2551 }
2552 
2553 static int dup_ref(struct recorded_ref *ref, struct list_head *list)
2554 {
2555 	struct recorded_ref *new;
2556 
2557 	new = kmalloc(sizeof(*ref), GFP_NOFS);
2558 	if (!new)
2559 		return -ENOMEM;
2560 
2561 	new->dir = ref->dir;
2562 	new->dir_gen = ref->dir_gen;
2563 	new->full_path = NULL;
2564 	INIT_LIST_HEAD(&new->list);
2565 	list_add_tail(&new->list, list);
2566 	return 0;
2567 }
2568 
2569 static void __free_recorded_refs(struct list_head *head)
2570 {
2571 	struct recorded_ref *cur;
2572 
2573 	while (!list_empty(head)) {
2574 		cur = list_entry(head->next, struct recorded_ref, list);
2575 		fs_path_free(cur->full_path);
2576 		list_del(&cur->list);
2577 		kfree(cur);
2578 	}
2579 }
2580 
2581 static void free_recorded_refs(struct send_ctx *sctx)
2582 {
2583 	__free_recorded_refs(&sctx->new_refs);
2584 	__free_recorded_refs(&sctx->deleted_refs);
2585 }
2586 
2587 /*
2588  * Renames/moves a file/dir to its orphan name. Used when the first
2589  * ref of an unprocessed inode gets overwritten and for all non empty
2590  * directories.
2591  */
2592 static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
2593 			  struct fs_path *path)
2594 {
2595 	int ret;
2596 	struct fs_path *orphan;
2597 
2598 	orphan = fs_path_alloc();
2599 	if (!orphan)
2600 		return -ENOMEM;
2601 
2602 	ret = gen_unique_name(sctx, ino, gen, orphan);
2603 	if (ret < 0)
2604 		goto out;
2605 
2606 	ret = send_rename(sctx, path, orphan);
2607 
2608 out:
2609 	fs_path_free(orphan);
2610 	return ret;
2611 }
2612 
2613 /*
2614  * Returns 1 if a directory can be removed at this point in time.
2615  * We check this by iterating all dir items and checking if the inode behind
2616  * the dir item was already processed.
2617  */
2618 static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress)
2619 {
2620 	int ret = 0;
2621 	struct btrfs_root *root = sctx->parent_root;
2622 	struct btrfs_path *path;
2623 	struct btrfs_key key;
2624 	struct btrfs_key found_key;
2625 	struct btrfs_key loc;
2626 	struct btrfs_dir_item *di;
2627 
2628 	/*
2629 	 * Don't try to rmdir the top/root subvolume dir.
2630 	 */
2631 	if (dir == BTRFS_FIRST_FREE_OBJECTID)
2632 		return 0;
2633 
2634 	path = alloc_path_for_send();
2635 	if (!path)
2636 		return -ENOMEM;
2637 
2638 	key.objectid = dir;
2639 	key.type = BTRFS_DIR_INDEX_KEY;
2640 	key.offset = 0;
2641 
2642 	while (1) {
2643 		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
2644 		if (ret < 0)
2645 			goto out;
2646 		if (!ret) {
2647 			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2648 					path->slots[0]);
2649 		}
2650 		if (ret || found_key.objectid != key.objectid ||
2651 		    found_key.type != key.type) {
2652 			break;
2653 		}
2654 
2655 		di = btrfs_item_ptr(path->nodes[0], path->slots[0],
2656 				struct btrfs_dir_item);
2657 		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
2658 
2659 		if (loc.objectid > send_progress) {
2660 			ret = 0;
2661 			goto out;
2662 		}
2663 
2664 		btrfs_release_path(path);
2665 		key.offset = found_key.offset + 1;
2666 	}
2667 
2668 	ret = 1;
2669 
2670 out:
2671 	btrfs_free_path(path);
2672 	return ret;
2673 }
2674 
2675 /*
2676  * This does all the move/link/unlink/rmdir magic.
2677  */
2678 static int process_recorded_refs(struct send_ctx *sctx)
2679 {
2680 	int ret = 0;
2681 	struct recorded_ref *cur;
2682 	struct recorded_ref *cur2;
2683 	struct list_head check_dirs;
2684 	struct fs_path *valid_path = NULL;
2685 	u64 ow_inode = 0;
2686 	u64 ow_gen;
2687 	int did_overwrite = 0;
2688 	int is_orphan = 0;
2689 
2690 verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2691 
2692 	/*
2693 	 * This should never happen as the root dir always has the same ref
2694 	 * which is always '..'
2695 	 */
2696 	BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
2697 	INIT_LIST_HEAD(&check_dirs);
2698 
2699 	valid_path = fs_path_alloc();
2700 	if (!valid_path) {
2701 		ret = -ENOMEM;
2702 		goto out;
2703 	}
2704 
2705 	/*
2706 	 * First, check if the first ref of the current inode was overwritten
2707 	 * before. If yes, we know that the current inode was already orphanized
2708 	 * and thus use the orphan name. If not, we can use get_cur_path to
2709 	 * get the path of the first ref as it would like while receiving at
2710 	 * this point in time.
2711 	 * New inodes are always orphan at the beginning, so force to use the
2712 	 * orphan name in this case.
2713 	 * The first ref is stored in valid_path and will be updated if it
2714 	 * gets moved around.
2715 	 */
2716 	if (!sctx->cur_inode_new) {
2717 		ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
2718 				sctx->cur_inode_gen);
2719 		if (ret < 0)
2720 			goto out;
2721 		if (ret)
2722 			did_overwrite = 1;
2723 	}
2724 	if (sctx->cur_inode_new || did_overwrite) {
2725 		ret = gen_unique_name(sctx, sctx->cur_ino,
2726 				sctx->cur_inode_gen, valid_path);
2727 		if (ret < 0)
2728 			goto out;
2729 		is_orphan = 1;
2730 	} else {
2731 		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
2732 				valid_path);
2733 		if (ret < 0)
2734 			goto out;
2735 	}
2736 
2737 	list_for_each_entry(cur, &sctx->new_refs, list) {
2738 		/*
2739 		 * We may have refs where the parent directory does not exist
2740 		 * yet. This happens if the parent directories inum is higher
2741 		 * the the current inum. To handle this case, we create the
2742 		 * parent directory out of order. But we need to check if this
2743 		 * did already happen before due to other refs in the same dir.
2744 		 */
2745 		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
2746 		if (ret < 0)
2747 			goto out;
2748 		if (ret == inode_state_will_create) {
2749 			ret = 0;
2750 			/*
2751 			 * First check if any of the current inodes refs did
2752 			 * already create the dir.
2753 			 */
2754 			list_for_each_entry(cur2, &sctx->new_refs, list) {
2755 				if (cur == cur2)
2756 					break;
2757 				if (cur2->dir == cur->dir) {
2758 					ret = 1;
2759 					break;
2760 				}
2761 			}
2762 
2763 			/*
2764 			 * If that did not happen, check if a previous inode
2765 			 * did already create the dir.
2766 			 */
2767 			if (!ret)
2768 				ret = did_create_dir(sctx, cur->dir);
2769 			if (ret < 0)
2770 				goto out;
2771 			if (!ret) {
2772 				ret = send_create_inode(sctx, cur->dir);
2773 				if (ret < 0)
2774 					goto out;
2775 			}
2776 		}
2777 
2778 		/*
2779 		 * Check if this new ref would overwrite the first ref of
2780 		 * another unprocessed inode. If yes, orphanize the
2781 		 * overwritten inode. If we find an overwritten ref that is
2782 		 * not the first ref, simply unlink it.
2783 		 */
2784 		ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
2785 				cur->name, cur->name_len,
2786 				&ow_inode, &ow_gen);
2787 		if (ret < 0)
2788 			goto out;
2789 		if (ret) {
2790 			ret = is_first_ref(sctx->parent_root,
2791 					   ow_inode, cur->dir, cur->name,
2792 					   cur->name_len);
2793 			if (ret < 0)
2794 				goto out;
2795 			if (ret) {
2796 				ret = orphanize_inode(sctx, ow_inode, ow_gen,
2797 						cur->full_path);
2798 				if (ret < 0)
2799 					goto out;
2800 			} else {
2801 				ret = send_unlink(sctx, cur->full_path);
2802 				if (ret < 0)
2803 					goto out;
2804 			}
2805 		}
2806 
2807 		/*
2808 		 * link/move the ref to the new place. If we have an orphan
2809 		 * inode, move it and update valid_path. If not, link or move
2810 		 * it depending on the inode mode.
2811 		 */
2812 		if (is_orphan) {
2813 			ret = send_rename(sctx, valid_path, cur->full_path);
2814 			if (ret < 0)
2815 				goto out;
2816 			is_orphan = 0;
2817 			ret = fs_path_copy(valid_path, cur->full_path);
2818 			if (ret < 0)
2819 				goto out;
2820 		} else {
2821 			if (S_ISDIR(sctx->cur_inode_mode)) {
2822 				/*
2823 				 * Dirs can't be linked, so move it. For moved
2824 				 * dirs, we always have one new and one deleted
2825 				 * ref. The deleted ref is ignored later.
2826 				 */
2827 				ret = send_rename(sctx, valid_path,
2828 						cur->full_path);
2829 				if (ret < 0)
2830 					goto out;
2831 				ret = fs_path_copy(valid_path, cur->full_path);
2832 				if (ret < 0)
2833 					goto out;
2834 			} else {
2835 				ret = send_link(sctx, cur->full_path,
2836 						valid_path);
2837 				if (ret < 0)
2838 					goto out;
2839 			}
2840 		}
2841 		ret = dup_ref(cur, &check_dirs);
2842 		if (ret < 0)
2843 			goto out;
2844 	}
2845 
2846 	if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
2847 		/*
2848 		 * Check if we can already rmdir the directory. If not,
2849 		 * orphanize it. For every dir item inside that gets deleted
2850 		 * later, we do this check again and rmdir it then if possible.
2851 		 * See the use of check_dirs for more details.
2852 		 */
2853 		ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino);
2854 		if (ret < 0)
2855 			goto out;
2856 		if (ret) {
2857 			ret = send_rmdir(sctx, valid_path);
2858 			if (ret < 0)
2859 				goto out;
2860 		} else if (!is_orphan) {
2861 			ret = orphanize_inode(sctx, sctx->cur_ino,
2862 					sctx->cur_inode_gen, valid_path);
2863 			if (ret < 0)
2864 				goto out;
2865 			is_orphan = 1;
2866 		}
2867 
2868 		list_for_each_entry(cur, &sctx->deleted_refs, list) {
2869 			ret = dup_ref(cur, &check_dirs);
2870 			if (ret < 0)
2871 				goto out;
2872 		}
2873 	} else if (S_ISDIR(sctx->cur_inode_mode) &&
2874 		   !list_empty(&sctx->deleted_refs)) {
2875 		/*
2876 		 * We have a moved dir. Add the old parent to check_dirs
2877 		 */
2878 		cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
2879 				list);
2880 		ret = dup_ref(cur, &check_dirs);
2881 		if (ret < 0)
2882 			goto out;
2883 	} else if (!S_ISDIR(sctx->cur_inode_mode)) {
2884 		/*
2885 		 * We have a non dir inode. Go through all deleted refs and
2886 		 * unlink them if they were not already overwritten by other
2887 		 * inodes.
2888 		 */
2889 		list_for_each_entry(cur, &sctx->deleted_refs, list) {
2890 			ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
2891 					sctx->cur_ino, sctx->cur_inode_gen,
2892 					cur->name, cur->name_len);
2893 			if (ret < 0)
2894 				goto out;
2895 			if (!ret) {
2896 				ret = send_unlink(sctx, cur->full_path);
2897 				if (ret < 0)
2898 					goto out;
2899 			}
2900 			ret = dup_ref(cur, &check_dirs);
2901 			if (ret < 0)
2902 				goto out;
2903 		}
2904 		/*
2905 		 * If the inode is still orphan, unlink the orphan. This may
2906 		 * happen when a previous inode did overwrite the first ref
2907 		 * of this inode and no new refs were added for the current
2908 		 * inode. Unlinking does not mean that the inode is deleted in
2909 		 * all cases. There may still be links to this inode in other
2910 		 * places.
2911 		 */
2912 		if (is_orphan) {
2913 			ret = send_unlink(sctx, valid_path);
2914 			if (ret < 0)
2915 				goto out;
2916 		}
2917 	}
2918 
2919 	/*
2920 	 * We did collect all parent dirs where cur_inode was once located. We
2921 	 * now go through all these dirs and check if they are pending for
2922 	 * deletion and if it's finally possible to perform the rmdir now.
2923 	 * We also update the inode stats of the parent dirs here.
2924 	 */
2925 	list_for_each_entry(cur, &check_dirs, list) {
2926 		/*
2927 		 * In case we had refs into dirs that were not processed yet,
2928 		 * we don't need to do the utime and rmdir logic for these dirs.
2929 		 * The dir will be processed later.
2930 		 */
2931 		if (cur->dir > sctx->cur_ino)
2932 			continue;
2933 
2934 		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
2935 		if (ret < 0)
2936 			goto out;
2937 
2938 		if (ret == inode_state_did_create ||
2939 		    ret == inode_state_no_change) {
2940 			/* TODO delayed utimes */
2941 			ret = send_utimes(sctx, cur->dir, cur->dir_gen);
2942 			if (ret < 0)
2943 				goto out;
2944 		} else if (ret == inode_state_did_delete) {
2945 			ret = can_rmdir(sctx, cur->dir, sctx->cur_ino);
2946 			if (ret < 0)
2947 				goto out;
2948 			if (ret) {
2949 				ret = get_cur_path(sctx, cur->dir,
2950 						   cur->dir_gen, valid_path);
2951 				if (ret < 0)
2952 					goto out;
2953 				ret = send_rmdir(sctx, valid_path);
2954 				if (ret < 0)
2955 					goto out;
2956 			}
2957 		}
2958 	}
2959 
2960 	ret = 0;
2961 
2962 out:
2963 	__free_recorded_refs(&check_dirs);
2964 	free_recorded_refs(sctx);
2965 	fs_path_free(valid_path);
2966 	return ret;
2967 }
2968 
2969 static int __record_new_ref(int num, u64 dir, int index,
2970 			    struct fs_path *name,
2971 			    void *ctx)
2972 {
2973 	int ret = 0;
2974 	struct send_ctx *sctx = ctx;
2975 	struct fs_path *p;
2976 	u64 gen;
2977 
2978 	p = fs_path_alloc();
2979 	if (!p)
2980 		return -ENOMEM;
2981 
2982 	ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL,
2983 			NULL, NULL);
2984 	if (ret < 0)
2985 		goto out;
2986 
2987 	ret = get_cur_path(sctx, dir, gen, p);
2988 	if (ret < 0)
2989 		goto out;
2990 	ret = fs_path_add_path(p, name);
2991 	if (ret < 0)
2992 		goto out;
2993 
2994 	ret = record_ref(&sctx->new_refs, dir, gen, p);
2995 
2996 out:
2997 	if (ret)
2998 		fs_path_free(p);
2999 	return ret;
3000 }
3001 
3002 static int __record_deleted_ref(int num, u64 dir, int index,
3003 				struct fs_path *name,
3004 				void *ctx)
3005 {
3006 	int ret = 0;
3007 	struct send_ctx *sctx = ctx;
3008 	struct fs_path *p;
3009 	u64 gen;
3010 
3011 	p = fs_path_alloc();
3012 	if (!p)
3013 		return -ENOMEM;
3014 
3015 	ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL,
3016 			NULL, NULL);
3017 	if (ret < 0)
3018 		goto out;
3019 
3020 	ret = get_cur_path(sctx, dir, gen, p);
3021 	if (ret < 0)
3022 		goto out;
3023 	ret = fs_path_add_path(p, name);
3024 	if (ret < 0)
3025 		goto out;
3026 
3027 	ret = record_ref(&sctx->deleted_refs, dir, gen, p);
3028 
3029 out:
3030 	if (ret)
3031 		fs_path_free(p);
3032 	return ret;
3033 }
3034 
3035 static int record_new_ref(struct send_ctx *sctx)
3036 {
3037 	int ret;
3038 
3039 	ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
3040 				sctx->cmp_key, 0, __record_new_ref, sctx);
3041 	if (ret < 0)
3042 		goto out;
3043 	ret = 0;
3044 
3045 out:
3046 	return ret;
3047 }
3048 
3049 static int record_deleted_ref(struct send_ctx *sctx)
3050 {
3051 	int ret;
3052 
3053 	ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
3054 				sctx->cmp_key, 0, __record_deleted_ref, sctx);
3055 	if (ret < 0)
3056 		goto out;
3057 	ret = 0;
3058 
3059 out:
3060 	return ret;
3061 }
3062 
3063 struct find_ref_ctx {
3064 	u64 dir;
3065 	u64 dir_gen;
3066 	struct btrfs_root *root;
3067 	struct fs_path *name;
3068 	int found_idx;
3069 };
3070 
3071 static int __find_iref(int num, u64 dir, int index,
3072 		       struct fs_path *name,
3073 		       void *ctx_)
3074 {
3075 	struct find_ref_ctx *ctx = ctx_;
3076 	u64 dir_gen;
3077 	int ret;
3078 
3079 	if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
3080 	    strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
3081 		/*
3082 		 * To avoid doing extra lookups we'll only do this if everything
3083 		 * else matches.
3084 		 */
3085 		ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
3086 				     NULL, NULL, NULL);
3087 		if (ret)
3088 			return ret;
3089 		if (dir_gen != ctx->dir_gen)
3090 			return 0;
3091 		ctx->found_idx = num;
3092 		return 1;
3093 	}
3094 	return 0;
3095 }
3096 
3097 static int find_iref(struct btrfs_root *root,
3098 		     struct btrfs_path *path,
3099 		     struct btrfs_key *key,
3100 		     u64 dir, u64 dir_gen, struct fs_path *name)
3101 {
3102 	int ret;
3103 	struct find_ref_ctx ctx;
3104 
3105 	ctx.dir = dir;
3106 	ctx.name = name;
3107 	ctx.dir_gen = dir_gen;
3108 	ctx.found_idx = -1;
3109 	ctx.root = root;
3110 
3111 	ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
3112 	if (ret < 0)
3113 		return ret;
3114 
3115 	if (ctx.found_idx == -1)
3116 		return -ENOENT;
3117 
3118 	return ctx.found_idx;
3119 }
3120 
3121 static int __record_changed_new_ref(int num, u64 dir, int index,
3122 				    struct fs_path *name,
3123 				    void *ctx)
3124 {
3125 	u64 dir_gen;
3126 	int ret;
3127 	struct send_ctx *sctx = ctx;
3128 
3129 	ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
3130 			     NULL, NULL, NULL);
3131 	if (ret)
3132 		return ret;
3133 
3134 	ret = find_iref(sctx->parent_root, sctx->right_path,
3135 			sctx->cmp_key, dir, dir_gen, name);
3136 	if (ret == -ENOENT)
3137 		ret = __record_new_ref(num, dir, index, name, sctx);
3138 	else if (ret > 0)
3139 		ret = 0;
3140 
3141 	return ret;
3142 }
3143 
3144 static int __record_changed_deleted_ref(int num, u64 dir, int index,
3145 					struct fs_path *name,
3146 					void *ctx)
3147 {
3148 	u64 dir_gen;
3149 	int ret;
3150 	struct send_ctx *sctx = ctx;
3151 
3152 	ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
3153 			     NULL, NULL, NULL);
3154 	if (ret)
3155 		return ret;
3156 
3157 	ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
3158 			dir, dir_gen, name);
3159 	if (ret == -ENOENT)
3160 		ret = __record_deleted_ref(num, dir, index, name, sctx);
3161 	else if (ret > 0)
3162 		ret = 0;
3163 
3164 	return ret;
3165 }
3166 
3167 static int record_changed_ref(struct send_ctx *sctx)
3168 {
3169 	int ret = 0;
3170 
3171 	ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
3172 			sctx->cmp_key, 0, __record_changed_new_ref, sctx);
3173 	if (ret < 0)
3174 		goto out;
3175 	ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
3176 			sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
3177 	if (ret < 0)
3178 		goto out;
3179 	ret = 0;
3180 
3181 out:
3182 	return ret;
3183 }
3184 
3185 /*
3186  * Record and process all refs at once. Needed when an inode changes the
3187  * generation number, which means that it was deleted and recreated.
3188  */
3189 static int process_all_refs(struct send_ctx *sctx,
3190 			    enum btrfs_compare_tree_result cmd)
3191 {
3192 	int ret;
3193 	struct btrfs_root *root;
3194 	struct btrfs_path *path;
3195 	struct btrfs_key key;
3196 	struct btrfs_key found_key;
3197 	struct extent_buffer *eb;
3198 	int slot;
3199 	iterate_inode_ref_t cb;
3200 
3201 	path = alloc_path_for_send();
3202 	if (!path)
3203 		return -ENOMEM;
3204 
3205 	if (cmd == BTRFS_COMPARE_TREE_NEW) {
3206 		root = sctx->send_root;
3207 		cb = __record_new_ref;
3208 	} else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
3209 		root = sctx->parent_root;
3210 		cb = __record_deleted_ref;
3211 	} else {
3212 		BUG();
3213 	}
3214 
3215 	key.objectid = sctx->cmp_key->objectid;
3216 	key.type = BTRFS_INODE_REF_KEY;
3217 	key.offset = 0;
3218 	while (1) {
3219 		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
3220 		if (ret < 0)
3221 			goto out;
3222 		if (ret)
3223 			break;
3224 
3225 		eb = path->nodes[0];
3226 		slot = path->slots[0];
3227 		btrfs_item_key_to_cpu(eb, &found_key, slot);
3228 
3229 		if (found_key.objectid != key.objectid ||
3230 		    (found_key.type != BTRFS_INODE_REF_KEY &&
3231 		     found_key.type != BTRFS_INODE_EXTREF_KEY))
3232 			break;
3233 
3234 		ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
3235 		btrfs_release_path(path);
3236 		if (ret < 0)
3237 			goto out;
3238 
3239 		key.offset = found_key.offset + 1;
3240 	}
3241 	btrfs_release_path(path);
3242 
3243 	ret = process_recorded_refs(sctx);
3244 
3245 out:
3246 	btrfs_free_path(path);
3247 	return ret;
3248 }
3249 
3250 static int send_set_xattr(struct send_ctx *sctx,
3251 			  struct fs_path *path,
3252 			  const char *name, int name_len,
3253 			  const char *data, int data_len)
3254 {
3255 	int ret = 0;
3256 
3257 	ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
3258 	if (ret < 0)
3259 		goto out;
3260 
3261 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3262 	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3263 	TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
3264 
3265 	ret = send_cmd(sctx);
3266 
3267 tlv_put_failure:
3268 out:
3269 	return ret;
3270 }
3271 
3272 static int send_remove_xattr(struct send_ctx *sctx,
3273 			  struct fs_path *path,
3274 			  const char *name, int name_len)
3275 {
3276 	int ret = 0;
3277 
3278 	ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
3279 	if (ret < 0)
3280 		goto out;
3281 
3282 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3283 	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3284 
3285 	ret = send_cmd(sctx);
3286 
3287 tlv_put_failure:
3288 out:
3289 	return ret;
3290 }
3291 
3292 static int __process_new_xattr(int num, struct btrfs_key *di_key,
3293 			       const char *name, int name_len,
3294 			       const char *data, int data_len,
3295 			       u8 type, void *ctx)
3296 {
3297 	int ret;
3298 	struct send_ctx *sctx = ctx;
3299 	struct fs_path *p;
3300 	posix_acl_xattr_header dummy_acl;
3301 
3302 	p = fs_path_alloc();
3303 	if (!p)
3304 		return -ENOMEM;
3305 
3306 	/*
3307 	 * This hack is needed because empty acl's are stored as zero byte
3308 	 * data in xattrs. Problem with that is, that receiving these zero byte
3309 	 * acl's will fail later. To fix this, we send a dummy acl list that
3310 	 * only contains the version number and no entries.
3311 	 */
3312 	if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
3313 	    !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
3314 		if (data_len == 0) {
3315 			dummy_acl.a_version =
3316 					cpu_to_le32(POSIX_ACL_XATTR_VERSION);
3317 			data = (char *)&dummy_acl;
3318 			data_len = sizeof(dummy_acl);
3319 		}
3320 	}
3321 
3322 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3323 	if (ret < 0)
3324 		goto out;
3325 
3326 	ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
3327 
3328 out:
3329 	fs_path_free(p);
3330 	return ret;
3331 }
3332 
3333 static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
3334 				   const char *name, int name_len,
3335 				   const char *data, int data_len,
3336 				   u8 type, void *ctx)
3337 {
3338 	int ret;
3339 	struct send_ctx *sctx = ctx;
3340 	struct fs_path *p;
3341 
3342 	p = fs_path_alloc();
3343 	if (!p)
3344 		return -ENOMEM;
3345 
3346 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3347 	if (ret < 0)
3348 		goto out;
3349 
3350 	ret = send_remove_xattr(sctx, p, name, name_len);
3351 
3352 out:
3353 	fs_path_free(p);
3354 	return ret;
3355 }
3356 
3357 static int process_new_xattr(struct send_ctx *sctx)
3358 {
3359 	int ret = 0;
3360 
3361 	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
3362 			       sctx->cmp_key, __process_new_xattr, sctx);
3363 
3364 	return ret;
3365 }
3366 
3367 static int process_deleted_xattr(struct send_ctx *sctx)
3368 {
3369 	int ret;
3370 
3371 	ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
3372 			       sctx->cmp_key, __process_deleted_xattr, sctx);
3373 
3374 	return ret;
3375 }
3376 
3377 struct find_xattr_ctx {
3378 	const char *name;
3379 	int name_len;
3380 	int found_idx;
3381 	char *found_data;
3382 	int found_data_len;
3383 };
3384 
3385 static int __find_xattr(int num, struct btrfs_key *di_key,
3386 			const char *name, int name_len,
3387 			const char *data, int data_len,
3388 			u8 type, void *vctx)
3389 {
3390 	struct find_xattr_ctx *ctx = vctx;
3391 
3392 	if (name_len == ctx->name_len &&
3393 	    strncmp(name, ctx->name, name_len) == 0) {
3394 		ctx->found_idx = num;
3395 		ctx->found_data_len = data_len;
3396 		ctx->found_data = kmemdup(data, data_len, GFP_NOFS);
3397 		if (!ctx->found_data)
3398 			return -ENOMEM;
3399 		return 1;
3400 	}
3401 	return 0;
3402 }
3403 
3404 static int find_xattr(struct btrfs_root *root,
3405 		      struct btrfs_path *path,
3406 		      struct btrfs_key *key,
3407 		      const char *name, int name_len,
3408 		      char **data, int *data_len)
3409 {
3410 	int ret;
3411 	struct find_xattr_ctx ctx;
3412 
3413 	ctx.name = name;
3414 	ctx.name_len = name_len;
3415 	ctx.found_idx = -1;
3416 	ctx.found_data = NULL;
3417 	ctx.found_data_len = 0;
3418 
3419 	ret = iterate_dir_item(root, path, key, __find_xattr, &ctx);
3420 	if (ret < 0)
3421 		return ret;
3422 
3423 	if (ctx.found_idx == -1)
3424 		return -ENOENT;
3425 	if (data) {
3426 		*data = ctx.found_data;
3427 		*data_len = ctx.found_data_len;
3428 	} else {
3429 		kfree(ctx.found_data);
3430 	}
3431 	return ctx.found_idx;
3432 }
3433 
3434 
3435 static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
3436 				       const char *name, int name_len,
3437 				       const char *data, int data_len,
3438 				       u8 type, void *ctx)
3439 {
3440 	int ret;
3441 	struct send_ctx *sctx = ctx;
3442 	char *found_data = NULL;
3443 	int found_data_len  = 0;
3444 
3445 	ret = find_xattr(sctx->parent_root, sctx->right_path,
3446 			 sctx->cmp_key, name, name_len, &found_data,
3447 			 &found_data_len);
3448 	if (ret == -ENOENT) {
3449 		ret = __process_new_xattr(num, di_key, name, name_len, data,
3450 				data_len, type, ctx);
3451 	} else if (ret >= 0) {
3452 		if (data_len != found_data_len ||
3453 		    memcmp(data, found_data, data_len)) {
3454 			ret = __process_new_xattr(num, di_key, name, name_len,
3455 					data, data_len, type, ctx);
3456 		} else {
3457 			ret = 0;
3458 		}
3459 	}
3460 
3461 	kfree(found_data);
3462 	return ret;
3463 }
3464 
3465 static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
3466 					   const char *name, int name_len,
3467 					   const char *data, int data_len,
3468 					   u8 type, void *ctx)
3469 {
3470 	int ret;
3471 	struct send_ctx *sctx = ctx;
3472 
3473 	ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
3474 			 name, name_len, NULL, NULL);
3475 	if (ret == -ENOENT)
3476 		ret = __process_deleted_xattr(num, di_key, name, name_len, data,
3477 				data_len, type, ctx);
3478 	else if (ret >= 0)
3479 		ret = 0;
3480 
3481 	return ret;
3482 }
3483 
3484 static int process_changed_xattr(struct send_ctx *sctx)
3485 {
3486 	int ret = 0;
3487 
3488 	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
3489 			sctx->cmp_key, __process_changed_new_xattr, sctx);
3490 	if (ret < 0)
3491 		goto out;
3492 	ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
3493 			sctx->cmp_key, __process_changed_deleted_xattr, sctx);
3494 
3495 out:
3496 	return ret;
3497 }
3498 
3499 static int process_all_new_xattrs(struct send_ctx *sctx)
3500 {
3501 	int ret;
3502 	struct btrfs_root *root;
3503 	struct btrfs_path *path;
3504 	struct btrfs_key key;
3505 	struct btrfs_key found_key;
3506 	struct extent_buffer *eb;
3507 	int slot;
3508 
3509 	path = alloc_path_for_send();
3510 	if (!path)
3511 		return -ENOMEM;
3512 
3513 	root = sctx->send_root;
3514 
3515 	key.objectid = sctx->cmp_key->objectid;
3516 	key.type = BTRFS_XATTR_ITEM_KEY;
3517 	key.offset = 0;
3518 	while (1) {
3519 		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
3520 		if (ret < 0)
3521 			goto out;
3522 		if (ret) {
3523 			ret = 0;
3524 			goto out;
3525 		}
3526 
3527 		eb = path->nodes[0];
3528 		slot = path->slots[0];
3529 		btrfs_item_key_to_cpu(eb, &found_key, slot);
3530 
3531 		if (found_key.objectid != key.objectid ||
3532 		    found_key.type != key.type) {
3533 			ret = 0;
3534 			goto out;
3535 		}
3536 
3537 		ret = iterate_dir_item(root, path, &found_key,
3538 				       __process_new_xattr, sctx);
3539 		if (ret < 0)
3540 			goto out;
3541 
3542 		btrfs_release_path(path);
3543 		key.offset = found_key.offset + 1;
3544 	}
3545 
3546 out:
3547 	btrfs_free_path(path);
3548 	return ret;
3549 }
3550 
3551 static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
3552 {
3553 	struct btrfs_root *root = sctx->send_root;
3554 	struct btrfs_fs_info *fs_info = root->fs_info;
3555 	struct inode *inode;
3556 	struct page *page;
3557 	char *addr;
3558 	struct btrfs_key key;
3559 	pgoff_t index = offset >> PAGE_CACHE_SHIFT;
3560 	pgoff_t last_index;
3561 	unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
3562 	ssize_t ret = 0;
3563 
3564 	key.objectid = sctx->cur_ino;
3565 	key.type = BTRFS_INODE_ITEM_KEY;
3566 	key.offset = 0;
3567 
3568 	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3569 	if (IS_ERR(inode))
3570 		return PTR_ERR(inode);
3571 
3572 	if (offset + len > i_size_read(inode)) {
3573 		if (offset > i_size_read(inode))
3574 			len = 0;
3575 		else
3576 			len = offset - i_size_read(inode);
3577 	}
3578 	if (len == 0)
3579 		goto out;
3580 
3581 	last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
3582 	while (index <= last_index) {
3583 		unsigned cur_len = min_t(unsigned, len,
3584 					 PAGE_CACHE_SIZE - pg_offset);
3585 		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
3586 		if (!page) {
3587 			ret = -ENOMEM;
3588 			break;
3589 		}
3590 
3591 		if (!PageUptodate(page)) {
3592 			btrfs_readpage(NULL, page);
3593 			lock_page(page);
3594 			if (!PageUptodate(page)) {
3595 				unlock_page(page);
3596 				page_cache_release(page);
3597 				ret = -EIO;
3598 				break;
3599 			}
3600 		}
3601 
3602 		addr = kmap(page);
3603 		memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
3604 		kunmap(page);
3605 		unlock_page(page);
3606 		page_cache_release(page);
3607 		index++;
3608 		pg_offset = 0;
3609 		len -= cur_len;
3610 		ret += cur_len;
3611 	}
3612 out:
3613 	iput(inode);
3614 	return ret;
3615 }
3616 
3617 /*
3618  * Read some bytes from the current inode/file and send a write command to
3619  * user space.
3620  */
3621 static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
3622 {
3623 	int ret = 0;
3624 	struct fs_path *p;
3625 	ssize_t num_read = 0;
3626 
3627 	p = fs_path_alloc();
3628 	if (!p)
3629 		return -ENOMEM;
3630 
3631 verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
3632 
3633 	num_read = fill_read_buf(sctx, offset, len);
3634 	if (num_read <= 0) {
3635 		if (num_read < 0)
3636 			ret = num_read;
3637 		goto out;
3638 	}
3639 
3640 	ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
3641 	if (ret < 0)
3642 		goto out;
3643 
3644 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3645 	if (ret < 0)
3646 		goto out;
3647 
3648 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
3649 	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
3650 	TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read);
3651 
3652 	ret = send_cmd(sctx);
3653 
3654 tlv_put_failure:
3655 out:
3656 	fs_path_free(p);
3657 	if (ret < 0)
3658 		return ret;
3659 	return num_read;
3660 }
3661 
3662 /*
3663  * Send a clone command to user space.
3664  */
3665 static int send_clone(struct send_ctx *sctx,
3666 		      u64 offset, u32 len,
3667 		      struct clone_root *clone_root)
3668 {
3669 	int ret = 0;
3670 	struct fs_path *p;
3671 	u64 gen;
3672 
3673 verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
3674 	       "clone_inode=%llu, clone_offset=%llu\n", offset, len,
3675 		clone_root->root->objectid, clone_root->ino,
3676 		clone_root->offset);
3677 
3678 	p = fs_path_alloc();
3679 	if (!p)
3680 		return -ENOMEM;
3681 
3682 	ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
3683 	if (ret < 0)
3684 		goto out;
3685 
3686 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3687 	if (ret < 0)
3688 		goto out;
3689 
3690 	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
3691 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
3692 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
3693 
3694 	if (clone_root->root == sctx->send_root) {
3695 		ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
3696 				&gen, NULL, NULL, NULL, NULL);
3697 		if (ret < 0)
3698 			goto out;
3699 		ret = get_cur_path(sctx, clone_root->ino, gen, p);
3700 	} else {
3701 		ret = get_inode_path(clone_root->root, clone_root->ino, p);
3702 	}
3703 	if (ret < 0)
3704 		goto out;
3705 
3706 	TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
3707 			clone_root->root->root_item.uuid);
3708 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
3709 			clone_root->root->root_item.ctransid);
3710 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
3711 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
3712 			clone_root->offset);
3713 
3714 	ret = send_cmd(sctx);
3715 
3716 tlv_put_failure:
3717 out:
3718 	fs_path_free(p);
3719 	return ret;
3720 }
3721 
3722 /*
3723  * Send an update extent command to user space.
3724  */
3725 static int send_update_extent(struct send_ctx *sctx,
3726 			      u64 offset, u32 len)
3727 {
3728 	int ret = 0;
3729 	struct fs_path *p;
3730 
3731 	p = fs_path_alloc();
3732 	if (!p)
3733 		return -ENOMEM;
3734 
3735 	ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
3736 	if (ret < 0)
3737 		goto out;
3738 
3739 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3740 	if (ret < 0)
3741 		goto out;
3742 
3743 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
3744 	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
3745 	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
3746 
3747 	ret = send_cmd(sctx);
3748 
3749 tlv_put_failure:
3750 out:
3751 	fs_path_free(p);
3752 	return ret;
3753 }
3754 
3755 static int send_write_or_clone(struct send_ctx *sctx,
3756 			       struct btrfs_path *path,
3757 			       struct btrfs_key *key,
3758 			       struct clone_root *clone_root)
3759 {
3760 	int ret = 0;
3761 	struct btrfs_file_extent_item *ei;
3762 	u64 offset = key->offset;
3763 	u64 pos = 0;
3764 	u64 len;
3765 	u32 l;
3766 	u8 type;
3767 
3768 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3769 			struct btrfs_file_extent_item);
3770 	type = btrfs_file_extent_type(path->nodes[0], ei);
3771 	if (type == BTRFS_FILE_EXTENT_INLINE) {
3772 		len = btrfs_file_extent_inline_len(path->nodes[0], ei);
3773 		/*
3774 		 * it is possible the inline item won't cover the whole page,
3775 		 * but there may be items after this page.  Make
3776 		 * sure to send the whole thing
3777 		 */
3778 		len = PAGE_CACHE_ALIGN(len);
3779 	} else {
3780 		len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
3781 	}
3782 
3783 	if (offset + len > sctx->cur_inode_size)
3784 		len = sctx->cur_inode_size - offset;
3785 	if (len == 0) {
3786 		ret = 0;
3787 		goto out;
3788 	}
3789 
3790 	if (clone_root) {
3791 		ret = send_clone(sctx, offset, len, clone_root);
3792 	} else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
3793 		ret = send_update_extent(sctx, offset, len);
3794 	} else {
3795 		while (pos < len) {
3796 			l = len - pos;
3797 			if (l > BTRFS_SEND_READ_SIZE)
3798 				l = BTRFS_SEND_READ_SIZE;
3799 			ret = send_write(sctx, pos + offset, l);
3800 			if (ret < 0)
3801 				goto out;
3802 			if (!ret)
3803 				break;
3804 			pos += ret;
3805 		}
3806 		ret = 0;
3807 	}
3808 out:
3809 	return ret;
3810 }
3811 
3812 static int is_extent_unchanged(struct send_ctx *sctx,
3813 			       struct btrfs_path *left_path,
3814 			       struct btrfs_key *ekey)
3815 {
3816 	int ret = 0;
3817 	struct btrfs_key key;
3818 	struct btrfs_path *path = NULL;
3819 	struct extent_buffer *eb;
3820 	int slot;
3821 	struct btrfs_key found_key;
3822 	struct btrfs_file_extent_item *ei;
3823 	u64 left_disknr;
3824 	u64 right_disknr;
3825 	u64 left_offset;
3826 	u64 right_offset;
3827 	u64 left_offset_fixed;
3828 	u64 left_len;
3829 	u64 right_len;
3830 	u64 left_gen;
3831 	u64 right_gen;
3832 	u8 left_type;
3833 	u8 right_type;
3834 
3835 	path = alloc_path_for_send();
3836 	if (!path)
3837 		return -ENOMEM;
3838 
3839 	eb = left_path->nodes[0];
3840 	slot = left_path->slots[0];
3841 	ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
3842 	left_type = btrfs_file_extent_type(eb, ei);
3843 
3844 	if (left_type != BTRFS_FILE_EXTENT_REG) {
3845 		ret = 0;
3846 		goto out;
3847 	}
3848 	left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
3849 	left_len = btrfs_file_extent_num_bytes(eb, ei);
3850 	left_offset = btrfs_file_extent_offset(eb, ei);
3851 	left_gen = btrfs_file_extent_generation(eb, ei);
3852 
3853 	/*
3854 	 * Following comments will refer to these graphics. L is the left
3855 	 * extents which we are checking at the moment. 1-8 are the right
3856 	 * extents that we iterate.
3857 	 *
3858 	 *       |-----L-----|
3859 	 * |-1-|-2a-|-3-|-4-|-5-|-6-|
3860 	 *
3861 	 *       |-----L-----|
3862 	 * |--1--|-2b-|...(same as above)
3863 	 *
3864 	 * Alternative situation. Happens on files where extents got split.
3865 	 *       |-----L-----|
3866 	 * |-----------7-----------|-6-|
3867 	 *
3868 	 * Alternative situation. Happens on files which got larger.
3869 	 *       |-----L-----|
3870 	 * |-8-|
3871 	 * Nothing follows after 8.
3872 	 */
3873 
3874 	key.objectid = ekey->objectid;
3875 	key.type = BTRFS_EXTENT_DATA_KEY;
3876 	key.offset = ekey->offset;
3877 	ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
3878 	if (ret < 0)
3879 		goto out;
3880 	if (ret) {
3881 		ret = 0;
3882 		goto out;
3883 	}
3884 
3885 	/*
3886 	 * Handle special case where the right side has no extents at all.
3887 	 */
3888 	eb = path->nodes[0];
3889 	slot = path->slots[0];
3890 	btrfs_item_key_to_cpu(eb, &found_key, slot);
3891 	if (found_key.objectid != key.objectid ||
3892 	    found_key.type != key.type) {
3893 		/* If we're a hole then just pretend nothing changed */
3894 		ret = (left_disknr) ? 0 : 1;
3895 		goto out;
3896 	}
3897 
3898 	/*
3899 	 * We're now on 2a, 2b or 7.
3900 	 */
3901 	key = found_key;
3902 	while (key.offset < ekey->offset + left_len) {
3903 		ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
3904 		right_type = btrfs_file_extent_type(eb, ei);
3905 		if (right_type != BTRFS_FILE_EXTENT_REG) {
3906 			ret = 0;
3907 			goto out;
3908 		}
3909 
3910 		right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
3911 		right_len = btrfs_file_extent_num_bytes(eb, ei);
3912 		right_offset = btrfs_file_extent_offset(eb, ei);
3913 		right_gen = btrfs_file_extent_generation(eb, ei);
3914 
3915 		/*
3916 		 * Are we at extent 8? If yes, we know the extent is changed.
3917 		 * This may only happen on the first iteration.
3918 		 */
3919 		if (found_key.offset + right_len <= ekey->offset) {
3920 			/* If we're a hole just pretend nothing changed */
3921 			ret = (left_disknr) ? 0 : 1;
3922 			goto out;
3923 		}
3924 
3925 		left_offset_fixed = left_offset;
3926 		if (key.offset < ekey->offset) {
3927 			/* Fix the right offset for 2a and 7. */
3928 			right_offset += ekey->offset - key.offset;
3929 		} else {
3930 			/* Fix the left offset for all behind 2a and 2b */
3931 			left_offset_fixed += key.offset - ekey->offset;
3932 		}
3933 
3934 		/*
3935 		 * Check if we have the same extent.
3936 		 */
3937 		if (left_disknr != right_disknr ||
3938 		    left_offset_fixed != right_offset ||
3939 		    left_gen != right_gen) {
3940 			ret = 0;
3941 			goto out;
3942 		}
3943 
3944 		/*
3945 		 * Go to the next extent.
3946 		 */
3947 		ret = btrfs_next_item(sctx->parent_root, path);
3948 		if (ret < 0)
3949 			goto out;
3950 		if (!ret) {
3951 			eb = path->nodes[0];
3952 			slot = path->slots[0];
3953 			btrfs_item_key_to_cpu(eb, &found_key, slot);
3954 		}
3955 		if (ret || found_key.objectid != key.objectid ||
3956 		    found_key.type != key.type) {
3957 			key.offset += right_len;
3958 			break;
3959 		}
3960 		if (found_key.offset != key.offset + right_len) {
3961 			ret = 0;
3962 			goto out;
3963 		}
3964 		key = found_key;
3965 	}
3966 
3967 	/*
3968 	 * We're now behind the left extent (treat as unchanged) or at the end
3969 	 * of the right side (treat as changed).
3970 	 */
3971 	if (key.offset >= ekey->offset + left_len)
3972 		ret = 1;
3973 	else
3974 		ret = 0;
3975 
3976 
3977 out:
3978 	btrfs_free_path(path);
3979 	return ret;
3980 }
3981 
3982 static int process_extent(struct send_ctx *sctx,
3983 			  struct btrfs_path *path,
3984 			  struct btrfs_key *key)
3985 {
3986 	struct clone_root *found_clone = NULL;
3987 	int ret = 0;
3988 
3989 	if (S_ISLNK(sctx->cur_inode_mode))
3990 		return 0;
3991 
3992 	if (sctx->parent_root && !sctx->cur_inode_new) {
3993 		ret = is_extent_unchanged(sctx, path, key);
3994 		if (ret < 0)
3995 			goto out;
3996 		if (ret) {
3997 			ret = 0;
3998 			goto out;
3999 		}
4000 	} else {
4001 		struct btrfs_file_extent_item *ei;
4002 		u8 type;
4003 
4004 		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
4005 				    struct btrfs_file_extent_item);
4006 		type = btrfs_file_extent_type(path->nodes[0], ei);
4007 		if (type == BTRFS_FILE_EXTENT_PREALLOC ||
4008 		    type == BTRFS_FILE_EXTENT_REG) {
4009 			/*
4010 			 * The send spec does not have a prealloc command yet,
4011 			 * so just leave a hole for prealloc'ed extents until
4012 			 * we have enough commands queued up to justify rev'ing
4013 			 * the send spec.
4014 			 */
4015 			if (type == BTRFS_FILE_EXTENT_PREALLOC) {
4016 				ret = 0;
4017 				goto out;
4018 			}
4019 
4020 			/* Have a hole, just skip it. */
4021 			if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
4022 				ret = 0;
4023 				goto out;
4024 			}
4025 		}
4026 	}
4027 
4028 	ret = find_extent_clone(sctx, path, key->objectid, key->offset,
4029 			sctx->cur_inode_size, &found_clone);
4030 	if (ret != -ENOENT && ret < 0)
4031 		goto out;
4032 
4033 	ret = send_write_or_clone(sctx, path, key, found_clone);
4034 
4035 out:
4036 	return ret;
4037 }
4038 
4039 static int process_all_extents(struct send_ctx *sctx)
4040 {
4041 	int ret;
4042 	struct btrfs_root *root;
4043 	struct btrfs_path *path;
4044 	struct btrfs_key key;
4045 	struct btrfs_key found_key;
4046 	struct extent_buffer *eb;
4047 	int slot;
4048 
4049 	root = sctx->send_root;
4050 	path = alloc_path_for_send();
4051 	if (!path)
4052 		return -ENOMEM;
4053 
4054 	key.objectid = sctx->cmp_key->objectid;
4055 	key.type = BTRFS_EXTENT_DATA_KEY;
4056 	key.offset = 0;
4057 	while (1) {
4058 		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
4059 		if (ret < 0)
4060 			goto out;
4061 		if (ret) {
4062 			ret = 0;
4063 			goto out;
4064 		}
4065 
4066 		eb = path->nodes[0];
4067 		slot = path->slots[0];
4068 		btrfs_item_key_to_cpu(eb, &found_key, slot);
4069 
4070 		if (found_key.objectid != key.objectid ||
4071 		    found_key.type != key.type) {
4072 			ret = 0;
4073 			goto out;
4074 		}
4075 
4076 		ret = process_extent(sctx, path, &found_key);
4077 		if (ret < 0)
4078 			goto out;
4079 
4080 		btrfs_release_path(path);
4081 		key.offset = found_key.offset + 1;
4082 	}
4083 
4084 out:
4085 	btrfs_free_path(path);
4086 	return ret;
4087 }
4088 
4089 static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
4090 {
4091 	int ret = 0;
4092 
4093 	if (sctx->cur_ino == 0)
4094 		goto out;
4095 	if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
4096 	    sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
4097 		goto out;
4098 	if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
4099 		goto out;
4100 
4101 	ret = process_recorded_refs(sctx);
4102 	if (ret < 0)
4103 		goto out;
4104 
4105 	/*
4106 	 * We have processed the refs and thus need to advance send_progress.
4107 	 * Now, calls to get_cur_xxx will take the updated refs of the current
4108 	 * inode into account.
4109 	 */
4110 	sctx->send_progress = sctx->cur_ino + 1;
4111 
4112 out:
4113 	return ret;
4114 }
4115 
4116 static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
4117 {
4118 	int ret = 0;
4119 	u64 left_mode;
4120 	u64 left_uid;
4121 	u64 left_gid;
4122 	u64 right_mode;
4123 	u64 right_uid;
4124 	u64 right_gid;
4125 	int need_chmod = 0;
4126 	int need_chown = 0;
4127 
4128 	ret = process_recorded_refs_if_needed(sctx, at_end);
4129 	if (ret < 0)
4130 		goto out;
4131 
4132 	if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
4133 		goto out;
4134 	if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
4135 		goto out;
4136 
4137 	ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
4138 			&left_mode, &left_uid, &left_gid, NULL);
4139 	if (ret < 0)
4140 		goto out;
4141 
4142 	if (!sctx->parent_root || sctx->cur_inode_new) {
4143 		need_chown = 1;
4144 		if (!S_ISLNK(sctx->cur_inode_mode))
4145 			need_chmod = 1;
4146 	} else {
4147 		ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
4148 				NULL, NULL, &right_mode, &right_uid,
4149 				&right_gid, NULL);
4150 		if (ret < 0)
4151 			goto out;
4152 
4153 		if (left_uid != right_uid || left_gid != right_gid)
4154 			need_chown = 1;
4155 		if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
4156 			need_chmod = 1;
4157 	}
4158 
4159 	if (S_ISREG(sctx->cur_inode_mode)) {
4160 		ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4161 				sctx->cur_inode_size);
4162 		if (ret < 0)
4163 			goto out;
4164 	}
4165 
4166 	if (need_chown) {
4167 		ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4168 				left_uid, left_gid);
4169 		if (ret < 0)
4170 			goto out;
4171 	}
4172 	if (need_chmod) {
4173 		ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4174 				left_mode);
4175 		if (ret < 0)
4176 			goto out;
4177 	}
4178 
4179 	/*
4180 	 * Need to send that every time, no matter if it actually changed
4181 	 * between the two trees as we have done changes to the inode before.
4182 	 */
4183 	ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
4184 	if (ret < 0)
4185 		goto out;
4186 
4187 out:
4188 	return ret;
4189 }
4190 
4191 static int changed_inode(struct send_ctx *sctx,
4192 			 enum btrfs_compare_tree_result result)
4193 {
4194 	int ret = 0;
4195 	struct btrfs_key *key = sctx->cmp_key;
4196 	struct btrfs_inode_item *left_ii = NULL;
4197 	struct btrfs_inode_item *right_ii = NULL;
4198 	u64 left_gen = 0;
4199 	u64 right_gen = 0;
4200 
4201 	sctx->cur_ino = key->objectid;
4202 	sctx->cur_inode_new_gen = 0;
4203 
4204 	/*
4205 	 * Set send_progress to current inode. This will tell all get_cur_xxx
4206 	 * functions that the current inode's refs are not updated yet. Later,
4207 	 * when process_recorded_refs is finished, it is set to cur_ino + 1.
4208 	 */
4209 	sctx->send_progress = sctx->cur_ino;
4210 
4211 	if (result == BTRFS_COMPARE_TREE_NEW ||
4212 	    result == BTRFS_COMPARE_TREE_CHANGED) {
4213 		left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
4214 				sctx->left_path->slots[0],
4215 				struct btrfs_inode_item);
4216 		left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
4217 				left_ii);
4218 	} else {
4219 		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
4220 				sctx->right_path->slots[0],
4221 				struct btrfs_inode_item);
4222 		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
4223 				right_ii);
4224 	}
4225 	if (result == BTRFS_COMPARE_TREE_CHANGED) {
4226 		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
4227 				sctx->right_path->slots[0],
4228 				struct btrfs_inode_item);
4229 
4230 		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
4231 				right_ii);
4232 
4233 		/*
4234 		 * The cur_ino = root dir case is special here. We can't treat
4235 		 * the inode as deleted+reused because it would generate a
4236 		 * stream that tries to delete/mkdir the root dir.
4237 		 */
4238 		if (left_gen != right_gen &&
4239 		    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
4240 			sctx->cur_inode_new_gen = 1;
4241 	}
4242 
4243 	if (result == BTRFS_COMPARE_TREE_NEW) {
4244 		sctx->cur_inode_gen = left_gen;
4245 		sctx->cur_inode_new = 1;
4246 		sctx->cur_inode_deleted = 0;
4247 		sctx->cur_inode_size = btrfs_inode_size(
4248 				sctx->left_path->nodes[0], left_ii);
4249 		sctx->cur_inode_mode = btrfs_inode_mode(
4250 				sctx->left_path->nodes[0], left_ii);
4251 		if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
4252 			ret = send_create_inode_if_needed(sctx);
4253 	} else if (result == BTRFS_COMPARE_TREE_DELETED) {
4254 		sctx->cur_inode_gen = right_gen;
4255 		sctx->cur_inode_new = 0;
4256 		sctx->cur_inode_deleted = 1;
4257 		sctx->cur_inode_size = btrfs_inode_size(
4258 				sctx->right_path->nodes[0], right_ii);
4259 		sctx->cur_inode_mode = btrfs_inode_mode(
4260 				sctx->right_path->nodes[0], right_ii);
4261 	} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
4262 		/*
4263 		 * We need to do some special handling in case the inode was
4264 		 * reported as changed with a changed generation number. This
4265 		 * means that the original inode was deleted and new inode
4266 		 * reused the same inum. So we have to treat the old inode as
4267 		 * deleted and the new one as new.
4268 		 */
4269 		if (sctx->cur_inode_new_gen) {
4270 			/*
4271 			 * First, process the inode as if it was deleted.
4272 			 */
4273 			sctx->cur_inode_gen = right_gen;
4274 			sctx->cur_inode_new = 0;
4275 			sctx->cur_inode_deleted = 1;
4276 			sctx->cur_inode_size = btrfs_inode_size(
4277 					sctx->right_path->nodes[0], right_ii);
4278 			sctx->cur_inode_mode = btrfs_inode_mode(
4279 					sctx->right_path->nodes[0], right_ii);
4280 			ret = process_all_refs(sctx,
4281 					BTRFS_COMPARE_TREE_DELETED);
4282 			if (ret < 0)
4283 				goto out;
4284 
4285 			/*
4286 			 * Now process the inode as if it was new.
4287 			 */
4288 			sctx->cur_inode_gen = left_gen;
4289 			sctx->cur_inode_new = 1;
4290 			sctx->cur_inode_deleted = 0;
4291 			sctx->cur_inode_size = btrfs_inode_size(
4292 					sctx->left_path->nodes[0], left_ii);
4293 			sctx->cur_inode_mode = btrfs_inode_mode(
4294 					sctx->left_path->nodes[0], left_ii);
4295 			ret = send_create_inode_if_needed(sctx);
4296 			if (ret < 0)
4297 				goto out;
4298 
4299 			ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
4300 			if (ret < 0)
4301 				goto out;
4302 			/*
4303 			 * Advance send_progress now as we did not get into
4304 			 * process_recorded_refs_if_needed in the new_gen case.
4305 			 */
4306 			sctx->send_progress = sctx->cur_ino + 1;
4307 
4308 			/*
4309 			 * Now process all extents and xattrs of the inode as if
4310 			 * they were all new.
4311 			 */
4312 			ret = process_all_extents(sctx);
4313 			if (ret < 0)
4314 				goto out;
4315 			ret = process_all_new_xattrs(sctx);
4316 			if (ret < 0)
4317 				goto out;
4318 		} else {
4319 			sctx->cur_inode_gen = left_gen;
4320 			sctx->cur_inode_new = 0;
4321 			sctx->cur_inode_new_gen = 0;
4322 			sctx->cur_inode_deleted = 0;
4323 			sctx->cur_inode_size = btrfs_inode_size(
4324 					sctx->left_path->nodes[0], left_ii);
4325 			sctx->cur_inode_mode = btrfs_inode_mode(
4326 					sctx->left_path->nodes[0], left_ii);
4327 		}
4328 	}
4329 
4330 out:
4331 	return ret;
4332 }
4333 
4334 /*
4335  * We have to process new refs before deleted refs, but compare_trees gives us
4336  * the new and deleted refs mixed. To fix this, we record the new/deleted refs
4337  * first and later process them in process_recorded_refs.
4338  * For the cur_inode_new_gen case, we skip recording completely because
4339  * changed_inode did already initiate processing of refs. The reason for this is
4340  * that in this case, compare_tree actually compares the refs of 2 different
4341  * inodes. To fix this, process_all_refs is used in changed_inode to handle all
4342  * refs of the right tree as deleted and all refs of the left tree as new.
4343  */
4344 static int changed_ref(struct send_ctx *sctx,
4345 		       enum btrfs_compare_tree_result result)
4346 {
4347 	int ret = 0;
4348 
4349 	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4350 
4351 	if (!sctx->cur_inode_new_gen &&
4352 	    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
4353 		if (result == BTRFS_COMPARE_TREE_NEW)
4354 			ret = record_new_ref(sctx);
4355 		else if (result == BTRFS_COMPARE_TREE_DELETED)
4356 			ret = record_deleted_ref(sctx);
4357 		else if (result == BTRFS_COMPARE_TREE_CHANGED)
4358 			ret = record_changed_ref(sctx);
4359 	}
4360 
4361 	return ret;
4362 }
4363 
4364 /*
4365  * Process new/deleted/changed xattrs. We skip processing in the
4366  * cur_inode_new_gen case because changed_inode did already initiate processing
4367  * of xattrs. The reason is the same as in changed_ref
4368  */
4369 static int changed_xattr(struct send_ctx *sctx,
4370 			 enum btrfs_compare_tree_result result)
4371 {
4372 	int ret = 0;
4373 
4374 	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4375 
4376 	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
4377 		if (result == BTRFS_COMPARE_TREE_NEW)
4378 			ret = process_new_xattr(sctx);
4379 		else if (result == BTRFS_COMPARE_TREE_DELETED)
4380 			ret = process_deleted_xattr(sctx);
4381 		else if (result == BTRFS_COMPARE_TREE_CHANGED)
4382 			ret = process_changed_xattr(sctx);
4383 	}
4384 
4385 	return ret;
4386 }
4387 
4388 /*
4389  * Process new/deleted/changed extents. We skip processing in the
4390  * cur_inode_new_gen case because changed_inode did already initiate processing
4391  * of extents. The reason is the same as in changed_ref
4392  */
4393 static int changed_extent(struct send_ctx *sctx,
4394 			  enum btrfs_compare_tree_result result)
4395 {
4396 	int ret = 0;
4397 
4398 	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4399 
4400 	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
4401 		if (result != BTRFS_COMPARE_TREE_DELETED)
4402 			ret = process_extent(sctx, sctx->left_path,
4403 					sctx->cmp_key);
4404 	}
4405 
4406 	return ret;
4407 }
4408 
4409 static int dir_changed(struct send_ctx *sctx, u64 dir)
4410 {
4411 	u64 orig_gen, new_gen;
4412 	int ret;
4413 
4414 	ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
4415 			     NULL, NULL);
4416 	if (ret)
4417 		return ret;
4418 
4419 	ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
4420 			     NULL, NULL, NULL);
4421 	if (ret)
4422 		return ret;
4423 
4424 	return (orig_gen != new_gen) ? 1 : 0;
4425 }
4426 
4427 static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
4428 			struct btrfs_key *key)
4429 {
4430 	struct btrfs_inode_extref *extref;
4431 	struct extent_buffer *leaf;
4432 	u64 dirid = 0, last_dirid = 0;
4433 	unsigned long ptr;
4434 	u32 item_size;
4435 	u32 cur_offset = 0;
4436 	int ref_name_len;
4437 	int ret = 0;
4438 
4439 	/* Easy case, just check this one dirid */
4440 	if (key->type == BTRFS_INODE_REF_KEY) {
4441 		dirid = key->offset;
4442 
4443 		ret = dir_changed(sctx, dirid);
4444 		goto out;
4445 	}
4446 
4447 	leaf = path->nodes[0];
4448 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
4449 	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
4450 	while (cur_offset < item_size) {
4451 		extref = (struct btrfs_inode_extref *)(ptr +
4452 						       cur_offset);
4453 		dirid = btrfs_inode_extref_parent(leaf, extref);
4454 		ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
4455 		cur_offset += ref_name_len + sizeof(*extref);
4456 		if (dirid == last_dirid)
4457 			continue;
4458 		ret = dir_changed(sctx, dirid);
4459 		if (ret)
4460 			break;
4461 		last_dirid = dirid;
4462 	}
4463 out:
4464 	return ret;
4465 }
4466 
4467 /*
4468  * Updates compare related fields in sctx and simply forwards to the actual
4469  * changed_xxx functions.
4470  */
4471 static int changed_cb(struct btrfs_root *left_root,
4472 		      struct btrfs_root *right_root,
4473 		      struct btrfs_path *left_path,
4474 		      struct btrfs_path *right_path,
4475 		      struct btrfs_key *key,
4476 		      enum btrfs_compare_tree_result result,
4477 		      void *ctx)
4478 {
4479 	int ret = 0;
4480 	struct send_ctx *sctx = ctx;
4481 
4482 	if (result == BTRFS_COMPARE_TREE_SAME) {
4483 		if (key->type != BTRFS_INODE_REF_KEY &&
4484 		    key->type != BTRFS_INODE_EXTREF_KEY)
4485 			return 0;
4486 		ret = compare_refs(sctx, left_path, key);
4487 		if (!ret)
4488 			return 0;
4489 		if (ret < 0)
4490 			return ret;
4491 		result = BTRFS_COMPARE_TREE_CHANGED;
4492 		ret = 0;
4493 	}
4494 
4495 	sctx->left_path = left_path;
4496 	sctx->right_path = right_path;
4497 	sctx->cmp_key = key;
4498 
4499 	ret = finish_inode_if_needed(sctx, 0);
4500 	if (ret < 0)
4501 		goto out;
4502 
4503 	/* Ignore non-FS objects */
4504 	if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
4505 	    key->objectid == BTRFS_FREE_SPACE_OBJECTID)
4506 		goto out;
4507 
4508 	if (key->type == BTRFS_INODE_ITEM_KEY)
4509 		ret = changed_inode(sctx, result);
4510 	else if (key->type == BTRFS_INODE_REF_KEY ||
4511 		 key->type == BTRFS_INODE_EXTREF_KEY)
4512 		ret = changed_ref(sctx, result);
4513 	else if (key->type == BTRFS_XATTR_ITEM_KEY)
4514 		ret = changed_xattr(sctx, result);
4515 	else if (key->type == BTRFS_EXTENT_DATA_KEY)
4516 		ret = changed_extent(sctx, result);
4517 
4518 out:
4519 	return ret;
4520 }
4521 
4522 static int full_send_tree(struct send_ctx *sctx)
4523 {
4524 	int ret;
4525 	struct btrfs_trans_handle *trans = NULL;
4526 	struct btrfs_root *send_root = sctx->send_root;
4527 	struct btrfs_key key;
4528 	struct btrfs_key found_key;
4529 	struct btrfs_path *path;
4530 	struct extent_buffer *eb;
4531 	int slot;
4532 	u64 start_ctransid;
4533 	u64 ctransid;
4534 
4535 	path = alloc_path_for_send();
4536 	if (!path)
4537 		return -ENOMEM;
4538 
4539 	spin_lock(&send_root->root_item_lock);
4540 	start_ctransid = btrfs_root_ctransid(&send_root->root_item);
4541 	spin_unlock(&send_root->root_item_lock);
4542 
4543 	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4544 	key.type = BTRFS_INODE_ITEM_KEY;
4545 	key.offset = 0;
4546 
4547 join_trans:
4548 	/*
4549 	 * We need to make sure the transaction does not get committed
4550 	 * while we do anything on commit roots. Join a transaction to prevent
4551 	 * this.
4552 	 */
4553 	trans = btrfs_join_transaction(send_root);
4554 	if (IS_ERR(trans)) {
4555 		ret = PTR_ERR(trans);
4556 		trans = NULL;
4557 		goto out;
4558 	}
4559 
4560 	/*
4561 	 * Make sure the tree has not changed after re-joining. We detect this
4562 	 * by comparing start_ctransid and ctransid. They should always match.
4563 	 */
4564 	spin_lock(&send_root->root_item_lock);
4565 	ctransid = btrfs_root_ctransid(&send_root->root_item);
4566 	spin_unlock(&send_root->root_item_lock);
4567 
4568 	if (ctransid != start_ctransid) {
4569 		WARN(1, KERN_WARNING "btrfs: the root that you're trying to "
4570 				     "send was modified in between. This is "
4571 				     "probably a bug.\n");
4572 		ret = -EIO;
4573 		goto out;
4574 	}
4575 
4576 	ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
4577 	if (ret < 0)
4578 		goto out;
4579 	if (ret)
4580 		goto out_finish;
4581 
4582 	while (1) {
4583 		/*
4584 		 * When someone want to commit while we iterate, end the
4585 		 * joined transaction and rejoin.
4586 		 */
4587 		if (btrfs_should_end_transaction(trans, send_root)) {
4588 			ret = btrfs_end_transaction(trans, send_root);
4589 			trans = NULL;
4590 			if (ret < 0)
4591 				goto out;
4592 			btrfs_release_path(path);
4593 			goto join_trans;
4594 		}
4595 
4596 		eb = path->nodes[0];
4597 		slot = path->slots[0];
4598 		btrfs_item_key_to_cpu(eb, &found_key, slot);
4599 
4600 		ret = changed_cb(send_root, NULL, path, NULL,
4601 				&found_key, BTRFS_COMPARE_TREE_NEW, sctx);
4602 		if (ret < 0)
4603 			goto out;
4604 
4605 		key.objectid = found_key.objectid;
4606 		key.type = found_key.type;
4607 		key.offset = found_key.offset + 1;
4608 
4609 		ret = btrfs_next_item(send_root, path);
4610 		if (ret < 0)
4611 			goto out;
4612 		if (ret) {
4613 			ret  = 0;
4614 			break;
4615 		}
4616 	}
4617 
4618 out_finish:
4619 	ret = finish_inode_if_needed(sctx, 1);
4620 
4621 out:
4622 	btrfs_free_path(path);
4623 	if (trans) {
4624 		if (!ret)
4625 			ret = btrfs_end_transaction(trans, send_root);
4626 		else
4627 			btrfs_end_transaction(trans, send_root);
4628 	}
4629 	return ret;
4630 }
4631 
4632 static int send_subvol(struct send_ctx *sctx)
4633 {
4634 	int ret;
4635 
4636 	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
4637 		ret = send_header(sctx);
4638 		if (ret < 0)
4639 			goto out;
4640 	}
4641 
4642 	ret = send_subvol_begin(sctx);
4643 	if (ret < 0)
4644 		goto out;
4645 
4646 	if (sctx->parent_root) {
4647 		ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
4648 				changed_cb, sctx);
4649 		if (ret < 0)
4650 			goto out;
4651 		ret = finish_inode_if_needed(sctx, 1);
4652 		if (ret < 0)
4653 			goto out;
4654 	} else {
4655 		ret = full_send_tree(sctx);
4656 		if (ret < 0)
4657 			goto out;
4658 	}
4659 
4660 out:
4661 	free_recorded_refs(sctx);
4662 	return ret;
4663 }
4664 
4665 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4666 {
4667 	int ret = 0;
4668 	struct btrfs_root *send_root;
4669 	struct btrfs_root *clone_root;
4670 	struct btrfs_fs_info *fs_info;
4671 	struct btrfs_ioctl_send_args *arg = NULL;
4672 	struct btrfs_key key;
4673 	struct send_ctx *sctx = NULL;
4674 	u32 i;
4675 	u64 *clone_sources_tmp = NULL;
4676 
4677 	if (!capable(CAP_SYS_ADMIN))
4678 		return -EPERM;
4679 
4680 	send_root = BTRFS_I(file_inode(mnt_file))->root;
4681 	fs_info = send_root->fs_info;
4682 
4683 	/*
4684 	 * This is done when we lookup the root, it should already be complete
4685 	 * by the time we get here.
4686 	 */
4687 	WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
4688 
4689 	/*
4690 	 * If we just created this root we need to make sure that the orphan
4691 	 * cleanup has been done and committed since we search the commit root,
4692 	 * so check its commit root transid with our otransid and if they match
4693 	 * commit the transaction to make sure everything is updated.
4694 	 */
4695 	down_read(&send_root->fs_info->extent_commit_sem);
4696 	if (btrfs_header_generation(send_root->commit_root) ==
4697 	    btrfs_root_otransid(&send_root->root_item)) {
4698 		struct btrfs_trans_handle *trans;
4699 
4700 		up_read(&send_root->fs_info->extent_commit_sem);
4701 
4702 		trans = btrfs_attach_transaction_barrier(send_root);
4703 		if (IS_ERR(trans)) {
4704 			if (PTR_ERR(trans) != -ENOENT) {
4705 				ret = PTR_ERR(trans);
4706 				goto out;
4707 			}
4708 			/* ENOENT means theres no transaction */
4709 		} else {
4710 			ret = btrfs_commit_transaction(trans, send_root);
4711 			if (ret)
4712 				goto out;
4713 		}
4714 	} else {
4715 		up_read(&send_root->fs_info->extent_commit_sem);
4716 	}
4717 
4718 	arg = memdup_user(arg_, sizeof(*arg));
4719 	if (IS_ERR(arg)) {
4720 		ret = PTR_ERR(arg);
4721 		arg = NULL;
4722 		goto out;
4723 	}
4724 
4725 	if (!access_ok(VERIFY_READ, arg->clone_sources,
4726 			sizeof(*arg->clone_sources) *
4727 			arg->clone_sources_count)) {
4728 		ret = -EFAULT;
4729 		goto out;
4730 	}
4731 
4732 	if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
4733 		ret = -EINVAL;
4734 		goto out;
4735 	}
4736 
4737 	sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
4738 	if (!sctx) {
4739 		ret = -ENOMEM;
4740 		goto out;
4741 	}
4742 
4743 	INIT_LIST_HEAD(&sctx->new_refs);
4744 	INIT_LIST_HEAD(&sctx->deleted_refs);
4745 	INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
4746 	INIT_LIST_HEAD(&sctx->name_cache_list);
4747 
4748 	sctx->flags = arg->flags;
4749 
4750 	sctx->send_filp = fget(arg->send_fd);
4751 	if (!sctx->send_filp) {
4752 		ret = -EBADF;
4753 		goto out;
4754 	}
4755 
4756 	sctx->mnt = mnt_file->f_path.mnt;
4757 
4758 	sctx->send_root = send_root;
4759 	sctx->clone_roots_cnt = arg->clone_sources_count;
4760 
4761 	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
4762 	sctx->send_buf = vmalloc(sctx->send_max_size);
4763 	if (!sctx->send_buf) {
4764 		ret = -ENOMEM;
4765 		goto out;
4766 	}
4767 
4768 	sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
4769 	if (!sctx->read_buf) {
4770 		ret = -ENOMEM;
4771 		goto out;
4772 	}
4773 
4774 	sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
4775 			(arg->clone_sources_count + 1));
4776 	if (!sctx->clone_roots) {
4777 		ret = -ENOMEM;
4778 		goto out;
4779 	}
4780 
4781 	if (arg->clone_sources_count) {
4782 		clone_sources_tmp = vmalloc(arg->clone_sources_count *
4783 				sizeof(*arg->clone_sources));
4784 		if (!clone_sources_tmp) {
4785 			ret = -ENOMEM;
4786 			goto out;
4787 		}
4788 
4789 		ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
4790 				arg->clone_sources_count *
4791 				sizeof(*arg->clone_sources));
4792 		if (ret) {
4793 			ret = -EFAULT;
4794 			goto out;
4795 		}
4796 
4797 		for (i = 0; i < arg->clone_sources_count; i++) {
4798 			key.objectid = clone_sources_tmp[i];
4799 			key.type = BTRFS_ROOT_ITEM_KEY;
4800 			key.offset = (u64)-1;
4801 			clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
4802 			if (IS_ERR(clone_root)) {
4803 				ret = PTR_ERR(clone_root);
4804 				goto out;
4805 			}
4806 			sctx->clone_roots[i].root = clone_root;
4807 		}
4808 		vfree(clone_sources_tmp);
4809 		clone_sources_tmp = NULL;
4810 	}
4811 
4812 	if (arg->parent_root) {
4813 		key.objectid = arg->parent_root;
4814 		key.type = BTRFS_ROOT_ITEM_KEY;
4815 		key.offset = (u64)-1;
4816 		sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
4817 		if (IS_ERR(sctx->parent_root)) {
4818 			ret = PTR_ERR(sctx->parent_root);
4819 			goto out;
4820 		}
4821 	}
4822 
4823 	/*
4824 	 * Clones from send_root are allowed, but only if the clone source
4825 	 * is behind the current send position. This is checked while searching
4826 	 * for possible clone sources.
4827 	 */
4828 	sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
4829 
4830 	/* We do a bsearch later */
4831 	sort(sctx->clone_roots, sctx->clone_roots_cnt,
4832 			sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
4833 			NULL);
4834 
4835 	ret = send_subvol(sctx);
4836 	if (ret < 0)
4837 		goto out;
4838 
4839 	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
4840 		ret = begin_cmd(sctx, BTRFS_SEND_C_END);
4841 		if (ret < 0)
4842 			goto out;
4843 		ret = send_cmd(sctx);
4844 		if (ret < 0)
4845 			goto out;
4846 	}
4847 
4848 out:
4849 	kfree(arg);
4850 	vfree(clone_sources_tmp);
4851 
4852 	if (sctx) {
4853 		if (sctx->send_filp)
4854 			fput(sctx->send_filp);
4855 
4856 		vfree(sctx->clone_roots);
4857 		vfree(sctx->send_buf);
4858 		vfree(sctx->read_buf);
4859 
4860 		name_cache_free(sctx);
4861 
4862 		kfree(sctx);
4863 	}
4864 
4865 	return ret;
4866 }
4867