xref: /openbmc/linux/fs/btrfs/send.c (revision 95e9fd10)
1 /*
2  * Copyright (C) 2012 Alexander Block.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 
19 #include <linux/bsearch.h>
20 #include <linux/fs.h>
21 #include <linux/file.h>
22 #include <linux/sort.h>
23 #include <linux/mount.h>
24 #include <linux/xattr.h>
25 #include <linux/posix_acl_xattr.h>
26 #include <linux/radix-tree.h>
27 #include <linux/crc32c.h>
28 #include <linux/vmalloc.h>
29 
30 #include "send.h"
31 #include "backref.h"
32 #include "locking.h"
33 #include "disk-io.h"
34 #include "btrfs_inode.h"
35 #include "transaction.h"
36 
37 static int g_verbose = 0;
38 
39 #define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__)
40 
41 /*
42  * A fs_path is a helper to dynamically build path names with unknown size.
43  * It reallocates the internal buffer on demand.
44  * It allows fast adding of path elements on the right side (normal path) and
45  * fast adding to the left side (reversed path). A reversed path can also be
46  * unreversed if needed.
47  */
48 struct fs_path {
49 	union {
50 		struct {
51 			char *start;
52 			char *end;
53 			char *prepared;
54 
55 			char *buf;
56 			int buf_len;
57 			int reversed:1;
58 			int virtual_mem:1;
59 			char inline_buf[];
60 		};
61 		char pad[PAGE_SIZE];
62 	};
63 };
64 #define FS_PATH_INLINE_SIZE \
65 	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
66 
67 
68 /* reused for each extent */
69 struct clone_root {
70 	struct btrfs_root *root;
71 	u64 ino;
72 	u64 offset;
73 
74 	u64 found_refs;
75 };
76 
77 #define SEND_CTX_MAX_NAME_CACHE_SIZE 128
78 #define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
79 
80 struct send_ctx {
81 	struct file *send_filp;
82 	loff_t send_off;
83 	char *send_buf;
84 	u32 send_size;
85 	u32 send_max_size;
86 	u64 total_send_size;
87 	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
88 
89 	struct vfsmount *mnt;
90 
91 	struct btrfs_root *send_root;
92 	struct btrfs_root *parent_root;
93 	struct clone_root *clone_roots;
94 	int clone_roots_cnt;
95 
96 	/* current state of the compare_tree call */
97 	struct btrfs_path *left_path;
98 	struct btrfs_path *right_path;
99 	struct btrfs_key *cmp_key;
100 
101 	/*
102 	 * infos of the currently processed inode. In case of deleted inodes,
103 	 * these are the values from the deleted inode.
104 	 */
105 	u64 cur_ino;
106 	u64 cur_inode_gen;
107 	int cur_inode_new;
108 	int cur_inode_new_gen;
109 	int cur_inode_deleted;
110 	int cur_inode_first_ref_orphan;
111 	u64 cur_inode_size;
112 	u64 cur_inode_mode;
113 
114 	u64 send_progress;
115 
116 	struct list_head new_refs;
117 	struct list_head deleted_refs;
118 
119 	struct radix_tree_root name_cache;
120 	struct list_head name_cache_list;
121 	int name_cache_size;
122 
123 	struct file *cur_inode_filp;
124 	char *read_buf;
125 };
126 
127 struct name_cache_entry {
128 	struct list_head list;
129 	struct list_head use_list;
130 	u64 ino;
131 	u64 gen;
132 	u64 parent_ino;
133 	u64 parent_gen;
134 	int ret;
135 	int need_later_update;
136 	int name_len;
137 	char name[];
138 };
139 
140 static void fs_path_reset(struct fs_path *p)
141 {
142 	if (p->reversed) {
143 		p->start = p->buf + p->buf_len - 1;
144 		p->end = p->start;
145 		*p->start = 0;
146 	} else {
147 		p->start = p->buf;
148 		p->end = p->start;
149 		*p->start = 0;
150 	}
151 }
152 
153 static struct fs_path *fs_path_alloc(struct send_ctx *sctx)
154 {
155 	struct fs_path *p;
156 
157 	p = kmalloc(sizeof(*p), GFP_NOFS);
158 	if (!p)
159 		return NULL;
160 	p->reversed = 0;
161 	p->virtual_mem = 0;
162 	p->buf = p->inline_buf;
163 	p->buf_len = FS_PATH_INLINE_SIZE;
164 	fs_path_reset(p);
165 	return p;
166 }
167 
168 static struct fs_path *fs_path_alloc_reversed(struct send_ctx *sctx)
169 {
170 	struct fs_path *p;
171 
172 	p = fs_path_alloc(sctx);
173 	if (!p)
174 		return NULL;
175 	p->reversed = 1;
176 	fs_path_reset(p);
177 	return p;
178 }
179 
180 static void fs_path_free(struct send_ctx *sctx, struct fs_path *p)
181 {
182 	if (!p)
183 		return;
184 	if (p->buf != p->inline_buf) {
185 		if (p->virtual_mem)
186 			vfree(p->buf);
187 		else
188 			kfree(p->buf);
189 	}
190 	kfree(p);
191 }
192 
193 static int fs_path_len(struct fs_path *p)
194 {
195 	return p->end - p->start;
196 }
197 
198 static int fs_path_ensure_buf(struct fs_path *p, int len)
199 {
200 	char *tmp_buf;
201 	int path_len;
202 	int old_buf_len;
203 
204 	len++;
205 
206 	if (p->buf_len >= len)
207 		return 0;
208 
209 	path_len = p->end - p->start;
210 	old_buf_len = p->buf_len;
211 	len = PAGE_ALIGN(len);
212 
213 	if (p->buf == p->inline_buf) {
214 		tmp_buf = kmalloc(len, GFP_NOFS);
215 		if (!tmp_buf) {
216 			tmp_buf = vmalloc(len);
217 			if (!tmp_buf)
218 				return -ENOMEM;
219 			p->virtual_mem = 1;
220 		}
221 		memcpy(tmp_buf, p->buf, p->buf_len);
222 		p->buf = tmp_buf;
223 		p->buf_len = len;
224 	} else {
225 		if (p->virtual_mem) {
226 			tmp_buf = vmalloc(len);
227 			if (!tmp_buf)
228 				return -ENOMEM;
229 			memcpy(tmp_buf, p->buf, p->buf_len);
230 			vfree(p->buf);
231 		} else {
232 			tmp_buf = krealloc(p->buf, len, GFP_NOFS);
233 			if (!tmp_buf) {
234 				tmp_buf = vmalloc(len);
235 				if (!tmp_buf)
236 					return -ENOMEM;
237 				memcpy(tmp_buf, p->buf, p->buf_len);
238 				kfree(p->buf);
239 				p->virtual_mem = 1;
240 			}
241 		}
242 		p->buf = tmp_buf;
243 		p->buf_len = len;
244 	}
245 	if (p->reversed) {
246 		tmp_buf = p->buf + old_buf_len - path_len - 1;
247 		p->end = p->buf + p->buf_len - 1;
248 		p->start = p->end - path_len;
249 		memmove(p->start, tmp_buf, path_len + 1);
250 	} else {
251 		p->start = p->buf;
252 		p->end = p->start + path_len;
253 	}
254 	return 0;
255 }
256 
257 static int fs_path_prepare_for_add(struct fs_path *p, int name_len)
258 {
259 	int ret;
260 	int new_len;
261 
262 	new_len = p->end - p->start + name_len;
263 	if (p->start != p->end)
264 		new_len++;
265 	ret = fs_path_ensure_buf(p, new_len);
266 	if (ret < 0)
267 		goto out;
268 
269 	if (p->reversed) {
270 		if (p->start != p->end)
271 			*--p->start = '/';
272 		p->start -= name_len;
273 		p->prepared = p->start;
274 	} else {
275 		if (p->start != p->end)
276 			*p->end++ = '/';
277 		p->prepared = p->end;
278 		p->end += name_len;
279 		*p->end = 0;
280 	}
281 
282 out:
283 	return ret;
284 }
285 
286 static int fs_path_add(struct fs_path *p, const char *name, int name_len)
287 {
288 	int ret;
289 
290 	ret = fs_path_prepare_for_add(p, name_len);
291 	if (ret < 0)
292 		goto out;
293 	memcpy(p->prepared, name, name_len);
294 	p->prepared = NULL;
295 
296 out:
297 	return ret;
298 }
299 
300 static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
301 {
302 	int ret;
303 
304 	ret = fs_path_prepare_for_add(p, p2->end - p2->start);
305 	if (ret < 0)
306 		goto out;
307 	memcpy(p->prepared, p2->start, p2->end - p2->start);
308 	p->prepared = NULL;
309 
310 out:
311 	return ret;
312 }
313 
314 static int fs_path_add_from_extent_buffer(struct fs_path *p,
315 					  struct extent_buffer *eb,
316 					  unsigned long off, int len)
317 {
318 	int ret;
319 
320 	ret = fs_path_prepare_for_add(p, len);
321 	if (ret < 0)
322 		goto out;
323 
324 	read_extent_buffer(eb, p->prepared, off, len);
325 	p->prepared = NULL;
326 
327 out:
328 	return ret;
329 }
330 
331 static void fs_path_remove(struct fs_path *p)
332 {
333 	BUG_ON(p->reversed);
334 	while (p->start != p->end && *p->end != '/')
335 		p->end--;
336 	*p->end = 0;
337 }
338 
339 static int fs_path_copy(struct fs_path *p, struct fs_path *from)
340 {
341 	int ret;
342 
343 	p->reversed = from->reversed;
344 	fs_path_reset(p);
345 
346 	ret = fs_path_add_path(p, from);
347 
348 	return ret;
349 }
350 
351 
352 static void fs_path_unreverse(struct fs_path *p)
353 {
354 	char *tmp;
355 	int len;
356 
357 	if (!p->reversed)
358 		return;
359 
360 	tmp = p->start;
361 	len = p->end - p->start;
362 	p->start = p->buf;
363 	p->end = p->start + len;
364 	memmove(p->start, tmp, len + 1);
365 	p->reversed = 0;
366 }
367 
368 static struct btrfs_path *alloc_path_for_send(void)
369 {
370 	struct btrfs_path *path;
371 
372 	path = btrfs_alloc_path();
373 	if (!path)
374 		return NULL;
375 	path->search_commit_root = 1;
376 	path->skip_locking = 1;
377 	return path;
378 }
379 
380 static int write_buf(struct send_ctx *sctx, const void *buf, u32 len)
381 {
382 	int ret;
383 	mm_segment_t old_fs;
384 	u32 pos = 0;
385 
386 	old_fs = get_fs();
387 	set_fs(KERNEL_DS);
388 
389 	while (pos < len) {
390 		ret = vfs_write(sctx->send_filp, (char *)buf + pos, len - pos,
391 				&sctx->send_off);
392 		/* TODO handle that correctly */
393 		/*if (ret == -ERESTARTSYS) {
394 			continue;
395 		}*/
396 		if (ret < 0)
397 			goto out;
398 		if (ret == 0) {
399 			ret = -EIO;
400 			goto out;
401 		}
402 		pos += ret;
403 	}
404 
405 	ret = 0;
406 
407 out:
408 	set_fs(old_fs);
409 	return ret;
410 }
411 
412 static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
413 {
414 	struct btrfs_tlv_header *hdr;
415 	int total_len = sizeof(*hdr) + len;
416 	int left = sctx->send_max_size - sctx->send_size;
417 
418 	if (unlikely(left < total_len))
419 		return -EOVERFLOW;
420 
421 	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
422 	hdr->tlv_type = cpu_to_le16(attr);
423 	hdr->tlv_len = cpu_to_le16(len);
424 	memcpy(hdr + 1, data, len);
425 	sctx->send_size += total_len;
426 
427 	return 0;
428 }
429 
430 #if 0
431 static int tlv_put_u8(struct send_ctx *sctx, u16 attr, u8 value)
432 {
433 	return tlv_put(sctx, attr, &value, sizeof(value));
434 }
435 
436 static int tlv_put_u16(struct send_ctx *sctx, u16 attr, u16 value)
437 {
438 	__le16 tmp = cpu_to_le16(value);
439 	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
440 }
441 
442 static int tlv_put_u32(struct send_ctx *sctx, u16 attr, u32 value)
443 {
444 	__le32 tmp = cpu_to_le32(value);
445 	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
446 }
447 #endif
448 
449 static int tlv_put_u64(struct send_ctx *sctx, u16 attr, u64 value)
450 {
451 	__le64 tmp = cpu_to_le64(value);
452 	return tlv_put(sctx, attr, &tmp, sizeof(tmp));
453 }
454 
455 static int tlv_put_string(struct send_ctx *sctx, u16 attr,
456 			  const char *str, int len)
457 {
458 	if (len == -1)
459 		len = strlen(str);
460 	return tlv_put(sctx, attr, str, len);
461 }
462 
463 static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
464 			const u8 *uuid)
465 {
466 	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
467 }
468 
469 #if 0
470 static int tlv_put_timespec(struct send_ctx *sctx, u16 attr,
471 			    struct timespec *ts)
472 {
473 	struct btrfs_timespec bts;
474 	bts.sec = cpu_to_le64(ts->tv_sec);
475 	bts.nsec = cpu_to_le32(ts->tv_nsec);
476 	return tlv_put(sctx, attr, &bts, sizeof(bts));
477 }
478 #endif
479 
480 static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
481 				  struct extent_buffer *eb,
482 				  struct btrfs_timespec *ts)
483 {
484 	struct btrfs_timespec bts;
485 	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
486 	return tlv_put(sctx, attr, &bts, sizeof(bts));
487 }
488 
489 
490 #define TLV_PUT(sctx, attrtype, attrlen, data) \
491 	do { \
492 		ret = tlv_put(sctx, attrtype, attrlen, data); \
493 		if (ret < 0) \
494 			goto tlv_put_failure; \
495 	} while (0)
496 
497 #define TLV_PUT_INT(sctx, attrtype, bits, value) \
498 	do { \
499 		ret = tlv_put_u##bits(sctx, attrtype, value); \
500 		if (ret < 0) \
501 			goto tlv_put_failure; \
502 	} while (0)
503 
504 #define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
505 #define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
506 #define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
507 #define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
508 #define TLV_PUT_STRING(sctx, attrtype, str, len) \
509 	do { \
510 		ret = tlv_put_string(sctx, attrtype, str, len); \
511 		if (ret < 0) \
512 			goto tlv_put_failure; \
513 	} while (0)
514 #define TLV_PUT_PATH(sctx, attrtype, p) \
515 	do { \
516 		ret = tlv_put_string(sctx, attrtype, p->start, \
517 			p->end - p->start); \
518 		if (ret < 0) \
519 			goto tlv_put_failure; \
520 	} while(0)
521 #define TLV_PUT_UUID(sctx, attrtype, uuid) \
522 	do { \
523 		ret = tlv_put_uuid(sctx, attrtype, uuid); \
524 		if (ret < 0) \
525 			goto tlv_put_failure; \
526 	} while (0)
527 #define TLV_PUT_TIMESPEC(sctx, attrtype, ts) \
528 	do { \
529 		ret = tlv_put_timespec(sctx, attrtype, ts); \
530 		if (ret < 0) \
531 			goto tlv_put_failure; \
532 	} while (0)
533 #define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
534 	do { \
535 		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
536 		if (ret < 0) \
537 			goto tlv_put_failure; \
538 	} while (0)
539 
540 static int send_header(struct send_ctx *sctx)
541 {
542 	struct btrfs_stream_header hdr;
543 
544 	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
545 	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);
546 
547 	return write_buf(sctx, &hdr, sizeof(hdr));
548 }
549 
550 /*
551  * For each command/item we want to send to userspace, we call this function.
552  */
553 static int begin_cmd(struct send_ctx *sctx, int cmd)
554 {
555 	struct btrfs_cmd_header *hdr;
556 
557 	if (!sctx->send_buf) {
558 		WARN_ON(1);
559 		return -EINVAL;
560 	}
561 
562 	BUG_ON(sctx->send_size);
563 
564 	sctx->send_size += sizeof(*hdr);
565 	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
566 	hdr->cmd = cpu_to_le16(cmd);
567 
568 	return 0;
569 }
570 
571 static int send_cmd(struct send_ctx *sctx)
572 {
573 	int ret;
574 	struct btrfs_cmd_header *hdr;
575 	u32 crc;
576 
577 	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
578 	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
579 	hdr->crc = 0;
580 
581 	crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
582 	hdr->crc = cpu_to_le32(crc);
583 
584 	ret = write_buf(sctx, sctx->send_buf, sctx->send_size);
585 
586 	sctx->total_send_size += sctx->send_size;
587 	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
588 	sctx->send_size = 0;
589 
590 	return ret;
591 }
592 
593 /*
594  * Sends a move instruction to user space
595  */
596 static int send_rename(struct send_ctx *sctx,
597 		     struct fs_path *from, struct fs_path *to)
598 {
599 	int ret;
600 
601 verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);
602 
603 	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
604 	if (ret < 0)
605 		goto out;
606 
607 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
608 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);
609 
610 	ret = send_cmd(sctx);
611 
612 tlv_put_failure:
613 out:
614 	return ret;
615 }
616 
617 /*
618  * Sends a link instruction to user space
619  */
620 static int send_link(struct send_ctx *sctx,
621 		     struct fs_path *path, struct fs_path *lnk)
622 {
623 	int ret;
624 
625 verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);
626 
627 	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
628 	if (ret < 0)
629 		goto out;
630 
631 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
632 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);
633 
634 	ret = send_cmd(sctx);
635 
636 tlv_put_failure:
637 out:
638 	return ret;
639 }
640 
641 /*
642  * Sends an unlink instruction to user space
643  */
644 static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
645 {
646 	int ret;
647 
648 verbose_printk("btrfs: send_unlink %s\n", path->start);
649 
650 	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
651 	if (ret < 0)
652 		goto out;
653 
654 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
655 
656 	ret = send_cmd(sctx);
657 
658 tlv_put_failure:
659 out:
660 	return ret;
661 }
662 
663 /*
664  * Sends a rmdir instruction to user space
665  */
666 static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
667 {
668 	int ret;
669 
670 verbose_printk("btrfs: send_rmdir %s\n", path->start);
671 
672 	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
673 	if (ret < 0)
674 		goto out;
675 
676 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
677 
678 	ret = send_cmd(sctx);
679 
680 tlv_put_failure:
681 out:
682 	return ret;
683 }
684 
685 /*
686  * Helper function to retrieve some fields from an inode item.
687  */
688 static int get_inode_info(struct btrfs_root *root,
689 			  u64 ino, u64 *size, u64 *gen,
690 			  u64 *mode, u64 *uid, u64 *gid)
691 {
692 	int ret;
693 	struct btrfs_inode_item *ii;
694 	struct btrfs_key key;
695 	struct btrfs_path *path;
696 
697 	path = alloc_path_for_send();
698 	if (!path)
699 		return -ENOMEM;
700 
701 	key.objectid = ino;
702 	key.type = BTRFS_INODE_ITEM_KEY;
703 	key.offset = 0;
704 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
705 	if (ret < 0)
706 		goto out;
707 	if (ret) {
708 		ret = -ENOENT;
709 		goto out;
710 	}
711 
712 	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
713 			struct btrfs_inode_item);
714 	if (size)
715 		*size = btrfs_inode_size(path->nodes[0], ii);
716 	if (gen)
717 		*gen = btrfs_inode_generation(path->nodes[0], ii);
718 	if (mode)
719 		*mode = btrfs_inode_mode(path->nodes[0], ii);
720 	if (uid)
721 		*uid = btrfs_inode_uid(path->nodes[0], ii);
722 	if (gid)
723 		*gid = btrfs_inode_gid(path->nodes[0], ii);
724 
725 out:
726 	btrfs_free_path(path);
727 	return ret;
728 }
729 
730 typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
731 				   struct fs_path *p,
732 				   void *ctx);
733 
734 /*
735  * Helper function to iterate the entries in ONE btrfs_inode_ref.
736  * The iterate callback may return a non zero value to stop iteration. This can
737  * be a negative value for error codes or 1 to simply stop it.
738  *
739  * path must point to the INODE_REF when called.
740  */
741 static int iterate_inode_ref(struct send_ctx *sctx,
742 			     struct btrfs_root *root, struct btrfs_path *path,
743 			     struct btrfs_key *found_key, int resolve,
744 			     iterate_inode_ref_t iterate, void *ctx)
745 {
746 	struct extent_buffer *eb;
747 	struct btrfs_item *item;
748 	struct btrfs_inode_ref *iref;
749 	struct btrfs_path *tmp_path;
750 	struct fs_path *p;
751 	u32 cur;
752 	u32 len;
753 	u32 total;
754 	int slot;
755 	u32 name_len;
756 	char *start;
757 	int ret = 0;
758 	int num;
759 	int index;
760 
761 	p = fs_path_alloc_reversed(sctx);
762 	if (!p)
763 		return -ENOMEM;
764 
765 	tmp_path = alloc_path_for_send();
766 	if (!tmp_path) {
767 		fs_path_free(sctx, p);
768 		return -ENOMEM;
769 	}
770 
771 	eb = path->nodes[0];
772 	slot = path->slots[0];
773 	item = btrfs_item_nr(eb, slot);
774 	iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
775 	cur = 0;
776 	len = 0;
777 	total = btrfs_item_size(eb, item);
778 
779 	num = 0;
780 	while (cur < total) {
781 		fs_path_reset(p);
782 
783 		name_len = btrfs_inode_ref_name_len(eb, iref);
784 		index = btrfs_inode_ref_index(eb, iref);
785 		if (resolve) {
786 			start = btrfs_iref_to_path(root, tmp_path, iref, eb,
787 						found_key->offset, p->buf,
788 						p->buf_len);
789 			if (IS_ERR(start)) {
790 				ret = PTR_ERR(start);
791 				goto out;
792 			}
793 			if (start < p->buf) {
794 				/* overflow , try again with larger buffer */
795 				ret = fs_path_ensure_buf(p,
796 						p->buf_len + p->buf - start);
797 				if (ret < 0)
798 					goto out;
799 				start = btrfs_iref_to_path(root, tmp_path, iref,
800 						eb, found_key->offset, p->buf,
801 						p->buf_len);
802 				if (IS_ERR(start)) {
803 					ret = PTR_ERR(start);
804 					goto out;
805 				}
806 				BUG_ON(start < p->buf);
807 			}
808 			p->start = start;
809 		} else {
810 			ret = fs_path_add_from_extent_buffer(p, eb,
811 					(unsigned long)(iref + 1), name_len);
812 			if (ret < 0)
813 				goto out;
814 		}
815 
816 
817 		len = sizeof(*iref) + name_len;
818 		iref = (struct btrfs_inode_ref *)((char *)iref + len);
819 		cur += len;
820 
821 		ret = iterate(num, found_key->offset, index, p, ctx);
822 		if (ret)
823 			goto out;
824 
825 		num++;
826 	}
827 
828 out:
829 	btrfs_free_path(tmp_path);
830 	fs_path_free(sctx, p);
831 	return ret;
832 }
833 
834 typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
835 				  const char *name, int name_len,
836 				  const char *data, int data_len,
837 				  u8 type, void *ctx);
838 
839 /*
840  * Helper function to iterate the entries in ONE btrfs_dir_item.
841  * The iterate callback may return a non zero value to stop iteration. This can
842  * be a negative value for error codes or 1 to simply stop it.
843  *
844  * path must point to the dir item when called.
845  */
846 static int iterate_dir_item(struct send_ctx *sctx,
847 			    struct btrfs_root *root, struct btrfs_path *path,
848 			    struct btrfs_key *found_key,
849 			    iterate_dir_item_t iterate, void *ctx)
850 {
851 	int ret = 0;
852 	struct extent_buffer *eb;
853 	struct btrfs_item *item;
854 	struct btrfs_dir_item *di;
855 	struct btrfs_path *tmp_path = NULL;
856 	struct btrfs_key di_key;
857 	char *buf = NULL;
858 	char *buf2 = NULL;
859 	int buf_len;
860 	int buf_virtual = 0;
861 	u32 name_len;
862 	u32 data_len;
863 	u32 cur;
864 	u32 len;
865 	u32 total;
866 	int slot;
867 	int num;
868 	u8 type;
869 
870 	buf_len = PAGE_SIZE;
871 	buf = kmalloc(buf_len, GFP_NOFS);
872 	if (!buf) {
873 		ret = -ENOMEM;
874 		goto out;
875 	}
876 
877 	tmp_path = alloc_path_for_send();
878 	if (!tmp_path) {
879 		ret = -ENOMEM;
880 		goto out;
881 	}
882 
883 	eb = path->nodes[0];
884 	slot = path->slots[0];
885 	item = btrfs_item_nr(eb, slot);
886 	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
887 	cur = 0;
888 	len = 0;
889 	total = btrfs_item_size(eb, item);
890 
891 	num = 0;
892 	while (cur < total) {
893 		name_len = btrfs_dir_name_len(eb, di);
894 		data_len = btrfs_dir_data_len(eb, di);
895 		type = btrfs_dir_type(eb, di);
896 		btrfs_dir_item_key_to_cpu(eb, di, &di_key);
897 
898 		if (name_len + data_len > buf_len) {
899 			buf_len = PAGE_ALIGN(name_len + data_len);
900 			if (buf_virtual) {
901 				buf2 = vmalloc(buf_len);
902 				if (!buf2) {
903 					ret = -ENOMEM;
904 					goto out;
905 				}
906 				vfree(buf);
907 			} else {
908 				buf2 = krealloc(buf, buf_len, GFP_NOFS);
909 				if (!buf2) {
910 					buf2 = vmalloc(buf_len);
911 					if (!buf2) {
912 						ret = -ENOMEM;
913 						goto out;
914 					}
915 					kfree(buf);
916 					buf_virtual = 1;
917 				}
918 			}
919 
920 			buf = buf2;
921 			buf2 = NULL;
922 		}
923 
924 		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
925 				name_len + data_len);
926 
927 		len = sizeof(*di) + name_len + data_len;
928 		di = (struct btrfs_dir_item *)((char *)di + len);
929 		cur += len;
930 
931 		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
932 				data_len, type, ctx);
933 		if (ret < 0)
934 			goto out;
935 		if (ret) {
936 			ret = 0;
937 			goto out;
938 		}
939 
940 		num++;
941 	}
942 
943 out:
944 	btrfs_free_path(tmp_path);
945 	if (buf_virtual)
946 		vfree(buf);
947 	else
948 		kfree(buf);
949 	return ret;
950 }
951 
952 static int __copy_first_ref(int num, u64 dir, int index,
953 			    struct fs_path *p, void *ctx)
954 {
955 	int ret;
956 	struct fs_path *pt = ctx;
957 
958 	ret = fs_path_copy(pt, p);
959 	if (ret < 0)
960 		return ret;
961 
962 	/* we want the first only */
963 	return 1;
964 }
965 
966 /*
967  * Retrieve the first path of an inode. If an inode has more then one
968  * ref/hardlink, this is ignored.
969  */
970 static int get_inode_path(struct send_ctx *sctx, struct btrfs_root *root,
971 			  u64 ino, struct fs_path *path)
972 {
973 	int ret;
974 	struct btrfs_key key, found_key;
975 	struct btrfs_path *p;
976 
977 	p = alloc_path_for_send();
978 	if (!p)
979 		return -ENOMEM;
980 
981 	fs_path_reset(path);
982 
983 	key.objectid = ino;
984 	key.type = BTRFS_INODE_REF_KEY;
985 	key.offset = 0;
986 
987 	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
988 	if (ret < 0)
989 		goto out;
990 	if (ret) {
991 		ret = 1;
992 		goto out;
993 	}
994 	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
995 	if (found_key.objectid != ino ||
996 		found_key.type != BTRFS_INODE_REF_KEY) {
997 		ret = -ENOENT;
998 		goto out;
999 	}
1000 
1001 	ret = iterate_inode_ref(sctx, root, p, &found_key, 1,
1002 			__copy_first_ref, path);
1003 	if (ret < 0)
1004 		goto out;
1005 	ret = 0;
1006 
1007 out:
1008 	btrfs_free_path(p);
1009 	return ret;
1010 }
1011 
1012 struct backref_ctx {
1013 	struct send_ctx *sctx;
1014 
1015 	/* number of total found references */
1016 	u64 found;
1017 
1018 	/*
1019 	 * used for clones found in send_root. clones found behind cur_objectid
1020 	 * and cur_offset are not considered as allowed clones.
1021 	 */
1022 	u64 cur_objectid;
1023 	u64 cur_offset;
1024 
1025 	/* may be truncated in case it's the last extent in a file */
1026 	u64 extent_len;
1027 
1028 	/* Just to check for bugs in backref resolving */
1029 	int found_in_send_root;
1030 };
1031 
1032 static int __clone_root_cmp_bsearch(const void *key, const void *elt)
1033 {
1034 	u64 root = (u64)key;
1035 	struct clone_root *cr = (struct clone_root *)elt;
1036 
1037 	if (root < cr->root->objectid)
1038 		return -1;
1039 	if (root > cr->root->objectid)
1040 		return 1;
1041 	return 0;
1042 }
1043 
1044 static int __clone_root_cmp_sort(const void *e1, const void *e2)
1045 {
1046 	struct clone_root *cr1 = (struct clone_root *)e1;
1047 	struct clone_root *cr2 = (struct clone_root *)e2;
1048 
1049 	if (cr1->root->objectid < cr2->root->objectid)
1050 		return -1;
1051 	if (cr1->root->objectid > cr2->root->objectid)
1052 		return 1;
1053 	return 0;
1054 }
1055 
1056 /*
1057  * Called for every backref that is found for the current extent.
1058  */
1059 static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
1060 {
1061 	struct backref_ctx *bctx = ctx_;
1062 	struct clone_root *found;
1063 	int ret;
1064 	u64 i_size;
1065 
1066 	/* First check if the root is in the list of accepted clone sources */
1067 	found = bsearch((void *)root, bctx->sctx->clone_roots,
1068 			bctx->sctx->clone_roots_cnt,
1069 			sizeof(struct clone_root),
1070 			__clone_root_cmp_bsearch);
1071 	if (!found)
1072 		return 0;
1073 
1074 	if (found->root == bctx->sctx->send_root &&
1075 	    ino == bctx->cur_objectid &&
1076 	    offset == bctx->cur_offset) {
1077 		bctx->found_in_send_root = 1;
1078 	}
1079 
1080 	/*
1081 	 * There are inodes that have extents that lie behind it's i_size. Don't
1082 	 * accept clones from these extents.
1083 	 */
1084 	ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL);
1085 	if (ret < 0)
1086 		return ret;
1087 
1088 	if (offset + bctx->extent_len > i_size)
1089 		return 0;
1090 
1091 	/*
1092 	 * Make sure we don't consider clones from send_root that are
1093 	 * behind the current inode/offset.
1094 	 */
1095 	if (found->root == bctx->sctx->send_root) {
1096 		/*
1097 		 * TODO for the moment we don't accept clones from the inode
1098 		 * that is currently send. We may change this when
1099 		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
1100 		 * file.
1101 		 */
1102 		if (ino >= bctx->cur_objectid)
1103 			return 0;
1104 		/*if (ino > ctx->cur_objectid)
1105 			return 0;
1106 		if (offset + ctx->extent_len > ctx->cur_offset)
1107 			return 0;*/
1108 
1109 		bctx->found++;
1110 		found->found_refs++;
1111 		found->ino = ino;
1112 		found->offset = offset;
1113 		return 0;
1114 	}
1115 
1116 	bctx->found++;
1117 	found->found_refs++;
1118 	if (ino < found->ino) {
1119 		found->ino = ino;
1120 		found->offset = offset;
1121 	} else if (found->ino == ino) {
1122 		/*
1123 		 * same extent found more then once in the same file.
1124 		 */
1125 		if (found->offset > offset + bctx->extent_len)
1126 			found->offset = offset;
1127 	}
1128 
1129 	return 0;
1130 }
1131 
1132 /*
1133  * path must point to the extent item when called.
1134  */
1135 static int find_extent_clone(struct send_ctx *sctx,
1136 			     struct btrfs_path *path,
1137 			     u64 ino, u64 data_offset,
1138 			     u64 ino_size,
1139 			     struct clone_root **found)
1140 {
1141 	int ret;
1142 	int extent_type;
1143 	u64 logical;
1144 	u64 num_bytes;
1145 	u64 extent_item_pos;
1146 	struct btrfs_file_extent_item *fi;
1147 	struct extent_buffer *eb = path->nodes[0];
1148 	struct backref_ctx backref_ctx;
1149 	struct clone_root *cur_clone_root;
1150 	struct btrfs_key found_key;
1151 	struct btrfs_path *tmp_path;
1152 	u32 i;
1153 
1154 	tmp_path = alloc_path_for_send();
1155 	if (!tmp_path)
1156 		return -ENOMEM;
1157 
1158 	if (data_offset >= ino_size) {
1159 		/*
1160 		 * There may be extents that lie behind the file's size.
1161 		 * I at least had this in combination with snapshotting while
1162 		 * writing large files.
1163 		 */
1164 		ret = 0;
1165 		goto out;
1166 	}
1167 
1168 	fi = btrfs_item_ptr(eb, path->slots[0],
1169 			struct btrfs_file_extent_item);
1170 	extent_type = btrfs_file_extent_type(eb, fi);
1171 	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1172 		ret = -ENOENT;
1173 		goto out;
1174 	}
1175 
1176 	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
1177 	logical = btrfs_file_extent_disk_bytenr(eb, fi);
1178 	if (logical == 0) {
1179 		ret = -ENOENT;
1180 		goto out;
1181 	}
1182 	logical += btrfs_file_extent_offset(eb, fi);
1183 
1184 	ret = extent_from_logical(sctx->send_root->fs_info,
1185 			logical, tmp_path, &found_key);
1186 	btrfs_release_path(tmp_path);
1187 
1188 	if (ret < 0)
1189 		goto out;
1190 	if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1191 		ret = -EIO;
1192 		goto out;
1193 	}
1194 
1195 	/*
1196 	 * Setup the clone roots.
1197 	 */
1198 	for (i = 0; i < sctx->clone_roots_cnt; i++) {
1199 		cur_clone_root = sctx->clone_roots + i;
1200 		cur_clone_root->ino = (u64)-1;
1201 		cur_clone_root->offset = 0;
1202 		cur_clone_root->found_refs = 0;
1203 	}
1204 
1205 	backref_ctx.sctx = sctx;
1206 	backref_ctx.found = 0;
1207 	backref_ctx.cur_objectid = ino;
1208 	backref_ctx.cur_offset = data_offset;
1209 	backref_ctx.found_in_send_root = 0;
1210 	backref_ctx.extent_len = num_bytes;
1211 
1212 	/*
1213 	 * The last extent of a file may be too large due to page alignment.
1214 	 * We need to adjust extent_len in this case so that the checks in
1215 	 * __iterate_backrefs work.
1216 	 */
1217 	if (data_offset + num_bytes >= ino_size)
1218 		backref_ctx.extent_len = ino_size - data_offset;
1219 
1220 	/*
1221 	 * Now collect all backrefs.
1222 	 */
1223 	extent_item_pos = logical - found_key.objectid;
1224 	ret = iterate_extent_inodes(sctx->send_root->fs_info,
1225 					found_key.objectid, extent_item_pos, 1,
1226 					__iterate_backrefs, &backref_ctx);
1227 	if (ret < 0)
1228 		goto out;
1229 
1230 	if (!backref_ctx.found_in_send_root) {
1231 		/* found a bug in backref code? */
1232 		ret = -EIO;
1233 		printk(KERN_ERR "btrfs: ERROR did not find backref in "
1234 				"send_root. inode=%llu, offset=%llu, "
1235 				"logical=%llu\n",
1236 				ino, data_offset, logical);
1237 		goto out;
1238 	}
1239 
1240 verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
1241 		"ino=%llu, "
1242 		"num_bytes=%llu, logical=%llu\n",
1243 		data_offset, ino, num_bytes, logical);
1244 
1245 	if (!backref_ctx.found)
1246 		verbose_printk("btrfs:    no clones found\n");
1247 
1248 	cur_clone_root = NULL;
1249 	for (i = 0; i < sctx->clone_roots_cnt; i++) {
1250 		if (sctx->clone_roots[i].found_refs) {
1251 			if (!cur_clone_root)
1252 				cur_clone_root = sctx->clone_roots + i;
1253 			else if (sctx->clone_roots[i].root == sctx->send_root)
1254 				/* prefer clones from send_root over others */
1255 				cur_clone_root = sctx->clone_roots + i;
1256 			break;
1257 		}
1258 
1259 	}
1260 
1261 	if (cur_clone_root) {
1262 		*found = cur_clone_root;
1263 		ret = 0;
1264 	} else {
1265 		ret = -ENOENT;
1266 	}
1267 
1268 out:
1269 	btrfs_free_path(tmp_path);
1270 	return ret;
1271 }
1272 
1273 static int read_symlink(struct send_ctx *sctx,
1274 			struct btrfs_root *root,
1275 			u64 ino,
1276 			struct fs_path *dest)
1277 {
1278 	int ret;
1279 	struct btrfs_path *path;
1280 	struct btrfs_key key;
1281 	struct btrfs_file_extent_item *ei;
1282 	u8 type;
1283 	u8 compression;
1284 	unsigned long off;
1285 	int len;
1286 
1287 	path = alloc_path_for_send();
1288 	if (!path)
1289 		return -ENOMEM;
1290 
1291 	key.objectid = ino;
1292 	key.type = BTRFS_EXTENT_DATA_KEY;
1293 	key.offset = 0;
1294 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1295 	if (ret < 0)
1296 		goto out;
1297 	BUG_ON(ret);
1298 
1299 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
1300 			struct btrfs_file_extent_item);
1301 	type = btrfs_file_extent_type(path->nodes[0], ei);
1302 	compression = btrfs_file_extent_compression(path->nodes[0], ei);
1303 	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
1304 	BUG_ON(compression);
1305 
1306 	off = btrfs_file_extent_inline_start(ei);
1307 	len = btrfs_file_extent_inline_len(path->nodes[0], ei);
1308 
1309 	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
1310 	if (ret < 0)
1311 		goto out;
1312 
1313 out:
1314 	btrfs_free_path(path);
1315 	return ret;
1316 }
1317 
1318 /*
1319  * Helper function to generate a file name that is unique in the root of
1320  * send_root and parent_root. This is used to generate names for orphan inodes.
1321  */
1322 static int gen_unique_name(struct send_ctx *sctx,
1323 			   u64 ino, u64 gen,
1324 			   struct fs_path *dest)
1325 {
1326 	int ret = 0;
1327 	struct btrfs_path *path;
1328 	struct btrfs_dir_item *di;
1329 	char tmp[64];
1330 	int len;
1331 	u64 idx = 0;
1332 
1333 	path = alloc_path_for_send();
1334 	if (!path)
1335 		return -ENOMEM;
1336 
1337 	while (1) {
1338 		len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu",
1339 				ino, gen, idx);
1340 		if (len >= sizeof(tmp)) {
1341 			/* should really not happen */
1342 			ret = -EOVERFLOW;
1343 			goto out;
1344 		}
1345 
1346 		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
1347 				path, BTRFS_FIRST_FREE_OBJECTID,
1348 				tmp, strlen(tmp), 0);
1349 		btrfs_release_path(path);
1350 		if (IS_ERR(di)) {
1351 			ret = PTR_ERR(di);
1352 			goto out;
1353 		}
1354 		if (di) {
1355 			/* not unique, try again */
1356 			idx++;
1357 			continue;
1358 		}
1359 
1360 		if (!sctx->parent_root) {
1361 			/* unique */
1362 			ret = 0;
1363 			break;
1364 		}
1365 
1366 		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
1367 				path, BTRFS_FIRST_FREE_OBJECTID,
1368 				tmp, strlen(tmp), 0);
1369 		btrfs_release_path(path);
1370 		if (IS_ERR(di)) {
1371 			ret = PTR_ERR(di);
1372 			goto out;
1373 		}
1374 		if (di) {
1375 			/* not unique, try again */
1376 			idx++;
1377 			continue;
1378 		}
1379 		/* unique */
1380 		break;
1381 	}
1382 
1383 	ret = fs_path_add(dest, tmp, strlen(tmp));
1384 
1385 out:
1386 	btrfs_free_path(path);
1387 	return ret;
1388 }
1389 
1390 enum inode_state {
1391 	inode_state_no_change,
1392 	inode_state_will_create,
1393 	inode_state_did_create,
1394 	inode_state_will_delete,
1395 	inode_state_did_delete,
1396 };
1397 
1398 static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
1399 {
1400 	int ret;
1401 	int left_ret;
1402 	int right_ret;
1403 	u64 left_gen;
1404 	u64 right_gen;
1405 
1406 	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
1407 			NULL);
1408 	if (ret < 0 && ret != -ENOENT)
1409 		goto out;
1410 	left_ret = ret;
1411 
1412 	if (!sctx->parent_root) {
1413 		right_ret = -ENOENT;
1414 	} else {
1415 		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
1416 				NULL, NULL, NULL);
1417 		if (ret < 0 && ret != -ENOENT)
1418 			goto out;
1419 		right_ret = ret;
1420 	}
1421 
1422 	if (!left_ret && !right_ret) {
1423 		if (left_gen == gen && right_gen == gen)
1424 			ret = inode_state_no_change;
1425 		else if (left_gen == gen) {
1426 			if (ino < sctx->send_progress)
1427 				ret = inode_state_did_create;
1428 			else
1429 				ret = inode_state_will_create;
1430 		} else if (right_gen == gen) {
1431 			if (ino < sctx->send_progress)
1432 				ret = inode_state_did_delete;
1433 			else
1434 				ret = inode_state_will_delete;
1435 		} else  {
1436 			ret = -ENOENT;
1437 		}
1438 	} else if (!left_ret) {
1439 		if (left_gen == gen) {
1440 			if (ino < sctx->send_progress)
1441 				ret = inode_state_did_create;
1442 			else
1443 				ret = inode_state_will_create;
1444 		} else {
1445 			ret = -ENOENT;
1446 		}
1447 	} else if (!right_ret) {
1448 		if (right_gen == gen) {
1449 			if (ino < sctx->send_progress)
1450 				ret = inode_state_did_delete;
1451 			else
1452 				ret = inode_state_will_delete;
1453 		} else {
1454 			ret = -ENOENT;
1455 		}
1456 	} else {
1457 		ret = -ENOENT;
1458 	}
1459 
1460 out:
1461 	return ret;
1462 }
1463 
1464 static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
1465 {
1466 	int ret;
1467 
1468 	ret = get_cur_inode_state(sctx, ino, gen);
1469 	if (ret < 0)
1470 		goto out;
1471 
1472 	if (ret == inode_state_no_change ||
1473 	    ret == inode_state_did_create ||
1474 	    ret == inode_state_will_delete)
1475 		ret = 1;
1476 	else
1477 		ret = 0;
1478 
1479 out:
1480 	return ret;
1481 }
1482 
1483 /*
1484  * Helper function to lookup a dir item in a dir.
1485  */
1486 static int lookup_dir_item_inode(struct btrfs_root *root,
1487 				 u64 dir, const char *name, int name_len,
1488 				 u64 *found_inode,
1489 				 u8 *found_type)
1490 {
1491 	int ret = 0;
1492 	struct btrfs_dir_item *di;
1493 	struct btrfs_key key;
1494 	struct btrfs_path *path;
1495 
1496 	path = alloc_path_for_send();
1497 	if (!path)
1498 		return -ENOMEM;
1499 
1500 	di = btrfs_lookup_dir_item(NULL, root, path,
1501 			dir, name, name_len, 0);
1502 	if (!di) {
1503 		ret = -ENOENT;
1504 		goto out;
1505 	}
1506 	if (IS_ERR(di)) {
1507 		ret = PTR_ERR(di);
1508 		goto out;
1509 	}
1510 	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
1511 	*found_inode = key.objectid;
1512 	*found_type = btrfs_dir_type(path->nodes[0], di);
1513 
1514 out:
1515 	btrfs_free_path(path);
1516 	return ret;
1517 }
1518 
1519 static int get_first_ref(struct send_ctx *sctx,
1520 			 struct btrfs_root *root, u64 ino,
1521 			 u64 *dir, u64 *dir_gen, struct fs_path *name)
1522 {
1523 	int ret;
1524 	struct btrfs_key key;
1525 	struct btrfs_key found_key;
1526 	struct btrfs_path *path;
1527 	struct btrfs_inode_ref *iref;
1528 	int len;
1529 
1530 	path = alloc_path_for_send();
1531 	if (!path)
1532 		return -ENOMEM;
1533 
1534 	key.objectid = ino;
1535 	key.type = BTRFS_INODE_REF_KEY;
1536 	key.offset = 0;
1537 
1538 	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
1539 	if (ret < 0)
1540 		goto out;
1541 	if (!ret)
1542 		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1543 				path->slots[0]);
1544 	if (ret || found_key.objectid != key.objectid ||
1545 	    found_key.type != key.type) {
1546 		ret = -ENOENT;
1547 		goto out;
1548 	}
1549 
1550 	iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1551 			struct btrfs_inode_ref);
1552 	len = btrfs_inode_ref_name_len(path->nodes[0], iref);
1553 	ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
1554 			(unsigned long)(iref + 1), len);
1555 	if (ret < 0)
1556 		goto out;
1557 	btrfs_release_path(path);
1558 
1559 	ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL,
1560 			NULL);
1561 	if (ret < 0)
1562 		goto out;
1563 
1564 	*dir = found_key.offset;
1565 
1566 out:
1567 	btrfs_free_path(path);
1568 	return ret;
1569 }
1570 
1571 static int is_first_ref(struct send_ctx *sctx,
1572 			struct btrfs_root *root,
1573 			u64 ino, u64 dir,
1574 			const char *name, int name_len)
1575 {
1576 	int ret;
1577 	struct fs_path *tmp_name;
1578 	u64 tmp_dir;
1579 	u64 tmp_dir_gen;
1580 
1581 	tmp_name = fs_path_alloc(sctx);
1582 	if (!tmp_name)
1583 		return -ENOMEM;
1584 
1585 	ret = get_first_ref(sctx, root, ino, &tmp_dir, &tmp_dir_gen, tmp_name);
1586 	if (ret < 0)
1587 		goto out;
1588 
1589 	if (name_len != fs_path_len(tmp_name)) {
1590 		ret = 0;
1591 		goto out;
1592 	}
1593 
1594 	ret = memcmp(tmp_name->start, name, name_len);
1595 	if (ret)
1596 		ret = 0;
1597 	else
1598 		ret = 1;
1599 
1600 out:
1601 	fs_path_free(sctx, tmp_name);
1602 	return ret;
1603 }
1604 
1605 static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
1606 			      const char *name, int name_len,
1607 			      u64 *who_ino, u64 *who_gen)
1608 {
1609 	int ret = 0;
1610 	u64 other_inode = 0;
1611 	u8 other_type = 0;
1612 
1613 	if (!sctx->parent_root)
1614 		goto out;
1615 
1616 	ret = is_inode_existent(sctx, dir, dir_gen);
1617 	if (ret <= 0)
1618 		goto out;
1619 
1620 	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
1621 			&other_inode, &other_type);
1622 	if (ret < 0 && ret != -ENOENT)
1623 		goto out;
1624 	if (ret) {
1625 		ret = 0;
1626 		goto out;
1627 	}
1628 
1629 	if (other_inode > sctx->send_progress) {
1630 		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
1631 				who_gen, NULL, NULL, NULL);
1632 		if (ret < 0)
1633 			goto out;
1634 
1635 		ret = 1;
1636 		*who_ino = other_inode;
1637 	} else {
1638 		ret = 0;
1639 	}
1640 
1641 out:
1642 	return ret;
1643 }
1644 
1645 static int did_overwrite_ref(struct send_ctx *sctx,
1646 			    u64 dir, u64 dir_gen,
1647 			    u64 ino, u64 ino_gen,
1648 			    const char *name, int name_len)
1649 {
1650 	int ret = 0;
1651 	u64 gen;
1652 	u64 ow_inode;
1653 	u8 other_type;
1654 
1655 	if (!sctx->parent_root)
1656 		goto out;
1657 
1658 	ret = is_inode_existent(sctx, dir, dir_gen);
1659 	if (ret <= 0)
1660 		goto out;
1661 
1662 	/* check if the ref was overwritten by another ref */
1663 	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
1664 			&ow_inode, &other_type);
1665 	if (ret < 0 && ret != -ENOENT)
1666 		goto out;
1667 	if (ret) {
1668 		/* was never and will never be overwritten */
1669 		ret = 0;
1670 		goto out;
1671 	}
1672 
1673 	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
1674 			NULL);
1675 	if (ret < 0)
1676 		goto out;
1677 
1678 	if (ow_inode == ino && gen == ino_gen) {
1679 		ret = 0;
1680 		goto out;
1681 	}
1682 
1683 	/* we know that it is or will be overwritten. check this now */
1684 	if (ow_inode < sctx->send_progress)
1685 		ret = 1;
1686 	else
1687 		ret = 0;
1688 
1689 out:
1690 	return ret;
1691 }
1692 
1693 static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
1694 {
1695 	int ret = 0;
1696 	struct fs_path *name = NULL;
1697 	u64 dir;
1698 	u64 dir_gen;
1699 
1700 	if (!sctx->parent_root)
1701 		goto out;
1702 
1703 	name = fs_path_alloc(sctx);
1704 	if (!name)
1705 		return -ENOMEM;
1706 
1707 	ret = get_first_ref(sctx, sctx->parent_root, ino, &dir, &dir_gen, name);
1708 	if (ret < 0)
1709 		goto out;
1710 
1711 	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
1712 			name->start, fs_path_len(name));
1713 	if (ret < 0)
1714 		goto out;
1715 
1716 out:
1717 	fs_path_free(sctx, name);
1718 	return ret;
1719 }
1720 
1721 static int name_cache_insert(struct send_ctx *sctx,
1722 			     struct name_cache_entry *nce)
1723 {
1724 	int ret = 0;
1725 	struct name_cache_entry **ncea;
1726 
1727 	ncea = radix_tree_lookup(&sctx->name_cache, nce->ino);
1728 	if (ncea) {
1729 		if (!ncea[0])
1730 			ncea[0] = nce;
1731 		else if (!ncea[1])
1732 			ncea[1] = nce;
1733 		else
1734 			BUG();
1735 	} else {
1736 		ncea = kmalloc(sizeof(void *) * 2, GFP_NOFS);
1737 		if (!ncea)
1738 			return -ENOMEM;
1739 
1740 		ncea[0] = nce;
1741 		ncea[1] = NULL;
1742 		ret = radix_tree_insert(&sctx->name_cache, nce->ino, ncea);
1743 		if (ret < 0)
1744 			return ret;
1745 	}
1746 	list_add_tail(&nce->list, &sctx->name_cache_list);
1747 	sctx->name_cache_size++;
1748 
1749 	return ret;
1750 }
1751 
1752 static void name_cache_delete(struct send_ctx *sctx,
1753 			      struct name_cache_entry *nce)
1754 {
1755 	struct name_cache_entry **ncea;
1756 
1757 	ncea = radix_tree_lookup(&sctx->name_cache, nce->ino);
1758 	BUG_ON(!ncea);
1759 
1760 	if (ncea[0] == nce)
1761 		ncea[0] = NULL;
1762 	else if (ncea[1] == nce)
1763 		ncea[1] = NULL;
1764 	else
1765 		BUG();
1766 
1767 	if (!ncea[0] && !ncea[1]) {
1768 		radix_tree_delete(&sctx->name_cache, nce->ino);
1769 		kfree(ncea);
1770 	}
1771 
1772 	list_del(&nce->list);
1773 
1774 	sctx->name_cache_size--;
1775 }
1776 
1777 static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
1778 						    u64 ino, u64 gen)
1779 {
1780 	struct name_cache_entry **ncea;
1781 
1782 	ncea = radix_tree_lookup(&sctx->name_cache, ino);
1783 	if (!ncea)
1784 		return NULL;
1785 
1786 	if (ncea[0] && ncea[0]->gen == gen)
1787 		return ncea[0];
1788 	else if (ncea[1] && ncea[1]->gen == gen)
1789 		return ncea[1];
1790 	return NULL;
1791 }
1792 
1793 static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
1794 {
1795 	list_del(&nce->list);
1796 	list_add_tail(&nce->list, &sctx->name_cache_list);
1797 }
1798 
1799 static void name_cache_clean_unused(struct send_ctx *sctx)
1800 {
1801 	struct name_cache_entry *nce;
1802 
1803 	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
1804 		return;
1805 
1806 	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
1807 		nce = list_entry(sctx->name_cache_list.next,
1808 				struct name_cache_entry, list);
1809 		name_cache_delete(sctx, nce);
1810 		kfree(nce);
1811 	}
1812 }
1813 
1814 static void name_cache_free(struct send_ctx *sctx)
1815 {
1816 	struct name_cache_entry *nce;
1817 	struct name_cache_entry *tmp;
1818 
1819 	list_for_each_entry_safe(nce, tmp, &sctx->name_cache_list, list) {
1820 		name_cache_delete(sctx, nce);
1821 	}
1822 }
1823 
1824 static int __get_cur_name_and_parent(struct send_ctx *sctx,
1825 				     u64 ino, u64 gen,
1826 				     u64 *parent_ino,
1827 				     u64 *parent_gen,
1828 				     struct fs_path *dest)
1829 {
1830 	int ret;
1831 	int nce_ret;
1832 	struct btrfs_path *path = NULL;
1833 	struct name_cache_entry *nce = NULL;
1834 
1835 	nce = name_cache_search(sctx, ino, gen);
1836 	if (nce) {
1837 		if (ino < sctx->send_progress && nce->need_later_update) {
1838 			name_cache_delete(sctx, nce);
1839 			kfree(nce);
1840 			nce = NULL;
1841 		} else {
1842 			name_cache_used(sctx, nce);
1843 			*parent_ino = nce->parent_ino;
1844 			*parent_gen = nce->parent_gen;
1845 			ret = fs_path_add(dest, nce->name, nce->name_len);
1846 			if (ret < 0)
1847 				goto out;
1848 			ret = nce->ret;
1849 			goto out;
1850 		}
1851 	}
1852 
1853 	path = alloc_path_for_send();
1854 	if (!path)
1855 		return -ENOMEM;
1856 
1857 	ret = is_inode_existent(sctx, ino, gen);
1858 	if (ret < 0)
1859 		goto out;
1860 
1861 	if (!ret) {
1862 		ret = gen_unique_name(sctx, ino, gen, dest);
1863 		if (ret < 0)
1864 			goto out;
1865 		ret = 1;
1866 		goto out_cache;
1867 	}
1868 
1869 	if (ino < sctx->send_progress)
1870 		ret = get_first_ref(sctx, sctx->send_root, ino,
1871 				parent_ino, parent_gen, dest);
1872 	else
1873 		ret = get_first_ref(sctx, sctx->parent_root, ino,
1874 				parent_ino, parent_gen, dest);
1875 	if (ret < 0)
1876 		goto out;
1877 
1878 	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
1879 			dest->start, dest->end - dest->start);
1880 	if (ret < 0)
1881 		goto out;
1882 	if (ret) {
1883 		fs_path_reset(dest);
1884 		ret = gen_unique_name(sctx, ino, gen, dest);
1885 		if (ret < 0)
1886 			goto out;
1887 		ret = 1;
1888 	}
1889 
1890 out_cache:
1891 	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
1892 	if (!nce) {
1893 		ret = -ENOMEM;
1894 		goto out;
1895 	}
1896 
1897 	nce->ino = ino;
1898 	nce->gen = gen;
1899 	nce->parent_ino = *parent_ino;
1900 	nce->parent_gen = *parent_gen;
1901 	nce->name_len = fs_path_len(dest);
1902 	nce->ret = ret;
1903 	strcpy(nce->name, dest->start);
1904 	memset(&nce->use_list, 0, sizeof(nce->use_list));
1905 
1906 	if (ino < sctx->send_progress)
1907 		nce->need_later_update = 0;
1908 	else
1909 		nce->need_later_update = 1;
1910 
1911 	nce_ret = name_cache_insert(sctx, nce);
1912 	if (nce_ret < 0)
1913 		ret = nce_ret;
1914 	name_cache_clean_unused(sctx);
1915 
1916 out:
1917 	btrfs_free_path(path);
1918 	return ret;
1919 }
1920 
1921 /*
1922  * Magic happens here. This function returns the first ref to an inode as it
1923  * would look like while receiving the stream at this point in time.
1924  * We walk the path up to the root. For every inode in between, we check if it
1925  * was already processed/sent. If yes, we continue with the parent as found
1926  * in send_root. If not, we continue with the parent as found in parent_root.
1927  * If we encounter an inode that was deleted at this point in time, we use the
1928  * inodes "orphan" name instead of the real name and stop. Same with new inodes
1929  * that were not created yet and overwritten inodes/refs.
1930  *
1931  * When do we have have orphan inodes:
1932  * 1. When an inode is freshly created and thus no valid refs are available yet
1933  * 2. When a directory lost all it's refs (deleted) but still has dir items
1934  *    inside which were not processed yet (pending for move/delete). If anyone
1935  *    tried to get the path to the dir items, it would get a path inside that
1936  *    orphan directory.
1937  * 3. When an inode is moved around or gets new links, it may overwrite the ref
1938  *    of an unprocessed inode. If in that case the first ref would be
1939  *    overwritten, the overwritten inode gets "orphanized". Later when we
1940  *    process this overwritten inode, it is restored at a new place by moving
1941  *    the orphan inode.
1942  *
1943  * sctx->send_progress tells this function at which point in time receiving
1944  * would be.
1945  */
1946 static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
1947 			struct fs_path *dest)
1948 {
1949 	int ret = 0;
1950 	struct fs_path *name = NULL;
1951 	u64 parent_inode = 0;
1952 	u64 parent_gen = 0;
1953 	int stop = 0;
1954 
1955 	name = fs_path_alloc(sctx);
1956 	if (!name) {
1957 		ret = -ENOMEM;
1958 		goto out;
1959 	}
1960 
1961 	dest->reversed = 1;
1962 	fs_path_reset(dest);
1963 
1964 	while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
1965 		fs_path_reset(name);
1966 
1967 		ret = __get_cur_name_and_parent(sctx, ino, gen,
1968 				&parent_inode, &parent_gen, name);
1969 		if (ret < 0)
1970 			goto out;
1971 		if (ret)
1972 			stop = 1;
1973 
1974 		ret = fs_path_add_path(dest, name);
1975 		if (ret < 0)
1976 			goto out;
1977 
1978 		ino = parent_inode;
1979 		gen = parent_gen;
1980 	}
1981 
1982 out:
1983 	fs_path_free(sctx, name);
1984 	if (!ret)
1985 		fs_path_unreverse(dest);
1986 	return ret;
1987 }
1988 
1989 /*
1990  * Called for regular files when sending extents data. Opens a struct file
1991  * to read from the file.
1992  */
1993 static int open_cur_inode_file(struct send_ctx *sctx)
1994 {
1995 	int ret = 0;
1996 	struct btrfs_key key;
1997 	struct path path;
1998 	struct inode *inode;
1999 	struct dentry *dentry;
2000 	struct file *filp;
2001 	int new = 0;
2002 
2003 	if (sctx->cur_inode_filp)
2004 		goto out;
2005 
2006 	key.objectid = sctx->cur_ino;
2007 	key.type = BTRFS_INODE_ITEM_KEY;
2008 	key.offset = 0;
2009 
2010 	inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root,
2011 			&new);
2012 	if (IS_ERR(inode)) {
2013 		ret = PTR_ERR(inode);
2014 		goto out;
2015 	}
2016 
2017 	dentry = d_obtain_alias(inode);
2018 	inode = NULL;
2019 	if (IS_ERR(dentry)) {
2020 		ret = PTR_ERR(dentry);
2021 		goto out;
2022 	}
2023 
2024 	path.mnt = sctx->mnt;
2025 	path.dentry = dentry;
2026 	filp = dentry_open(&path, O_RDONLY | O_LARGEFILE, current_cred());
2027 	dput(dentry);
2028 	dentry = NULL;
2029 	if (IS_ERR(filp)) {
2030 		ret = PTR_ERR(filp);
2031 		goto out;
2032 	}
2033 	sctx->cur_inode_filp = filp;
2034 
2035 out:
2036 	/*
2037 	 * no xxxput required here as every vfs op
2038 	 * does it by itself on failure
2039 	 */
2040 	return ret;
2041 }
2042 
2043 /*
2044  * Closes the struct file that was created in open_cur_inode_file
2045  */
2046 static int close_cur_inode_file(struct send_ctx *sctx)
2047 {
2048 	int ret = 0;
2049 
2050 	if (!sctx->cur_inode_filp)
2051 		goto out;
2052 
2053 	ret = filp_close(sctx->cur_inode_filp, NULL);
2054 	sctx->cur_inode_filp = NULL;
2055 
2056 out:
2057 	return ret;
2058 }
2059 
2060 /*
2061  * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
2062  */
2063 static int send_subvol_begin(struct send_ctx *sctx)
2064 {
2065 	int ret;
2066 	struct btrfs_root *send_root = sctx->send_root;
2067 	struct btrfs_root *parent_root = sctx->parent_root;
2068 	struct btrfs_path *path;
2069 	struct btrfs_key key;
2070 	struct btrfs_root_ref *ref;
2071 	struct extent_buffer *leaf;
2072 	char *name = NULL;
2073 	int namelen;
2074 
2075 	path = alloc_path_for_send();
2076 	if (!path)
2077 		return -ENOMEM;
2078 
2079 	name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS);
2080 	if (!name) {
2081 		btrfs_free_path(path);
2082 		return -ENOMEM;
2083 	}
2084 
2085 	key.objectid = send_root->objectid;
2086 	key.type = BTRFS_ROOT_BACKREF_KEY;
2087 	key.offset = 0;
2088 
2089 	ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root,
2090 				&key, path, 1, 0);
2091 	if (ret < 0)
2092 		goto out;
2093 	if (ret) {
2094 		ret = -ENOENT;
2095 		goto out;
2096 	}
2097 
2098 	leaf = path->nodes[0];
2099 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2100 	if (key.type != BTRFS_ROOT_BACKREF_KEY ||
2101 	    key.objectid != send_root->objectid) {
2102 		ret = -ENOENT;
2103 		goto out;
2104 	}
2105 	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
2106 	namelen = btrfs_root_ref_name_len(leaf, ref);
2107 	read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen);
2108 	btrfs_release_path(path);
2109 
2110 	if (ret < 0)
2111 		goto out;
2112 
2113 	if (parent_root) {
2114 		ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT);
2115 		if (ret < 0)
2116 			goto out;
2117 	} else {
2118 		ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL);
2119 		if (ret < 0)
2120 			goto out;
2121 	}
2122 
2123 	TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
2124 	TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
2125 			sctx->send_root->root_item.uuid);
2126 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
2127 			sctx->send_root->root_item.ctransid);
2128 	if (parent_root) {
2129 		TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
2130 				sctx->parent_root->root_item.uuid);
2131 		TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
2132 				sctx->parent_root->root_item.ctransid);
2133 	}
2134 
2135 	ret = send_cmd(sctx);
2136 
2137 tlv_put_failure:
2138 out:
2139 	btrfs_free_path(path);
2140 	kfree(name);
2141 	return ret;
2142 }
2143 
2144 static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size)
2145 {
2146 	int ret = 0;
2147 	struct fs_path *p;
2148 
2149 verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size);
2150 
2151 	p = fs_path_alloc(sctx);
2152 	if (!p)
2153 		return -ENOMEM;
2154 
2155 	ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE);
2156 	if (ret < 0)
2157 		goto out;
2158 
2159 	ret = get_cur_path(sctx, ino, gen, p);
2160 	if (ret < 0)
2161 		goto out;
2162 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2163 	TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size);
2164 
2165 	ret = send_cmd(sctx);
2166 
2167 tlv_put_failure:
2168 out:
2169 	fs_path_free(sctx, p);
2170 	return ret;
2171 }
2172 
2173 static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode)
2174 {
2175 	int ret = 0;
2176 	struct fs_path *p;
2177 
2178 verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode);
2179 
2180 	p = fs_path_alloc(sctx);
2181 	if (!p)
2182 		return -ENOMEM;
2183 
2184 	ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD);
2185 	if (ret < 0)
2186 		goto out;
2187 
2188 	ret = get_cur_path(sctx, ino, gen, p);
2189 	if (ret < 0)
2190 		goto out;
2191 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2192 	TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777);
2193 
2194 	ret = send_cmd(sctx);
2195 
2196 tlv_put_failure:
2197 out:
2198 	fs_path_free(sctx, p);
2199 	return ret;
2200 }
2201 
2202 static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid)
2203 {
2204 	int ret = 0;
2205 	struct fs_path *p;
2206 
2207 verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid);
2208 
2209 	p = fs_path_alloc(sctx);
2210 	if (!p)
2211 		return -ENOMEM;
2212 
2213 	ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN);
2214 	if (ret < 0)
2215 		goto out;
2216 
2217 	ret = get_cur_path(sctx, ino, gen, p);
2218 	if (ret < 0)
2219 		goto out;
2220 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2221 	TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid);
2222 	TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid);
2223 
2224 	ret = send_cmd(sctx);
2225 
2226 tlv_put_failure:
2227 out:
2228 	fs_path_free(sctx, p);
2229 	return ret;
2230 }
2231 
2232 static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
2233 {
2234 	int ret = 0;
2235 	struct fs_path *p = NULL;
2236 	struct btrfs_inode_item *ii;
2237 	struct btrfs_path *path = NULL;
2238 	struct extent_buffer *eb;
2239 	struct btrfs_key key;
2240 	int slot;
2241 
2242 verbose_printk("btrfs: send_utimes %llu\n", ino);
2243 
2244 	p = fs_path_alloc(sctx);
2245 	if (!p)
2246 		return -ENOMEM;
2247 
2248 	path = alloc_path_for_send();
2249 	if (!path) {
2250 		ret = -ENOMEM;
2251 		goto out;
2252 	}
2253 
2254 	key.objectid = ino;
2255 	key.type = BTRFS_INODE_ITEM_KEY;
2256 	key.offset = 0;
2257 	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
2258 	if (ret < 0)
2259 		goto out;
2260 
2261 	eb = path->nodes[0];
2262 	slot = path->slots[0];
2263 	ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
2264 
2265 	ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES);
2266 	if (ret < 0)
2267 		goto out;
2268 
2269 	ret = get_cur_path(sctx, ino, gen, p);
2270 	if (ret < 0)
2271 		goto out;
2272 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2273 	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb,
2274 			btrfs_inode_atime(ii));
2275 	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb,
2276 			btrfs_inode_mtime(ii));
2277 	TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb,
2278 			btrfs_inode_ctime(ii));
2279 	/* TODO otime? */
2280 
2281 	ret = send_cmd(sctx);
2282 
2283 tlv_put_failure:
2284 out:
2285 	fs_path_free(sctx, p);
2286 	btrfs_free_path(path);
2287 	return ret;
2288 }
2289 
2290 /*
2291  * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have
2292  * a valid path yet because we did not process the refs yet. So, the inode
2293  * is created as orphan.
2294  */
2295 static int send_create_inode(struct send_ctx *sctx, struct btrfs_path *path,
2296 			     struct btrfs_key *key)
2297 {
2298 	int ret = 0;
2299 	struct extent_buffer *eb = path->nodes[0];
2300 	struct btrfs_inode_item *ii;
2301 	struct fs_path *p;
2302 	int slot = path->slots[0];
2303 	int cmd;
2304 	u64 mode;
2305 
2306 verbose_printk("btrfs: send_create_inode %llu\n", sctx->cur_ino);
2307 
2308 	p = fs_path_alloc(sctx);
2309 	if (!p)
2310 		return -ENOMEM;
2311 
2312 	ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
2313 	mode = btrfs_inode_mode(eb, ii);
2314 
2315 	if (S_ISREG(mode))
2316 		cmd = BTRFS_SEND_C_MKFILE;
2317 	else if (S_ISDIR(mode))
2318 		cmd = BTRFS_SEND_C_MKDIR;
2319 	else if (S_ISLNK(mode))
2320 		cmd = BTRFS_SEND_C_SYMLINK;
2321 	else if (S_ISCHR(mode) || S_ISBLK(mode))
2322 		cmd = BTRFS_SEND_C_MKNOD;
2323 	else if (S_ISFIFO(mode))
2324 		cmd = BTRFS_SEND_C_MKFIFO;
2325 	else if (S_ISSOCK(mode))
2326 		cmd = BTRFS_SEND_C_MKSOCK;
2327 	else {
2328 		printk(KERN_WARNING "btrfs: unexpected inode type %o",
2329 				(int)(mode & S_IFMT));
2330 		ret = -ENOTSUPP;
2331 		goto out;
2332 	}
2333 
2334 	ret = begin_cmd(sctx, cmd);
2335 	if (ret < 0)
2336 		goto out;
2337 
2338 	ret = gen_unique_name(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
2339 	if (ret < 0)
2340 		goto out;
2341 
2342 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
2343 	TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, sctx->cur_ino);
2344 
2345 	if (S_ISLNK(mode)) {
2346 		fs_path_reset(p);
2347 		ret = read_symlink(sctx, sctx->send_root, sctx->cur_ino, p);
2348 		if (ret < 0)
2349 			goto out;
2350 		TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
2351 	} else if (S_ISCHR(mode) || S_ISBLK(mode) ||
2352 		   S_ISFIFO(mode) || S_ISSOCK(mode)) {
2353 		TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, btrfs_inode_rdev(eb, ii));
2354 	}
2355 
2356 	ret = send_cmd(sctx);
2357 	if (ret < 0)
2358 		goto out;
2359 
2360 
2361 tlv_put_failure:
2362 out:
2363 	fs_path_free(sctx, p);
2364 	return ret;
2365 }
2366 
2367 struct recorded_ref {
2368 	struct list_head list;
2369 	char *dir_path;
2370 	char *name;
2371 	struct fs_path *full_path;
2372 	u64 dir;
2373 	u64 dir_gen;
2374 	int dir_path_len;
2375 	int name_len;
2376 };
2377 
2378 /*
2379  * We need to process new refs before deleted refs, but compare_tree gives us
2380  * everything mixed. So we first record all refs and later process them.
2381  * This function is a helper to record one ref.
2382  */
2383 static int record_ref(struct list_head *head, u64 dir,
2384 		      u64 dir_gen, struct fs_path *path)
2385 {
2386 	struct recorded_ref *ref;
2387 	char *tmp;
2388 
2389 	ref = kmalloc(sizeof(*ref), GFP_NOFS);
2390 	if (!ref)
2391 		return -ENOMEM;
2392 
2393 	ref->dir = dir;
2394 	ref->dir_gen = dir_gen;
2395 	ref->full_path = path;
2396 
2397 	tmp = strrchr(ref->full_path->start, '/');
2398 	if (!tmp) {
2399 		ref->name_len = ref->full_path->end - ref->full_path->start;
2400 		ref->name = ref->full_path->start;
2401 		ref->dir_path_len = 0;
2402 		ref->dir_path = ref->full_path->start;
2403 	} else {
2404 		tmp++;
2405 		ref->name_len = ref->full_path->end - tmp;
2406 		ref->name = tmp;
2407 		ref->dir_path = ref->full_path->start;
2408 		ref->dir_path_len = ref->full_path->end -
2409 				ref->full_path->start - 1 - ref->name_len;
2410 	}
2411 
2412 	list_add_tail(&ref->list, head);
2413 	return 0;
2414 }
2415 
2416 static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head)
2417 {
2418 	struct recorded_ref *cur;
2419 	struct recorded_ref *tmp;
2420 
2421 	list_for_each_entry_safe(cur, tmp, head, list) {
2422 		fs_path_free(sctx, cur->full_path);
2423 		kfree(cur);
2424 	}
2425 	INIT_LIST_HEAD(head);
2426 }
2427 
2428 static void free_recorded_refs(struct send_ctx *sctx)
2429 {
2430 	__free_recorded_refs(sctx, &sctx->new_refs);
2431 	__free_recorded_refs(sctx, &sctx->deleted_refs);
2432 }
2433 
2434 /*
2435  * Renames/moves a file/dir to it's orphan name. Used when the first
2436  * ref of an unprocessed inode gets overwritten and for all non empty
2437  * directories.
2438  */
2439 static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen,
2440 			  struct fs_path *path)
2441 {
2442 	int ret;
2443 	struct fs_path *orphan;
2444 
2445 	orphan = fs_path_alloc(sctx);
2446 	if (!orphan)
2447 		return -ENOMEM;
2448 
2449 	ret = gen_unique_name(sctx, ino, gen, orphan);
2450 	if (ret < 0)
2451 		goto out;
2452 
2453 	ret = send_rename(sctx, path, orphan);
2454 
2455 out:
2456 	fs_path_free(sctx, orphan);
2457 	return ret;
2458 }
2459 
2460 /*
2461  * Returns 1 if a directory can be removed at this point in time.
2462  * We check this by iterating all dir items and checking if the inode behind
2463  * the dir item was already processed.
2464  */
2465 static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress)
2466 {
2467 	int ret = 0;
2468 	struct btrfs_root *root = sctx->parent_root;
2469 	struct btrfs_path *path;
2470 	struct btrfs_key key;
2471 	struct btrfs_key found_key;
2472 	struct btrfs_key loc;
2473 	struct btrfs_dir_item *di;
2474 
2475 	path = alloc_path_for_send();
2476 	if (!path)
2477 		return -ENOMEM;
2478 
2479 	key.objectid = dir;
2480 	key.type = BTRFS_DIR_INDEX_KEY;
2481 	key.offset = 0;
2482 
2483 	while (1) {
2484 		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
2485 		if (ret < 0)
2486 			goto out;
2487 		if (!ret) {
2488 			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2489 					path->slots[0]);
2490 		}
2491 		if (ret || found_key.objectid != key.objectid ||
2492 		    found_key.type != key.type) {
2493 			break;
2494 		}
2495 
2496 		di = btrfs_item_ptr(path->nodes[0], path->slots[0],
2497 				struct btrfs_dir_item);
2498 		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc);
2499 
2500 		if (loc.objectid > send_progress) {
2501 			ret = 0;
2502 			goto out;
2503 		}
2504 
2505 		btrfs_release_path(path);
2506 		key.offset = found_key.offset + 1;
2507 	}
2508 
2509 	ret = 1;
2510 
2511 out:
2512 	btrfs_free_path(path);
2513 	return ret;
2514 }
2515 
2516 struct finish_unordered_dir_ctx {
2517 	struct send_ctx *sctx;
2518 	struct fs_path *cur_path;
2519 	struct fs_path *dir_path;
2520 	u64 dir_ino;
2521 	int need_delete;
2522 	int delete_pass;
2523 };
2524 
2525 int __finish_unordered_dir(int num, struct btrfs_key *di_key,
2526 			   const char *name, int name_len,
2527 			   const char *data, int data_len,
2528 			   u8 type, void *ctx)
2529 {
2530 	int ret = 0;
2531 	struct finish_unordered_dir_ctx *fctx = ctx;
2532 	struct send_ctx *sctx = fctx->sctx;
2533 	u64 di_gen;
2534 	u64 di_mode;
2535 	int is_orphan = 0;
2536 
2537 	if (di_key->objectid >= fctx->dir_ino)
2538 		goto out;
2539 
2540 	fs_path_reset(fctx->cur_path);
2541 
2542 	ret = get_inode_info(sctx->send_root, di_key->objectid,
2543 			NULL, &di_gen, &di_mode, NULL, NULL);
2544 	if (ret < 0)
2545 		goto out;
2546 
2547 	ret = is_first_ref(sctx, sctx->send_root, di_key->objectid,
2548 			fctx->dir_ino, name, name_len);
2549 	if (ret < 0)
2550 		goto out;
2551 	if (ret) {
2552 		is_orphan = 1;
2553 		ret = gen_unique_name(sctx, di_key->objectid, di_gen,
2554 				fctx->cur_path);
2555 	} else {
2556 		ret = get_cur_path(sctx, di_key->objectid, di_gen,
2557 				fctx->cur_path);
2558 	}
2559 	if (ret < 0)
2560 		goto out;
2561 
2562 	ret = fs_path_add(fctx->dir_path, name, name_len);
2563 	if (ret < 0)
2564 		goto out;
2565 
2566 	if (!fctx->delete_pass) {
2567 		if (S_ISDIR(di_mode)) {
2568 			ret = send_rename(sctx, fctx->cur_path,
2569 					fctx->dir_path);
2570 		} else {
2571 			ret = send_link(sctx, fctx->dir_path,
2572 					fctx->cur_path);
2573 			if (is_orphan)
2574 				fctx->need_delete = 1;
2575 		}
2576 	} else if (!S_ISDIR(di_mode)) {
2577 		ret = send_unlink(sctx, fctx->cur_path);
2578 	} else {
2579 		ret = 0;
2580 	}
2581 
2582 	fs_path_remove(fctx->dir_path);
2583 
2584 out:
2585 	return ret;
2586 }
2587 
2588 /*
2589  * Go through all dir items and see if we find refs which could not be created
2590  * in the past because the dir did not exist at that time.
2591  */
2592 static int finish_outoforder_dir(struct send_ctx *sctx, u64 dir, u64 dir_gen)
2593 {
2594 	int ret = 0;
2595 	struct btrfs_path *path = NULL;
2596 	struct btrfs_key key;
2597 	struct btrfs_key found_key;
2598 	struct extent_buffer *eb;
2599 	struct finish_unordered_dir_ctx fctx;
2600 	int slot;
2601 
2602 	path = alloc_path_for_send();
2603 	if (!path) {
2604 		ret = -ENOMEM;
2605 		goto out;
2606 	}
2607 
2608 	memset(&fctx, 0, sizeof(fctx));
2609 	fctx.sctx = sctx;
2610 	fctx.cur_path = fs_path_alloc(sctx);
2611 	fctx.dir_path = fs_path_alloc(sctx);
2612 	if (!fctx.cur_path || !fctx.dir_path) {
2613 		ret = -ENOMEM;
2614 		goto out;
2615 	}
2616 	fctx.dir_ino = dir;
2617 
2618 	ret = get_cur_path(sctx, dir, dir_gen, fctx.dir_path);
2619 	if (ret < 0)
2620 		goto out;
2621 
2622 	/*
2623 	 * We do two passes. The first links in the new refs and the second
2624 	 * deletes orphans if required. Deletion of orphans is not required for
2625 	 * directory inodes, as we always have only one ref and use rename
2626 	 * instead of link for those.
2627 	 */
2628 
2629 again:
2630 	key.objectid = dir;
2631 	key.type = BTRFS_DIR_ITEM_KEY;
2632 	key.offset = 0;
2633 	while (1) {
2634 		ret = btrfs_search_slot_for_read(sctx->send_root, &key, path,
2635 				1, 0);
2636 		if (ret < 0)
2637 			goto out;
2638 		eb = path->nodes[0];
2639 		slot = path->slots[0];
2640 		btrfs_item_key_to_cpu(eb, &found_key, slot);
2641 
2642 		if (found_key.objectid != key.objectid ||
2643 		    found_key.type != key.type) {
2644 			btrfs_release_path(path);
2645 			break;
2646 		}
2647 
2648 		ret = iterate_dir_item(sctx, sctx->send_root, path,
2649 				&found_key, __finish_unordered_dir,
2650 				&fctx);
2651 		if (ret < 0)
2652 			goto out;
2653 
2654 		key.offset = found_key.offset + 1;
2655 		btrfs_release_path(path);
2656 	}
2657 
2658 	if (!fctx.delete_pass && fctx.need_delete) {
2659 		fctx.delete_pass = 1;
2660 		goto again;
2661 	}
2662 
2663 out:
2664 	btrfs_free_path(path);
2665 	fs_path_free(sctx, fctx.cur_path);
2666 	fs_path_free(sctx, fctx.dir_path);
2667 	return ret;
2668 }
2669 
2670 /*
2671  * This does all the move/link/unlink/rmdir magic.
2672  */
2673 static int process_recorded_refs(struct send_ctx *sctx)
2674 {
2675 	int ret = 0;
2676 	struct recorded_ref *cur;
2677 	struct ulist *check_dirs = NULL;
2678 	struct ulist_iterator uit;
2679 	struct ulist_node *un;
2680 	struct fs_path *valid_path = NULL;
2681 	u64 ow_inode = 0;
2682 	u64 ow_gen;
2683 	int did_overwrite = 0;
2684 	int is_orphan = 0;
2685 
2686 verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
2687 
2688 	valid_path = fs_path_alloc(sctx);
2689 	if (!valid_path) {
2690 		ret = -ENOMEM;
2691 		goto out;
2692 	}
2693 
2694 	check_dirs = ulist_alloc(GFP_NOFS);
2695 	if (!check_dirs) {
2696 		ret = -ENOMEM;
2697 		goto out;
2698 	}
2699 
2700 	/*
2701 	 * First, check if the first ref of the current inode was overwritten
2702 	 * before. If yes, we know that the current inode was already orphanized
2703 	 * and thus use the orphan name. If not, we can use get_cur_path to
2704 	 * get the path of the first ref as it would like while receiving at
2705 	 * this point in time.
2706 	 * New inodes are always orphan at the beginning, so force to use the
2707 	 * orphan name in this case.
2708 	 * The first ref is stored in valid_path and will be updated if it
2709 	 * gets moved around.
2710 	 */
2711 	if (!sctx->cur_inode_new) {
2712 		ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
2713 				sctx->cur_inode_gen);
2714 		if (ret < 0)
2715 			goto out;
2716 		if (ret)
2717 			did_overwrite = 1;
2718 	}
2719 	if (sctx->cur_inode_new || did_overwrite) {
2720 		ret = gen_unique_name(sctx, sctx->cur_ino,
2721 				sctx->cur_inode_gen, valid_path);
2722 		if (ret < 0)
2723 			goto out;
2724 		is_orphan = 1;
2725 	} else {
2726 		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
2727 				valid_path);
2728 		if (ret < 0)
2729 			goto out;
2730 	}
2731 
2732 	list_for_each_entry(cur, &sctx->new_refs, list) {
2733 		/*
2734 		 * Check if this new ref would overwrite the first ref of
2735 		 * another unprocessed inode. If yes, orphanize the
2736 		 * overwritten inode. If we find an overwritten ref that is
2737 		 * not the first ref, simply unlink it.
2738 		 */
2739 		ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen,
2740 				cur->name, cur->name_len,
2741 				&ow_inode, &ow_gen);
2742 		if (ret < 0)
2743 			goto out;
2744 		if (ret) {
2745 			ret = is_first_ref(sctx, sctx->parent_root,
2746 					ow_inode, cur->dir, cur->name,
2747 					cur->name_len);
2748 			if (ret < 0)
2749 				goto out;
2750 			if (ret) {
2751 				ret = orphanize_inode(sctx, ow_inode, ow_gen,
2752 						cur->full_path);
2753 				if (ret < 0)
2754 					goto out;
2755 			} else {
2756 				ret = send_unlink(sctx, cur->full_path);
2757 				if (ret < 0)
2758 					goto out;
2759 			}
2760 		}
2761 
2762 		/*
2763 		 * link/move the ref to the new place. If we have an orphan
2764 		 * inode, move it and update valid_path. If not, link or move
2765 		 * it depending on the inode mode.
2766 		 */
2767 		if (is_orphan && !sctx->cur_inode_first_ref_orphan) {
2768 			ret = send_rename(sctx, valid_path, cur->full_path);
2769 			if (ret < 0)
2770 				goto out;
2771 			is_orphan = 0;
2772 			ret = fs_path_copy(valid_path, cur->full_path);
2773 			if (ret < 0)
2774 				goto out;
2775 		} else {
2776 			if (S_ISDIR(sctx->cur_inode_mode)) {
2777 				/*
2778 				 * Dirs can't be linked, so move it. For moved
2779 				 * dirs, we always have one new and one deleted
2780 				 * ref. The deleted ref is ignored later.
2781 				 */
2782 				ret = send_rename(sctx, valid_path,
2783 						cur->full_path);
2784 				if (ret < 0)
2785 					goto out;
2786 				ret = fs_path_copy(valid_path, cur->full_path);
2787 				if (ret < 0)
2788 					goto out;
2789 			} else {
2790 				ret = send_link(sctx, cur->full_path,
2791 						valid_path);
2792 				if (ret < 0)
2793 					goto out;
2794 			}
2795 		}
2796 		ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2797 				GFP_NOFS);
2798 		if (ret < 0)
2799 			goto out;
2800 	}
2801 
2802 	if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) {
2803 		/*
2804 		 * Check if we can already rmdir the directory. If not,
2805 		 * orphanize it. For every dir item inside that gets deleted
2806 		 * later, we do this check again and rmdir it then if possible.
2807 		 * See the use of check_dirs for more details.
2808 		 */
2809 		ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino);
2810 		if (ret < 0)
2811 			goto out;
2812 		if (ret) {
2813 			ret = send_rmdir(sctx, valid_path);
2814 			if (ret < 0)
2815 				goto out;
2816 		} else if (!is_orphan) {
2817 			ret = orphanize_inode(sctx, sctx->cur_ino,
2818 					sctx->cur_inode_gen, valid_path);
2819 			if (ret < 0)
2820 				goto out;
2821 			is_orphan = 1;
2822 		}
2823 
2824 		list_for_each_entry(cur, &sctx->deleted_refs, list) {
2825 			ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2826 					GFP_NOFS);
2827 			if (ret < 0)
2828 				goto out;
2829 		}
2830 	} else if (!S_ISDIR(sctx->cur_inode_mode)) {
2831 		/*
2832 		 * We have a non dir inode. Go through all deleted refs and
2833 		 * unlink them if they were not already overwritten by other
2834 		 * inodes.
2835 		 */
2836 		list_for_each_entry(cur, &sctx->deleted_refs, list) {
2837 			ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen,
2838 					sctx->cur_ino, sctx->cur_inode_gen,
2839 					cur->name, cur->name_len);
2840 			if (ret < 0)
2841 				goto out;
2842 			if (!ret) {
2843 				/*
2844 				 * In case the inode was moved to a directory
2845 				 * that was not created yet (see
2846 				 * __record_new_ref), we can not unlink the ref
2847 				 * as it will be needed later when the parent
2848 				 * directory is created, so that we can move in
2849 				 * the inode to the new dir.
2850 				 */
2851 				if (!is_orphan &&
2852 				    sctx->cur_inode_first_ref_orphan) {
2853 					ret = orphanize_inode(sctx,
2854 							sctx->cur_ino,
2855 							sctx->cur_inode_gen,
2856 							cur->full_path);
2857 					if (ret < 0)
2858 						goto out;
2859 					ret = gen_unique_name(sctx,
2860 							sctx->cur_ino,
2861 							sctx->cur_inode_gen,
2862 							valid_path);
2863 					if (ret < 0)
2864 						goto out;
2865 					is_orphan = 1;
2866 
2867 				} else {
2868 					ret = send_unlink(sctx, cur->full_path);
2869 					if (ret < 0)
2870 						goto out;
2871 				}
2872 			}
2873 			ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
2874 					GFP_NOFS);
2875 			if (ret < 0)
2876 				goto out;
2877 		}
2878 
2879 		/*
2880 		 * If the inode is still orphan, unlink the orphan. This may
2881 		 * happen when a previous inode did overwrite the first ref
2882 		 * of this inode and no new refs were added for the current
2883 		 * inode.
2884 		 * We can however not delete the orphan in case the inode relies
2885 		 * in a directory that was not created yet (see
2886 		 * __record_new_ref)
2887 		 */
2888 		if (is_orphan && !sctx->cur_inode_first_ref_orphan) {
2889 			ret = send_unlink(sctx, valid_path);
2890 			if (ret < 0)
2891 				goto out;
2892 		}
2893 	}
2894 
2895 	/*
2896 	 * We did collect all parent dirs where cur_inode was once located. We
2897 	 * now go through all these dirs and check if they are pending for
2898 	 * deletion and if it's finally possible to perform the rmdir now.
2899 	 * We also update the inode stats of the parent dirs here.
2900 	 */
2901 	ULIST_ITER_INIT(&uit);
2902 	while ((un = ulist_next(check_dirs, &uit))) {
2903 		if (un->val > sctx->cur_ino)
2904 			continue;
2905 
2906 		ret = get_cur_inode_state(sctx, un->val, un->aux);
2907 		if (ret < 0)
2908 			goto out;
2909 
2910 		if (ret == inode_state_did_create ||
2911 		    ret == inode_state_no_change) {
2912 			/* TODO delayed utimes */
2913 			ret = send_utimes(sctx, un->val, un->aux);
2914 			if (ret < 0)
2915 				goto out;
2916 		} else if (ret == inode_state_did_delete) {
2917 			ret = can_rmdir(sctx, un->val, sctx->cur_ino);
2918 			if (ret < 0)
2919 				goto out;
2920 			if (ret) {
2921 				ret = get_cur_path(sctx, un->val, un->aux,
2922 						valid_path);
2923 				if (ret < 0)
2924 					goto out;
2925 				ret = send_rmdir(sctx, valid_path);
2926 				if (ret < 0)
2927 					goto out;
2928 			}
2929 		}
2930 	}
2931 
2932 	/*
2933 	 * Current inode is now at it's new position, so we must increase
2934 	 * send_progress
2935 	 */
2936 	sctx->send_progress = sctx->cur_ino + 1;
2937 
2938 	/*
2939 	 * We may have a directory here that has pending refs which could not
2940 	 * be created before (because the dir did not exist before, see
2941 	 * __record_new_ref). finish_outoforder_dir will link/move the pending
2942 	 * refs.
2943 	 */
2944 	if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_new) {
2945 		ret = finish_outoforder_dir(sctx, sctx->cur_ino,
2946 				sctx->cur_inode_gen);
2947 		if (ret < 0)
2948 			goto out;
2949 	}
2950 
2951 	ret = 0;
2952 
2953 out:
2954 	free_recorded_refs(sctx);
2955 	ulist_free(check_dirs);
2956 	fs_path_free(sctx, valid_path);
2957 	return ret;
2958 }
2959 
2960 static int __record_new_ref(int num, u64 dir, int index,
2961 			    struct fs_path *name,
2962 			    void *ctx)
2963 {
2964 	int ret = 0;
2965 	struct send_ctx *sctx = ctx;
2966 	struct fs_path *p;
2967 	u64 gen;
2968 
2969 	p = fs_path_alloc(sctx);
2970 	if (!p)
2971 		return -ENOMEM;
2972 
2973 	ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL,
2974 			NULL);
2975 	if (ret < 0)
2976 		goto out;
2977 
2978 	/*
2979 	 * The parent may be non-existent at this point in time. This happens
2980 	 * if the ino of the parent dir is higher then the current ino. In this
2981 	 * case, we can not process this ref until the parent dir is finally
2982 	 * created. If we reach the parent dir later, process_recorded_refs
2983 	 * will go through all dir items and process the refs that could not be
2984 	 * processed before. In case this is the first ref, we set
2985 	 * cur_inode_first_ref_orphan to 1 to inform process_recorded_refs to
2986 	 * keep an orphan of the inode so that it later can be used for
2987 	 * link/move
2988 	 */
2989 	ret = is_inode_existent(sctx, dir, gen);
2990 	if (ret < 0)
2991 		goto out;
2992 	if (!ret) {
2993 		ret = is_first_ref(sctx, sctx->send_root, sctx->cur_ino, dir,
2994 				name->start, fs_path_len(name));
2995 		if (ret < 0)
2996 			goto out;
2997 		if (ret)
2998 			sctx->cur_inode_first_ref_orphan = 1;
2999 		ret = 0;
3000 		goto out;
3001 	}
3002 
3003 	ret = get_cur_path(sctx, dir, gen, p);
3004 	if (ret < 0)
3005 		goto out;
3006 	ret = fs_path_add_path(p, name);
3007 	if (ret < 0)
3008 		goto out;
3009 
3010 	ret = record_ref(&sctx->new_refs, dir, gen, p);
3011 
3012 out:
3013 	if (ret)
3014 		fs_path_free(sctx, p);
3015 	return ret;
3016 }
3017 
3018 static int __record_deleted_ref(int num, u64 dir, int index,
3019 				struct fs_path *name,
3020 				void *ctx)
3021 {
3022 	int ret = 0;
3023 	struct send_ctx *sctx = ctx;
3024 	struct fs_path *p;
3025 	u64 gen;
3026 
3027 	p = fs_path_alloc(sctx);
3028 	if (!p)
3029 		return -ENOMEM;
3030 
3031 	ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL,
3032 			NULL);
3033 	if (ret < 0)
3034 		goto out;
3035 
3036 	ret = get_cur_path(sctx, dir, gen, p);
3037 	if (ret < 0)
3038 		goto out;
3039 	ret = fs_path_add_path(p, name);
3040 	if (ret < 0)
3041 		goto out;
3042 
3043 	ret = record_ref(&sctx->deleted_refs, dir, gen, p);
3044 
3045 out:
3046 	if (ret)
3047 		fs_path_free(sctx, p);
3048 	return ret;
3049 }
3050 
3051 static int record_new_ref(struct send_ctx *sctx)
3052 {
3053 	int ret;
3054 
3055 	ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path,
3056 			sctx->cmp_key, 0, __record_new_ref, sctx);
3057 	if (ret < 0)
3058 		goto out;
3059 	ret = 0;
3060 
3061 out:
3062 	return ret;
3063 }
3064 
3065 static int record_deleted_ref(struct send_ctx *sctx)
3066 {
3067 	int ret;
3068 
3069 	ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path,
3070 			sctx->cmp_key, 0, __record_deleted_ref, sctx);
3071 	if (ret < 0)
3072 		goto out;
3073 	ret = 0;
3074 
3075 out:
3076 	return ret;
3077 }
3078 
3079 struct find_ref_ctx {
3080 	u64 dir;
3081 	struct fs_path *name;
3082 	int found_idx;
3083 };
3084 
3085 static int __find_iref(int num, u64 dir, int index,
3086 		       struct fs_path *name,
3087 		       void *ctx_)
3088 {
3089 	struct find_ref_ctx *ctx = ctx_;
3090 
3091 	if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
3092 	    strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
3093 		ctx->found_idx = num;
3094 		return 1;
3095 	}
3096 	return 0;
3097 }
3098 
3099 static int find_iref(struct send_ctx *sctx,
3100 		     struct btrfs_root *root,
3101 		     struct btrfs_path *path,
3102 		     struct btrfs_key *key,
3103 		     u64 dir, struct fs_path *name)
3104 {
3105 	int ret;
3106 	struct find_ref_ctx ctx;
3107 
3108 	ctx.dir = dir;
3109 	ctx.name = name;
3110 	ctx.found_idx = -1;
3111 
3112 	ret = iterate_inode_ref(sctx, root, path, key, 0, __find_iref, &ctx);
3113 	if (ret < 0)
3114 		return ret;
3115 
3116 	if (ctx.found_idx == -1)
3117 		return -ENOENT;
3118 
3119 	return ctx.found_idx;
3120 }
3121 
3122 static int __record_changed_new_ref(int num, u64 dir, int index,
3123 				    struct fs_path *name,
3124 				    void *ctx)
3125 {
3126 	int ret;
3127 	struct send_ctx *sctx = ctx;
3128 
3129 	ret = find_iref(sctx, sctx->parent_root, sctx->right_path,
3130 			sctx->cmp_key, dir, name);
3131 	if (ret == -ENOENT)
3132 		ret = __record_new_ref(num, dir, index, name, sctx);
3133 	else if (ret > 0)
3134 		ret = 0;
3135 
3136 	return ret;
3137 }
3138 
3139 static int __record_changed_deleted_ref(int num, u64 dir, int index,
3140 					struct fs_path *name,
3141 					void *ctx)
3142 {
3143 	int ret;
3144 	struct send_ctx *sctx = ctx;
3145 
3146 	ret = find_iref(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key,
3147 			dir, name);
3148 	if (ret == -ENOENT)
3149 		ret = __record_deleted_ref(num, dir, index, name, sctx);
3150 	else if (ret > 0)
3151 		ret = 0;
3152 
3153 	return ret;
3154 }
3155 
3156 static int record_changed_ref(struct send_ctx *sctx)
3157 {
3158 	int ret = 0;
3159 
3160 	ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path,
3161 			sctx->cmp_key, 0, __record_changed_new_ref, sctx);
3162 	if (ret < 0)
3163 		goto out;
3164 	ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path,
3165 			sctx->cmp_key, 0, __record_changed_deleted_ref, sctx);
3166 	if (ret < 0)
3167 		goto out;
3168 	ret = 0;
3169 
3170 out:
3171 	return ret;
3172 }
3173 
3174 /*
3175  * Record and process all refs at once. Needed when an inode changes the
3176  * generation number, which means that it was deleted and recreated.
3177  */
3178 static int process_all_refs(struct send_ctx *sctx,
3179 			    enum btrfs_compare_tree_result cmd)
3180 {
3181 	int ret;
3182 	struct btrfs_root *root;
3183 	struct btrfs_path *path;
3184 	struct btrfs_key key;
3185 	struct btrfs_key found_key;
3186 	struct extent_buffer *eb;
3187 	int slot;
3188 	iterate_inode_ref_t cb;
3189 
3190 	path = alloc_path_for_send();
3191 	if (!path)
3192 		return -ENOMEM;
3193 
3194 	if (cmd == BTRFS_COMPARE_TREE_NEW) {
3195 		root = sctx->send_root;
3196 		cb = __record_new_ref;
3197 	} else if (cmd == BTRFS_COMPARE_TREE_DELETED) {
3198 		root = sctx->parent_root;
3199 		cb = __record_deleted_ref;
3200 	} else {
3201 		BUG();
3202 	}
3203 
3204 	key.objectid = sctx->cmp_key->objectid;
3205 	key.type = BTRFS_INODE_REF_KEY;
3206 	key.offset = 0;
3207 	while (1) {
3208 		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
3209 		if (ret < 0) {
3210 			btrfs_release_path(path);
3211 			goto out;
3212 		}
3213 		if (ret) {
3214 			btrfs_release_path(path);
3215 			break;
3216 		}
3217 
3218 		eb = path->nodes[0];
3219 		slot = path->slots[0];
3220 		btrfs_item_key_to_cpu(eb, &found_key, slot);
3221 
3222 		if (found_key.objectid != key.objectid ||
3223 		    found_key.type != key.type) {
3224 			btrfs_release_path(path);
3225 			break;
3226 		}
3227 
3228 		ret = iterate_inode_ref(sctx, sctx->parent_root, path,
3229 				&found_key, 0, cb, sctx);
3230 		btrfs_release_path(path);
3231 		if (ret < 0)
3232 			goto out;
3233 
3234 		key.offset = found_key.offset + 1;
3235 	}
3236 
3237 	ret = process_recorded_refs(sctx);
3238 
3239 out:
3240 	btrfs_free_path(path);
3241 	return ret;
3242 }
3243 
3244 static int send_set_xattr(struct send_ctx *sctx,
3245 			  struct fs_path *path,
3246 			  const char *name, int name_len,
3247 			  const char *data, int data_len)
3248 {
3249 	int ret = 0;
3250 
3251 	ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR);
3252 	if (ret < 0)
3253 		goto out;
3254 
3255 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3256 	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3257 	TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len);
3258 
3259 	ret = send_cmd(sctx);
3260 
3261 tlv_put_failure:
3262 out:
3263 	return ret;
3264 }
3265 
3266 static int send_remove_xattr(struct send_ctx *sctx,
3267 			  struct fs_path *path,
3268 			  const char *name, int name_len)
3269 {
3270 	int ret = 0;
3271 
3272 	ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR);
3273 	if (ret < 0)
3274 		goto out;
3275 
3276 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
3277 	TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len);
3278 
3279 	ret = send_cmd(sctx);
3280 
3281 tlv_put_failure:
3282 out:
3283 	return ret;
3284 }
3285 
3286 static int __process_new_xattr(int num, struct btrfs_key *di_key,
3287 			       const char *name, int name_len,
3288 			       const char *data, int data_len,
3289 			       u8 type, void *ctx)
3290 {
3291 	int ret;
3292 	struct send_ctx *sctx = ctx;
3293 	struct fs_path *p;
3294 	posix_acl_xattr_header dummy_acl;
3295 
3296 	p = fs_path_alloc(sctx);
3297 	if (!p)
3298 		return -ENOMEM;
3299 
3300 	/*
3301 	 * This hack is needed because empty acl's are stored as zero byte
3302 	 * data in xattrs. Problem with that is, that receiving these zero byte
3303 	 * acl's will fail later. To fix this, we send a dummy acl list that
3304 	 * only contains the version number and no entries.
3305 	 */
3306 	if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) ||
3307 	    !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) {
3308 		if (data_len == 0) {
3309 			dummy_acl.a_version =
3310 					cpu_to_le32(POSIX_ACL_XATTR_VERSION);
3311 			data = (char *)&dummy_acl;
3312 			data_len = sizeof(dummy_acl);
3313 		}
3314 	}
3315 
3316 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3317 	if (ret < 0)
3318 		goto out;
3319 
3320 	ret = send_set_xattr(sctx, p, name, name_len, data, data_len);
3321 
3322 out:
3323 	fs_path_free(sctx, p);
3324 	return ret;
3325 }
3326 
3327 static int __process_deleted_xattr(int num, struct btrfs_key *di_key,
3328 				   const char *name, int name_len,
3329 				   const char *data, int data_len,
3330 				   u8 type, void *ctx)
3331 {
3332 	int ret;
3333 	struct send_ctx *sctx = ctx;
3334 	struct fs_path *p;
3335 
3336 	p = fs_path_alloc(sctx);
3337 	if (!p)
3338 		return -ENOMEM;
3339 
3340 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3341 	if (ret < 0)
3342 		goto out;
3343 
3344 	ret = send_remove_xattr(sctx, p, name, name_len);
3345 
3346 out:
3347 	fs_path_free(sctx, p);
3348 	return ret;
3349 }
3350 
3351 static int process_new_xattr(struct send_ctx *sctx)
3352 {
3353 	int ret = 0;
3354 
3355 	ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path,
3356 			sctx->cmp_key, __process_new_xattr, sctx);
3357 
3358 	return ret;
3359 }
3360 
3361 static int process_deleted_xattr(struct send_ctx *sctx)
3362 {
3363 	int ret;
3364 
3365 	ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path,
3366 			sctx->cmp_key, __process_deleted_xattr, sctx);
3367 
3368 	return ret;
3369 }
3370 
3371 struct find_xattr_ctx {
3372 	const char *name;
3373 	int name_len;
3374 	int found_idx;
3375 	char *found_data;
3376 	int found_data_len;
3377 };
3378 
3379 static int __find_xattr(int num, struct btrfs_key *di_key,
3380 			const char *name, int name_len,
3381 			const char *data, int data_len,
3382 			u8 type, void *vctx)
3383 {
3384 	struct find_xattr_ctx *ctx = vctx;
3385 
3386 	if (name_len == ctx->name_len &&
3387 	    strncmp(name, ctx->name, name_len) == 0) {
3388 		ctx->found_idx = num;
3389 		ctx->found_data_len = data_len;
3390 		ctx->found_data = kmalloc(data_len, GFP_NOFS);
3391 		if (!ctx->found_data)
3392 			return -ENOMEM;
3393 		memcpy(ctx->found_data, data, data_len);
3394 		return 1;
3395 	}
3396 	return 0;
3397 }
3398 
3399 static int find_xattr(struct send_ctx *sctx,
3400 		      struct btrfs_root *root,
3401 		      struct btrfs_path *path,
3402 		      struct btrfs_key *key,
3403 		      const char *name, int name_len,
3404 		      char **data, int *data_len)
3405 {
3406 	int ret;
3407 	struct find_xattr_ctx ctx;
3408 
3409 	ctx.name = name;
3410 	ctx.name_len = name_len;
3411 	ctx.found_idx = -1;
3412 	ctx.found_data = NULL;
3413 	ctx.found_data_len = 0;
3414 
3415 	ret = iterate_dir_item(sctx, root, path, key, __find_xattr, &ctx);
3416 	if (ret < 0)
3417 		return ret;
3418 
3419 	if (ctx.found_idx == -1)
3420 		return -ENOENT;
3421 	if (data) {
3422 		*data = ctx.found_data;
3423 		*data_len = ctx.found_data_len;
3424 	} else {
3425 		kfree(ctx.found_data);
3426 	}
3427 	return ctx.found_idx;
3428 }
3429 
3430 
3431 static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
3432 				       const char *name, int name_len,
3433 				       const char *data, int data_len,
3434 				       u8 type, void *ctx)
3435 {
3436 	int ret;
3437 	struct send_ctx *sctx = ctx;
3438 	char *found_data = NULL;
3439 	int found_data_len  = 0;
3440 	struct fs_path *p = NULL;
3441 
3442 	ret = find_xattr(sctx, sctx->parent_root, sctx->right_path,
3443 			sctx->cmp_key, name, name_len, &found_data,
3444 			&found_data_len);
3445 	if (ret == -ENOENT) {
3446 		ret = __process_new_xattr(num, di_key, name, name_len, data,
3447 				data_len, type, ctx);
3448 	} else if (ret >= 0) {
3449 		if (data_len != found_data_len ||
3450 		    memcmp(data, found_data, data_len)) {
3451 			ret = __process_new_xattr(num, di_key, name, name_len,
3452 					data, data_len, type, ctx);
3453 		} else {
3454 			ret = 0;
3455 		}
3456 	}
3457 
3458 	kfree(found_data);
3459 	fs_path_free(sctx, p);
3460 	return ret;
3461 }
3462 
3463 static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
3464 					   const char *name, int name_len,
3465 					   const char *data, int data_len,
3466 					   u8 type, void *ctx)
3467 {
3468 	int ret;
3469 	struct send_ctx *sctx = ctx;
3470 
3471 	ret = find_xattr(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key,
3472 			name, name_len, NULL, NULL);
3473 	if (ret == -ENOENT)
3474 		ret = __process_deleted_xattr(num, di_key, name, name_len, data,
3475 				data_len, type, ctx);
3476 	else if (ret >= 0)
3477 		ret = 0;
3478 
3479 	return ret;
3480 }
3481 
3482 static int process_changed_xattr(struct send_ctx *sctx)
3483 {
3484 	int ret = 0;
3485 
3486 	ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path,
3487 			sctx->cmp_key, __process_changed_new_xattr, sctx);
3488 	if (ret < 0)
3489 		goto out;
3490 	ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path,
3491 			sctx->cmp_key, __process_changed_deleted_xattr, sctx);
3492 
3493 out:
3494 	return ret;
3495 }
3496 
3497 static int process_all_new_xattrs(struct send_ctx *sctx)
3498 {
3499 	int ret;
3500 	struct btrfs_root *root;
3501 	struct btrfs_path *path;
3502 	struct btrfs_key key;
3503 	struct btrfs_key found_key;
3504 	struct extent_buffer *eb;
3505 	int slot;
3506 
3507 	path = alloc_path_for_send();
3508 	if (!path)
3509 		return -ENOMEM;
3510 
3511 	root = sctx->send_root;
3512 
3513 	key.objectid = sctx->cmp_key->objectid;
3514 	key.type = BTRFS_XATTR_ITEM_KEY;
3515 	key.offset = 0;
3516 	while (1) {
3517 		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
3518 		if (ret < 0)
3519 			goto out;
3520 		if (ret) {
3521 			ret = 0;
3522 			goto out;
3523 		}
3524 
3525 		eb = path->nodes[0];
3526 		slot = path->slots[0];
3527 		btrfs_item_key_to_cpu(eb, &found_key, slot);
3528 
3529 		if (found_key.objectid != key.objectid ||
3530 		    found_key.type != key.type) {
3531 			ret = 0;
3532 			goto out;
3533 		}
3534 
3535 		ret = iterate_dir_item(sctx, root, path, &found_key,
3536 				__process_new_xattr, sctx);
3537 		if (ret < 0)
3538 			goto out;
3539 
3540 		btrfs_release_path(path);
3541 		key.offset = found_key.offset + 1;
3542 	}
3543 
3544 out:
3545 	btrfs_free_path(path);
3546 	return ret;
3547 }
3548 
3549 /*
3550  * Read some bytes from the current inode/file and send a write command to
3551  * user space.
3552  */
3553 static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
3554 {
3555 	int ret = 0;
3556 	struct fs_path *p;
3557 	loff_t pos = offset;
3558 	int readed = 0;
3559 	mm_segment_t old_fs;
3560 
3561 	p = fs_path_alloc(sctx);
3562 	if (!p)
3563 		return -ENOMEM;
3564 
3565 	/*
3566 	 * vfs normally only accepts user space buffers for security reasons.
3567 	 * we only read from the file and also only provide the read_buf buffer
3568 	 * to vfs. As this buffer does not come from a user space call, it's
3569 	 * ok to temporary allow kernel space buffers.
3570 	 */
3571 	old_fs = get_fs();
3572 	set_fs(KERNEL_DS);
3573 
3574 verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
3575 
3576 	ret = open_cur_inode_file(sctx);
3577 	if (ret < 0)
3578 		goto out;
3579 
3580 	ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos);
3581 	if (ret < 0)
3582 		goto out;
3583 	readed = ret;
3584 	if (!readed)
3585 		goto out;
3586 
3587 	ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
3588 	if (ret < 0)
3589 		goto out;
3590 
3591 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3592 	if (ret < 0)
3593 		goto out;
3594 
3595 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
3596 	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
3597 	TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, readed);
3598 
3599 	ret = send_cmd(sctx);
3600 
3601 tlv_put_failure:
3602 out:
3603 	fs_path_free(sctx, p);
3604 	set_fs(old_fs);
3605 	if (ret < 0)
3606 		return ret;
3607 	return readed;
3608 }
3609 
3610 /*
3611  * Send a clone command to user space.
3612  */
3613 static int send_clone(struct send_ctx *sctx,
3614 		      u64 offset, u32 len,
3615 		      struct clone_root *clone_root)
3616 {
3617 	int ret = 0;
3618 	struct btrfs_root *clone_root2 = clone_root->root;
3619 	struct fs_path *p;
3620 	u64 gen;
3621 
3622 verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
3623 	       "clone_inode=%llu, clone_offset=%llu\n", offset, len,
3624 		clone_root->root->objectid, clone_root->ino,
3625 		clone_root->offset);
3626 
3627 	p = fs_path_alloc(sctx);
3628 	if (!p)
3629 		return -ENOMEM;
3630 
3631 	ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE);
3632 	if (ret < 0)
3633 		goto out;
3634 
3635 	ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3636 	if (ret < 0)
3637 		goto out;
3638 
3639 	TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
3640 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len);
3641 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
3642 
3643 	if (clone_root2 == sctx->send_root) {
3644 		ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
3645 				&gen, NULL, NULL, NULL);
3646 		if (ret < 0)
3647 			goto out;
3648 		ret = get_cur_path(sctx, clone_root->ino, gen, p);
3649 	} else {
3650 		ret = get_inode_path(sctx, clone_root2, clone_root->ino, p);
3651 	}
3652 	if (ret < 0)
3653 		goto out;
3654 
3655 	TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
3656 			clone_root2->root_item.uuid);
3657 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
3658 			clone_root2->root_item.ctransid);
3659 	TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
3660 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
3661 			clone_root->offset);
3662 
3663 	ret = send_cmd(sctx);
3664 
3665 tlv_put_failure:
3666 out:
3667 	fs_path_free(sctx, p);
3668 	return ret;
3669 }
3670 
3671 static int send_write_or_clone(struct send_ctx *sctx,
3672 			       struct btrfs_path *path,
3673 			       struct btrfs_key *key,
3674 			       struct clone_root *clone_root)
3675 {
3676 	int ret = 0;
3677 	struct btrfs_file_extent_item *ei;
3678 	u64 offset = key->offset;
3679 	u64 pos = 0;
3680 	u64 len;
3681 	u32 l;
3682 	u8 type;
3683 
3684 	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3685 			struct btrfs_file_extent_item);
3686 	type = btrfs_file_extent_type(path->nodes[0], ei);
3687 	if (type == BTRFS_FILE_EXTENT_INLINE)
3688 		len = btrfs_file_extent_inline_len(path->nodes[0], ei);
3689 	else
3690 		len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
3691 
3692 	if (offset + len > sctx->cur_inode_size)
3693 		len = sctx->cur_inode_size - offset;
3694 	if (len == 0) {
3695 		ret = 0;
3696 		goto out;
3697 	}
3698 
3699 	if (!clone_root) {
3700 		while (pos < len) {
3701 			l = len - pos;
3702 			if (l > BTRFS_SEND_READ_SIZE)
3703 				l = BTRFS_SEND_READ_SIZE;
3704 			ret = send_write(sctx, pos + offset, l);
3705 			if (ret < 0)
3706 				goto out;
3707 			if (!ret)
3708 				break;
3709 			pos += ret;
3710 		}
3711 		ret = 0;
3712 	} else {
3713 		ret = send_clone(sctx, offset, len, clone_root);
3714 	}
3715 
3716 out:
3717 	return ret;
3718 }
3719 
3720 static int is_extent_unchanged(struct send_ctx *sctx,
3721 			       struct btrfs_path *left_path,
3722 			       struct btrfs_key *ekey)
3723 {
3724 	int ret = 0;
3725 	struct btrfs_key key;
3726 	struct btrfs_path *path = NULL;
3727 	struct extent_buffer *eb;
3728 	int slot;
3729 	struct btrfs_key found_key;
3730 	struct btrfs_file_extent_item *ei;
3731 	u64 left_disknr;
3732 	u64 right_disknr;
3733 	u64 left_offset;
3734 	u64 right_offset;
3735 	u64 left_offset_fixed;
3736 	u64 left_len;
3737 	u64 right_len;
3738 	u8 left_type;
3739 	u8 right_type;
3740 
3741 	path = alloc_path_for_send();
3742 	if (!path)
3743 		return -ENOMEM;
3744 
3745 	eb = left_path->nodes[0];
3746 	slot = left_path->slots[0];
3747 
3748 	ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
3749 	left_type = btrfs_file_extent_type(eb, ei);
3750 	left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
3751 	left_len = btrfs_file_extent_num_bytes(eb, ei);
3752 	left_offset = btrfs_file_extent_offset(eb, ei);
3753 
3754 	if (left_type != BTRFS_FILE_EXTENT_REG) {
3755 		ret = 0;
3756 		goto out;
3757 	}
3758 
3759 	/*
3760 	 * Following comments will refer to these graphics. L is the left
3761 	 * extents which we are checking at the moment. 1-8 are the right
3762 	 * extents that we iterate.
3763 	 *
3764 	 *       |-----L-----|
3765 	 * |-1-|-2a-|-3-|-4-|-5-|-6-|
3766 	 *
3767 	 *       |-----L-----|
3768 	 * |--1--|-2b-|...(same as above)
3769 	 *
3770 	 * Alternative situation. Happens on files where extents got split.
3771 	 *       |-----L-----|
3772 	 * |-----------7-----------|-6-|
3773 	 *
3774 	 * Alternative situation. Happens on files which got larger.
3775 	 *       |-----L-----|
3776 	 * |-8-|
3777 	 * Nothing follows after 8.
3778 	 */
3779 
3780 	key.objectid = ekey->objectid;
3781 	key.type = BTRFS_EXTENT_DATA_KEY;
3782 	key.offset = ekey->offset;
3783 	ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
3784 	if (ret < 0)
3785 		goto out;
3786 	if (ret) {
3787 		ret = 0;
3788 		goto out;
3789 	}
3790 
3791 	/*
3792 	 * Handle special case where the right side has no extents at all.
3793 	 */
3794 	eb = path->nodes[0];
3795 	slot = path->slots[0];
3796 	btrfs_item_key_to_cpu(eb, &found_key, slot);
3797 	if (found_key.objectid != key.objectid ||
3798 	    found_key.type != key.type) {
3799 		ret = 0;
3800 		goto out;
3801 	}
3802 
3803 	/*
3804 	 * We're now on 2a, 2b or 7.
3805 	 */
3806 	key = found_key;
3807 	while (key.offset < ekey->offset + left_len) {
3808 		ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
3809 		right_type = btrfs_file_extent_type(eb, ei);
3810 		right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
3811 		right_len = btrfs_file_extent_num_bytes(eb, ei);
3812 		right_offset = btrfs_file_extent_offset(eb, ei);
3813 
3814 		if (right_type != BTRFS_FILE_EXTENT_REG) {
3815 			ret = 0;
3816 			goto out;
3817 		}
3818 
3819 		/*
3820 		 * Are we at extent 8? If yes, we know the extent is changed.
3821 		 * This may only happen on the first iteration.
3822 		 */
3823 		if (found_key.offset + right_len < ekey->offset) {
3824 			ret = 0;
3825 			goto out;
3826 		}
3827 
3828 		left_offset_fixed = left_offset;
3829 		if (key.offset < ekey->offset) {
3830 			/* Fix the right offset for 2a and 7. */
3831 			right_offset += ekey->offset - key.offset;
3832 		} else {
3833 			/* Fix the left offset for all behind 2a and 2b */
3834 			left_offset_fixed += key.offset - ekey->offset;
3835 		}
3836 
3837 		/*
3838 		 * Check if we have the same extent.
3839 		 */
3840 		if (left_disknr + left_offset_fixed !=
3841 				right_disknr + right_offset) {
3842 			ret = 0;
3843 			goto out;
3844 		}
3845 
3846 		/*
3847 		 * Go to the next extent.
3848 		 */
3849 		ret = btrfs_next_item(sctx->parent_root, path);
3850 		if (ret < 0)
3851 			goto out;
3852 		if (!ret) {
3853 			eb = path->nodes[0];
3854 			slot = path->slots[0];
3855 			btrfs_item_key_to_cpu(eb, &found_key, slot);
3856 		}
3857 		if (ret || found_key.objectid != key.objectid ||
3858 		    found_key.type != key.type) {
3859 			key.offset += right_len;
3860 			break;
3861 		} else {
3862 			if (found_key.offset != key.offset + right_len) {
3863 				/* Should really not happen */
3864 				ret = -EIO;
3865 				goto out;
3866 			}
3867 		}
3868 		key = found_key;
3869 	}
3870 
3871 	/*
3872 	 * We're now behind the left extent (treat as unchanged) or at the end
3873 	 * of the right side (treat as changed).
3874 	 */
3875 	if (key.offset >= ekey->offset + left_len)
3876 		ret = 1;
3877 	else
3878 		ret = 0;
3879 
3880 
3881 out:
3882 	btrfs_free_path(path);
3883 	return ret;
3884 }
3885 
3886 static int process_extent(struct send_ctx *sctx,
3887 			  struct btrfs_path *path,
3888 			  struct btrfs_key *key)
3889 {
3890 	int ret = 0;
3891 	struct clone_root *found_clone = NULL;
3892 
3893 	if (S_ISLNK(sctx->cur_inode_mode))
3894 		return 0;
3895 
3896 	if (sctx->parent_root && !sctx->cur_inode_new) {
3897 		ret = is_extent_unchanged(sctx, path, key);
3898 		if (ret < 0)
3899 			goto out;
3900 		if (ret) {
3901 			ret = 0;
3902 			goto out;
3903 		}
3904 	}
3905 
3906 	ret = find_extent_clone(sctx, path, key->objectid, key->offset,
3907 			sctx->cur_inode_size, &found_clone);
3908 	if (ret != -ENOENT && ret < 0)
3909 		goto out;
3910 
3911 	ret = send_write_or_clone(sctx, path, key, found_clone);
3912 
3913 out:
3914 	return ret;
3915 }
3916 
3917 static int process_all_extents(struct send_ctx *sctx)
3918 {
3919 	int ret;
3920 	struct btrfs_root *root;
3921 	struct btrfs_path *path;
3922 	struct btrfs_key key;
3923 	struct btrfs_key found_key;
3924 	struct extent_buffer *eb;
3925 	int slot;
3926 
3927 	root = sctx->send_root;
3928 	path = alloc_path_for_send();
3929 	if (!path)
3930 		return -ENOMEM;
3931 
3932 	key.objectid = sctx->cmp_key->objectid;
3933 	key.type = BTRFS_EXTENT_DATA_KEY;
3934 	key.offset = 0;
3935 	while (1) {
3936 		ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
3937 		if (ret < 0)
3938 			goto out;
3939 		if (ret) {
3940 			ret = 0;
3941 			goto out;
3942 		}
3943 
3944 		eb = path->nodes[0];
3945 		slot = path->slots[0];
3946 		btrfs_item_key_to_cpu(eb, &found_key, slot);
3947 
3948 		if (found_key.objectid != key.objectid ||
3949 		    found_key.type != key.type) {
3950 			ret = 0;
3951 			goto out;
3952 		}
3953 
3954 		ret = process_extent(sctx, path, &found_key);
3955 		if (ret < 0)
3956 			goto out;
3957 
3958 		btrfs_release_path(path);
3959 		key.offset = found_key.offset + 1;
3960 	}
3961 
3962 out:
3963 	btrfs_free_path(path);
3964 	return ret;
3965 }
3966 
3967 static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end)
3968 {
3969 	int ret = 0;
3970 
3971 	if (sctx->cur_ino == 0)
3972 		goto out;
3973 	if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
3974 	    sctx->cmp_key->type <= BTRFS_INODE_REF_KEY)
3975 		goto out;
3976 	if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
3977 		goto out;
3978 
3979 	ret = process_recorded_refs(sctx);
3980 
3981 out:
3982 	return ret;
3983 }
3984 
3985 static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
3986 {
3987 	int ret = 0;
3988 	u64 left_mode;
3989 	u64 left_uid;
3990 	u64 left_gid;
3991 	u64 right_mode;
3992 	u64 right_uid;
3993 	u64 right_gid;
3994 	int need_chmod = 0;
3995 	int need_chown = 0;
3996 
3997 	ret = process_recorded_refs_if_needed(sctx, at_end);
3998 	if (ret < 0)
3999 		goto out;
4000 
4001 	if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
4002 		goto out;
4003 	if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
4004 		goto out;
4005 
4006 	ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
4007 			&left_mode, &left_uid, &left_gid);
4008 	if (ret < 0)
4009 		goto out;
4010 
4011 	if (!S_ISLNK(sctx->cur_inode_mode)) {
4012 		if (!sctx->parent_root || sctx->cur_inode_new) {
4013 			need_chmod = 1;
4014 			need_chown = 1;
4015 		} else {
4016 			ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
4017 					NULL, NULL, &right_mode, &right_uid,
4018 					&right_gid);
4019 			if (ret < 0)
4020 				goto out;
4021 
4022 			if (left_uid != right_uid || left_gid != right_gid)
4023 				need_chown = 1;
4024 			if (left_mode != right_mode)
4025 				need_chmod = 1;
4026 		}
4027 	}
4028 
4029 	if (S_ISREG(sctx->cur_inode_mode)) {
4030 		ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4031 				sctx->cur_inode_size);
4032 		if (ret < 0)
4033 			goto out;
4034 	}
4035 
4036 	if (need_chown) {
4037 		ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4038 				left_uid, left_gid);
4039 		if (ret < 0)
4040 			goto out;
4041 	}
4042 	if (need_chmod) {
4043 		ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
4044 				left_mode);
4045 		if (ret < 0)
4046 			goto out;
4047 	}
4048 
4049 	/*
4050 	 * Need to send that every time, no matter if it actually changed
4051 	 * between the two trees as we have done changes to the inode before.
4052 	 */
4053 	ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
4054 	if (ret < 0)
4055 		goto out;
4056 
4057 out:
4058 	return ret;
4059 }
4060 
4061 static int changed_inode(struct send_ctx *sctx,
4062 			 enum btrfs_compare_tree_result result)
4063 {
4064 	int ret = 0;
4065 	struct btrfs_key *key = sctx->cmp_key;
4066 	struct btrfs_inode_item *left_ii = NULL;
4067 	struct btrfs_inode_item *right_ii = NULL;
4068 	u64 left_gen = 0;
4069 	u64 right_gen = 0;
4070 
4071 	ret = close_cur_inode_file(sctx);
4072 	if (ret < 0)
4073 		goto out;
4074 
4075 	sctx->cur_ino = key->objectid;
4076 	sctx->cur_inode_new_gen = 0;
4077 	sctx->cur_inode_first_ref_orphan = 0;
4078 	sctx->send_progress = sctx->cur_ino;
4079 
4080 	if (result == BTRFS_COMPARE_TREE_NEW ||
4081 	    result == BTRFS_COMPARE_TREE_CHANGED) {
4082 		left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
4083 				sctx->left_path->slots[0],
4084 				struct btrfs_inode_item);
4085 		left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
4086 				left_ii);
4087 	} else {
4088 		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
4089 				sctx->right_path->slots[0],
4090 				struct btrfs_inode_item);
4091 		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
4092 				right_ii);
4093 	}
4094 	if (result == BTRFS_COMPARE_TREE_CHANGED) {
4095 		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
4096 				sctx->right_path->slots[0],
4097 				struct btrfs_inode_item);
4098 
4099 		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
4100 				right_ii);
4101 		if (left_gen != right_gen)
4102 			sctx->cur_inode_new_gen = 1;
4103 	}
4104 
4105 	if (result == BTRFS_COMPARE_TREE_NEW) {
4106 		sctx->cur_inode_gen = left_gen;
4107 		sctx->cur_inode_new = 1;
4108 		sctx->cur_inode_deleted = 0;
4109 		sctx->cur_inode_size = btrfs_inode_size(
4110 				sctx->left_path->nodes[0], left_ii);
4111 		sctx->cur_inode_mode = btrfs_inode_mode(
4112 				sctx->left_path->nodes[0], left_ii);
4113 		if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
4114 			ret = send_create_inode(sctx, sctx->left_path,
4115 					sctx->cmp_key);
4116 	} else if (result == BTRFS_COMPARE_TREE_DELETED) {
4117 		sctx->cur_inode_gen = right_gen;
4118 		sctx->cur_inode_new = 0;
4119 		sctx->cur_inode_deleted = 1;
4120 		sctx->cur_inode_size = btrfs_inode_size(
4121 				sctx->right_path->nodes[0], right_ii);
4122 		sctx->cur_inode_mode = btrfs_inode_mode(
4123 				sctx->right_path->nodes[0], right_ii);
4124 	} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
4125 		if (sctx->cur_inode_new_gen) {
4126 			sctx->cur_inode_gen = right_gen;
4127 			sctx->cur_inode_new = 0;
4128 			sctx->cur_inode_deleted = 1;
4129 			sctx->cur_inode_size = btrfs_inode_size(
4130 					sctx->right_path->nodes[0], right_ii);
4131 			sctx->cur_inode_mode = btrfs_inode_mode(
4132 					sctx->right_path->nodes[0], right_ii);
4133 			ret = process_all_refs(sctx,
4134 					BTRFS_COMPARE_TREE_DELETED);
4135 			if (ret < 0)
4136 				goto out;
4137 
4138 			sctx->cur_inode_gen = left_gen;
4139 			sctx->cur_inode_new = 1;
4140 			sctx->cur_inode_deleted = 0;
4141 			sctx->cur_inode_size = btrfs_inode_size(
4142 					sctx->left_path->nodes[0], left_ii);
4143 			sctx->cur_inode_mode = btrfs_inode_mode(
4144 					sctx->left_path->nodes[0], left_ii);
4145 			ret = send_create_inode(sctx, sctx->left_path,
4146 					sctx->cmp_key);
4147 			if (ret < 0)
4148 				goto out;
4149 
4150 			ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
4151 			if (ret < 0)
4152 				goto out;
4153 			ret = process_all_extents(sctx);
4154 			if (ret < 0)
4155 				goto out;
4156 			ret = process_all_new_xattrs(sctx);
4157 			if (ret < 0)
4158 				goto out;
4159 		} else {
4160 			sctx->cur_inode_gen = left_gen;
4161 			sctx->cur_inode_new = 0;
4162 			sctx->cur_inode_new_gen = 0;
4163 			sctx->cur_inode_deleted = 0;
4164 			sctx->cur_inode_size = btrfs_inode_size(
4165 					sctx->left_path->nodes[0], left_ii);
4166 			sctx->cur_inode_mode = btrfs_inode_mode(
4167 					sctx->left_path->nodes[0], left_ii);
4168 		}
4169 	}
4170 
4171 out:
4172 	return ret;
4173 }
4174 
4175 static int changed_ref(struct send_ctx *sctx,
4176 		       enum btrfs_compare_tree_result result)
4177 {
4178 	int ret = 0;
4179 
4180 	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4181 
4182 	if (!sctx->cur_inode_new_gen &&
4183 	    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
4184 		if (result == BTRFS_COMPARE_TREE_NEW)
4185 			ret = record_new_ref(sctx);
4186 		else if (result == BTRFS_COMPARE_TREE_DELETED)
4187 			ret = record_deleted_ref(sctx);
4188 		else if (result == BTRFS_COMPARE_TREE_CHANGED)
4189 			ret = record_changed_ref(sctx);
4190 	}
4191 
4192 	return ret;
4193 }
4194 
4195 static int changed_xattr(struct send_ctx *sctx,
4196 			 enum btrfs_compare_tree_result result)
4197 {
4198 	int ret = 0;
4199 
4200 	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4201 
4202 	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
4203 		if (result == BTRFS_COMPARE_TREE_NEW)
4204 			ret = process_new_xattr(sctx);
4205 		else if (result == BTRFS_COMPARE_TREE_DELETED)
4206 			ret = process_deleted_xattr(sctx);
4207 		else if (result == BTRFS_COMPARE_TREE_CHANGED)
4208 			ret = process_changed_xattr(sctx);
4209 	}
4210 
4211 	return ret;
4212 }
4213 
4214 static int changed_extent(struct send_ctx *sctx,
4215 			  enum btrfs_compare_tree_result result)
4216 {
4217 	int ret = 0;
4218 
4219 	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
4220 
4221 	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
4222 		if (result != BTRFS_COMPARE_TREE_DELETED)
4223 			ret = process_extent(sctx, sctx->left_path,
4224 					sctx->cmp_key);
4225 	}
4226 
4227 	return ret;
4228 }
4229 
4230 
4231 static int changed_cb(struct btrfs_root *left_root,
4232 		      struct btrfs_root *right_root,
4233 		      struct btrfs_path *left_path,
4234 		      struct btrfs_path *right_path,
4235 		      struct btrfs_key *key,
4236 		      enum btrfs_compare_tree_result result,
4237 		      void *ctx)
4238 {
4239 	int ret = 0;
4240 	struct send_ctx *sctx = ctx;
4241 
4242 	sctx->left_path = left_path;
4243 	sctx->right_path = right_path;
4244 	sctx->cmp_key = key;
4245 
4246 	ret = finish_inode_if_needed(sctx, 0);
4247 	if (ret < 0)
4248 		goto out;
4249 
4250 	if (key->type == BTRFS_INODE_ITEM_KEY)
4251 		ret = changed_inode(sctx, result);
4252 	else if (key->type == BTRFS_INODE_REF_KEY)
4253 		ret = changed_ref(sctx, result);
4254 	else if (key->type == BTRFS_XATTR_ITEM_KEY)
4255 		ret = changed_xattr(sctx, result);
4256 	else if (key->type == BTRFS_EXTENT_DATA_KEY)
4257 		ret = changed_extent(sctx, result);
4258 
4259 out:
4260 	return ret;
4261 }
4262 
4263 static int full_send_tree(struct send_ctx *sctx)
4264 {
4265 	int ret;
4266 	struct btrfs_trans_handle *trans = NULL;
4267 	struct btrfs_root *send_root = sctx->send_root;
4268 	struct btrfs_key key;
4269 	struct btrfs_key found_key;
4270 	struct btrfs_path *path;
4271 	struct extent_buffer *eb;
4272 	int slot;
4273 	u64 start_ctransid;
4274 	u64 ctransid;
4275 
4276 	path = alloc_path_for_send();
4277 	if (!path)
4278 		return -ENOMEM;
4279 
4280 	spin_lock(&send_root->root_times_lock);
4281 	start_ctransid = btrfs_root_ctransid(&send_root->root_item);
4282 	spin_unlock(&send_root->root_times_lock);
4283 
4284 	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
4285 	key.type = BTRFS_INODE_ITEM_KEY;
4286 	key.offset = 0;
4287 
4288 join_trans:
4289 	/*
4290 	 * We need to make sure the transaction does not get committed
4291 	 * while we do anything on commit roots. Join a transaction to prevent
4292 	 * this.
4293 	 */
4294 	trans = btrfs_join_transaction(send_root);
4295 	if (IS_ERR(trans)) {
4296 		ret = PTR_ERR(trans);
4297 		trans = NULL;
4298 		goto out;
4299 	}
4300 
4301 	/*
4302 	 * Make sure the tree has not changed
4303 	 */
4304 	spin_lock(&send_root->root_times_lock);
4305 	ctransid = btrfs_root_ctransid(&send_root->root_item);
4306 	spin_unlock(&send_root->root_times_lock);
4307 
4308 	if (ctransid != start_ctransid) {
4309 		WARN(1, KERN_WARNING "btrfs: the root that you're trying to "
4310 				     "send was modified in between. This is "
4311 				     "probably a bug.\n");
4312 		ret = -EIO;
4313 		goto out;
4314 	}
4315 
4316 	ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
4317 	if (ret < 0)
4318 		goto out;
4319 	if (ret)
4320 		goto out_finish;
4321 
4322 	while (1) {
4323 		/*
4324 		 * When someone want to commit while we iterate, end the
4325 		 * joined transaction and rejoin.
4326 		 */
4327 		if (btrfs_should_end_transaction(trans, send_root)) {
4328 			ret = btrfs_end_transaction(trans, send_root);
4329 			trans = NULL;
4330 			if (ret < 0)
4331 				goto out;
4332 			btrfs_release_path(path);
4333 			goto join_trans;
4334 		}
4335 
4336 		eb = path->nodes[0];
4337 		slot = path->slots[0];
4338 		btrfs_item_key_to_cpu(eb, &found_key, slot);
4339 
4340 		ret = changed_cb(send_root, NULL, path, NULL,
4341 				&found_key, BTRFS_COMPARE_TREE_NEW, sctx);
4342 		if (ret < 0)
4343 			goto out;
4344 
4345 		key.objectid = found_key.objectid;
4346 		key.type = found_key.type;
4347 		key.offset = found_key.offset + 1;
4348 
4349 		ret = btrfs_next_item(send_root, path);
4350 		if (ret < 0)
4351 			goto out;
4352 		if (ret) {
4353 			ret  = 0;
4354 			break;
4355 		}
4356 	}
4357 
4358 out_finish:
4359 	ret = finish_inode_if_needed(sctx, 1);
4360 
4361 out:
4362 	btrfs_free_path(path);
4363 	if (trans) {
4364 		if (!ret)
4365 			ret = btrfs_end_transaction(trans, send_root);
4366 		else
4367 			btrfs_end_transaction(trans, send_root);
4368 	}
4369 	return ret;
4370 }
4371 
4372 static int send_subvol(struct send_ctx *sctx)
4373 {
4374 	int ret;
4375 
4376 	ret = send_header(sctx);
4377 	if (ret < 0)
4378 		goto out;
4379 
4380 	ret = send_subvol_begin(sctx);
4381 	if (ret < 0)
4382 		goto out;
4383 
4384 	if (sctx->parent_root) {
4385 		ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root,
4386 				changed_cb, sctx);
4387 		if (ret < 0)
4388 			goto out;
4389 		ret = finish_inode_if_needed(sctx, 1);
4390 		if (ret < 0)
4391 			goto out;
4392 	} else {
4393 		ret = full_send_tree(sctx);
4394 		if (ret < 0)
4395 			goto out;
4396 	}
4397 
4398 out:
4399 	if (!ret)
4400 		ret = close_cur_inode_file(sctx);
4401 	else
4402 		close_cur_inode_file(sctx);
4403 
4404 	free_recorded_refs(sctx);
4405 	return ret;
4406 }
4407 
4408 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4409 {
4410 	int ret = 0;
4411 	struct btrfs_root *send_root;
4412 	struct btrfs_root *clone_root;
4413 	struct btrfs_fs_info *fs_info;
4414 	struct btrfs_ioctl_send_args *arg = NULL;
4415 	struct btrfs_key key;
4416 	struct file *filp = NULL;
4417 	struct send_ctx *sctx = NULL;
4418 	u32 i;
4419 	u64 *clone_sources_tmp = NULL;
4420 
4421 	if (!capable(CAP_SYS_ADMIN))
4422 		return -EPERM;
4423 
4424 	send_root = BTRFS_I(fdentry(mnt_file)->d_inode)->root;
4425 	fs_info = send_root->fs_info;
4426 
4427 	arg = memdup_user(arg_, sizeof(*arg));
4428 	if (IS_ERR(arg)) {
4429 		ret = PTR_ERR(arg);
4430 		arg = NULL;
4431 		goto out;
4432 	}
4433 
4434 	if (!access_ok(VERIFY_READ, arg->clone_sources,
4435 			sizeof(*arg->clone_sources *
4436 			arg->clone_sources_count))) {
4437 		ret = -EFAULT;
4438 		goto out;
4439 	}
4440 
4441 	sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
4442 	if (!sctx) {
4443 		ret = -ENOMEM;
4444 		goto out;
4445 	}
4446 
4447 	INIT_LIST_HEAD(&sctx->new_refs);
4448 	INIT_LIST_HEAD(&sctx->deleted_refs);
4449 	INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
4450 	INIT_LIST_HEAD(&sctx->name_cache_list);
4451 
4452 	sctx->send_filp = fget(arg->send_fd);
4453 	if (IS_ERR(sctx->send_filp)) {
4454 		ret = PTR_ERR(sctx->send_filp);
4455 		goto out;
4456 	}
4457 
4458 	sctx->mnt = mnt_file->f_path.mnt;
4459 
4460 	sctx->send_root = send_root;
4461 	sctx->clone_roots_cnt = arg->clone_sources_count;
4462 
4463 	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
4464 	sctx->send_buf = vmalloc(sctx->send_max_size);
4465 	if (!sctx->send_buf) {
4466 		ret = -ENOMEM;
4467 		goto out;
4468 	}
4469 
4470 	sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
4471 	if (!sctx->read_buf) {
4472 		ret = -ENOMEM;
4473 		goto out;
4474 	}
4475 
4476 	sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
4477 			(arg->clone_sources_count + 1));
4478 	if (!sctx->clone_roots) {
4479 		ret = -ENOMEM;
4480 		goto out;
4481 	}
4482 
4483 	if (arg->clone_sources_count) {
4484 		clone_sources_tmp = vmalloc(arg->clone_sources_count *
4485 				sizeof(*arg->clone_sources));
4486 		if (!clone_sources_tmp) {
4487 			ret = -ENOMEM;
4488 			goto out;
4489 		}
4490 
4491 		ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
4492 				arg->clone_sources_count *
4493 				sizeof(*arg->clone_sources));
4494 		if (ret) {
4495 			ret = -EFAULT;
4496 			goto out;
4497 		}
4498 
4499 		for (i = 0; i < arg->clone_sources_count; i++) {
4500 			key.objectid = clone_sources_tmp[i];
4501 			key.type = BTRFS_ROOT_ITEM_KEY;
4502 			key.offset = (u64)-1;
4503 			clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
4504 			if (!clone_root) {
4505 				ret = -EINVAL;
4506 				goto out;
4507 			}
4508 			if (IS_ERR(clone_root)) {
4509 				ret = PTR_ERR(clone_root);
4510 				goto out;
4511 			}
4512 			sctx->clone_roots[i].root = clone_root;
4513 		}
4514 		vfree(clone_sources_tmp);
4515 		clone_sources_tmp = NULL;
4516 	}
4517 
4518 	if (arg->parent_root) {
4519 		key.objectid = arg->parent_root;
4520 		key.type = BTRFS_ROOT_ITEM_KEY;
4521 		key.offset = (u64)-1;
4522 		sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
4523 		if (!sctx->parent_root) {
4524 			ret = -EINVAL;
4525 			goto out;
4526 		}
4527 	}
4528 
4529 	/*
4530 	 * Clones from send_root are allowed, but only if the clone source
4531 	 * is behind the current send position. This is checked while searching
4532 	 * for possible clone sources.
4533 	 */
4534 	sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
4535 
4536 	/* We do a bsearch later */
4537 	sort(sctx->clone_roots, sctx->clone_roots_cnt,
4538 			sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
4539 			NULL);
4540 
4541 	ret = send_subvol(sctx);
4542 	if (ret < 0)
4543 		goto out;
4544 
4545 	ret = begin_cmd(sctx, BTRFS_SEND_C_END);
4546 	if (ret < 0)
4547 		goto out;
4548 	ret = send_cmd(sctx);
4549 	if (ret < 0)
4550 		goto out;
4551 
4552 out:
4553 	if (filp)
4554 		fput(filp);
4555 	kfree(arg);
4556 	vfree(clone_sources_tmp);
4557 
4558 	if (sctx) {
4559 		if (sctx->send_filp)
4560 			fput(sctx->send_filp);
4561 
4562 		vfree(sctx->clone_roots);
4563 		vfree(sctx->send_buf);
4564 		vfree(sctx->read_buf);
4565 
4566 		name_cache_free(sctx);
4567 
4568 		kfree(sctx);
4569 	}
4570 
4571 	return ret;
4572 }
4573