/*
 * Copyright (C) 2012 Alexander Block.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/bsearch.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sort.h>
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/radix-tree.h>
#include <linux/vmalloc.h>
#include <linux/string.h>

#include "send.h"
#include "backref.h"
#include "hash.h"
#include "locking.h"
#include "disk-io.h"
#include "btrfs_inode.h"
#include "transaction.h"

static int g_verbose = 0;

#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__)

/*
 * A fs_path is a helper to dynamically build path names with unknown size.
 * It reallocates the internal buffer on demand.
 * It allows fast adding of path elements on the right side (normal path) and
 * fast adding to the left side (reversed path). A reversed path can also be
 * unreversed if needed.
 */
struct fs_path {
	union {
		struct {
			char *start;
			char *end;

			char *buf;
			unsigned short buf_len:15;
			unsigned short reversed:1;
			char inline_buf[];
		};
		/*
		 * Average path length does not exceed 200 bytes, we'll have
		 * better packing in the slab and higher chance to satisfy
		 * an allocation later during send.
		 */
		char pad[256];
	};
};
#define FS_PATH_INLINE_SIZE \
	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))

/* reused for each extent */
struct clone_root {
	struct btrfs_root *root;
	u64 ino;
	u64 offset;

	u64 found_refs;
};

#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)

struct send_ctx {
	struct file *send_filp;
	loff_t send_off;
	char *send_buf;
	u32 send_size;
	u32 send_max_size;
	u64 total_send_size;
	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */

	struct btrfs_root *send_root;
	struct btrfs_root *parent_root;
	struct clone_root *clone_roots;
	int clone_roots_cnt;

	/* current state of the compare_tree call */
	struct btrfs_path *left_path;
	struct btrfs_path *right_path;
	struct btrfs_key *cmp_key;

	/*
	 * Info about the currently processed inode. In case of deleted inodes,
	 * these are the values from the deleted inode.
	 */
	u64 cur_ino;
	u64 cur_inode_gen;
	int cur_inode_new;
	int cur_inode_new_gen;
	int cur_inode_deleted;
	u64 cur_inode_size;
	u64 cur_inode_mode;
	u64 cur_inode_rdev;
	u64 cur_inode_last_extent;

	u64 send_progress;

	struct list_head new_refs;
	struct list_head deleted_refs;

	struct radix_tree_root name_cache;
	struct list_head name_cache_list;
	int name_cache_size;

	struct file_ra_state ra;

	char *read_buf;

	/*
	 * We process inodes by their increasing order, so if before an
	 * incremental send we reverse the parent/child relationship of
	 * directories such that a directory with a lower inode number was
	 * the parent of a directory with a higher inode number, and the one
	 * becoming the new parent got renamed too, we can't rename/move the
	 * directory with lower inode number when we finish processing it - we
	 * must process the directory with higher inode number first, then
	 * rename/move it and then rename/move the directory with lower inode
	 * number. Example follows.
	 *
	 * Tree state when the first send was performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |
	 *         |
	 *         |-- c           (ino 259)
	 *         |   |-- d       (ino 260)
	 *         |
	 *         |-- c2          (ino 261)
	 *
	 * Tree state when the second (incremental) send is performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |-- c2          (ino 261)
	 *             |-- d2      (ino 260)
	 *                 |-- cc  (ino 259)
	 *
	 * The sequence of steps that lead to the second state was:
	 *
	 * mv /a/b/c/d /a/b/c2/d2
	 * mv /a/b/c /a/b/c2/d2/cc
	 *
	 * "c" has lower inode number, but we can't move it (2nd mv operation)
	 * before we move "d", which has higher inode number.
	 *
	 * So we just memorize which move/rename operations must be performed
	 * later when their respective parent is processed and moved/renamed.
	 */

	/* Indexed by parent directory inode number. */
	struct rb_root pending_dir_moves;

	/*
	 * Reverse index, indexed by the inode number of a directory that
	 * is waiting for the move/rename of its immediate parent before its
	 * own move/rename can be performed.
	 */
	struct rb_root waiting_dir_moves;

	/*
	 * A directory that is going to be rm'ed might have a child directory
	 * which is in the pending directory moves index above. In this case,
	 * the directory can only be removed after the move/rename of its child
	 * is performed. Example:
	 *
	 * Parent snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- c/           (ino 259)
	 *         |   |-- x/       (ino 260)
	 *         |
	 *         |-- y/           (ino 261)
	 *
	 * Send snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- YY/          (ino 261)
	 *              |-- x/      (ino 260)
	 *
	 * Sequence of steps that lead to the send snapshot:
	 * rm -f /a/b/c/foo.txt
	 * mv /a/b/y /a/b/YY
	 * mv /a/b/c/x /a/b/YY
	 * rmdir /a/b/c
	 *
	 * When the child is processed, its move/rename is delayed until its
	 * parent is processed (as explained above), but all other operations
	 * like update utimes, chown, chgrp, etc, are performed and the paths
	 * that it uses for those operations must use the orphanized name of
	 * its parent (the directory we're going to rm later), so we need to
	 * memorize that name.
	 *
	 * Indexed by the inode number of the directory to be deleted.
	 */
	struct rb_root orphan_dirs;
};

struct pending_dir_move {
	struct rb_node node;
	struct list_head list;
	u64 parent_ino;
	u64 ino;
	u64 gen;
	struct list_head update_refs;
};

struct waiting_dir_move {
	struct rb_node node;
	u64 ino;
	/*
	 * There might be some directory that could not be removed because it
	 * was waiting for this directory inode to be moved first. Therefore
	 * after this directory is moved, we can try to rmdir the inode
	 * rmdir_ino.
	 */
	u64 rmdir_ino;
};

struct orphan_dir_info {
	struct rb_node node;
	u64 ino;
	u64 gen;
};

struct name_cache_entry {
	struct list_head list;
	/*
	 * radix_tree has only 32bit entries but we need to handle 64bit inums.
	 * We use the lower 32bit of the 64bit inum to store it in the tree. If
	 * more than one inum would fall into the same entry, we use radix_list
	 * to store the additional entries. radix_list is also used to store
	 * entries where two entries have the same inum but different
	 * generations.
	 */
	struct list_head radix_list;
	u64 ino;
	u64 gen;
	u64 parent_ino;
	u64 parent_gen;
	int ret;
	int need_later_update;
	int name_len;
	char name[];
};

static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);

static struct waiting_dir_move *
get_waiting_dir_move(struct send_ctx *sctx, u64 ino);

static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);

static int need_send_hole(struct send_ctx *sctx)
{
	return (sctx->parent_root && !sctx->cur_inode_new &&
		!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
		S_ISREG(sctx->cur_inode_mode));
}

static void fs_path_reset(struct fs_path *p)
{
	if (p->reversed) {
		p->start = p->buf + p->buf_len - 1;
		p->end = p->start;
		*p->start = 0;
	} else {
		p->start = p->buf;
		p->end = p->start;
		*p->start = 0;
	}
}

static struct fs_path *fs_path_alloc(void)
{
	struct fs_path *p;

	p = kmalloc(sizeof(*p), GFP_NOFS);
	if (!p)
		return NULL;
	p->reversed = 0;
	p->buf = p->inline_buf;
	p->buf_len = FS_PATH_INLINE_SIZE;
	fs_path_reset(p);
	return p;
}

static struct fs_path *fs_path_alloc_reversed(void)
{
	struct fs_path *p;

	p = fs_path_alloc();
	if (!p)
		return NULL;
	p->reversed = 1;
	fs_path_reset(p);
	return p;
}

static void fs_path_free(struct fs_path *p)
{
	if (!p)
		return;
	if (p->buf != p->inline_buf)
		kfree(p->buf);
	kfree(p);
}

static int fs_path_len(struct fs_path *p)
{
	return p->end - p->start;
}

static int fs_path_ensure_buf(struct fs_path *p, int len)
{
	char *tmp_buf;
	int path_len;
	int old_buf_len;

	len++;

	if (p->buf_len >= len)
		return 0;

	if (len > PATH_MAX) {
		WARN_ON(1);
		return -ENOMEM;
	}

	path_len = p->end - p->start;
	old_buf_len = p->buf_len;

	/*
	 * First time the inline_buf does not suffice
	 */
	if (p->buf == p->inline_buf)
		tmp_buf = kmalloc(len, GFP_NOFS);
	else
		tmp_buf = krealloc(p->buf, len, GFP_NOFS);
	if (!tmp_buf)
		return -ENOMEM;
	p->buf = tmp_buf;
	/*
	 * The real size of the buffer is bigger, this will let the fast path
	 * happen most of the time
	 */
	p->buf_len = ksize(p->buf);

	if (p->reversed) {
		tmp_buf = p->buf + old_buf_len - path_len - 1;
		p->end = p->buf + p->buf_len - 1;
		p->start = p->end - path_len;
		memmove(p->start, tmp_buf, path_len + 1);
	} else {
		p->start = p->buf;
		p->end = p->start + path_len;
	}
	return 0;
}

static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
				   char **prepared)
{
	int ret;
	int new_len;

	new_len = p->end - p->start + name_len;
	if (p->start != p->end)
		new_len++;
	ret = fs_path_ensure_buf(p, new_len);
	if (ret < 0)
		goto out;

	if (p->reversed) {
		if (p->start != p->end)
			*--p->start = '/';
		p->start -= name_len;
		*prepared = p->start;
	} else {
		if (p->start != p->end)
			*p->end++ = '/';
		*prepared = p->end;
		p->end += name_len;
		*p->end = 0;
	}

out:
	return ret;
}

static int fs_path_add(struct fs_path *p, const char *name, int name_len)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, name_len, &prepared);
	if (ret < 0)
		goto out;
	memcpy(prepared, name, name_len);

out:
	return ret;
}

static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
	if (ret < 0)
		goto out;
	memcpy(prepared, p2->start, p2->end - p2->start);

out:
	return ret;
}

static int fs_path_add_from_extent_buffer(struct fs_path *p,
					  struct extent_buffer *eb,
					  unsigned long off, int len)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, len, &prepared);
	if (ret < 0)
		goto out;

	read_extent_buffer(eb, prepared, off, len);

out:
	return ret;
}

static int fs_path_copy(struct fs_path *p, struct fs_path *from)
{
	int ret;

	p->reversed = from->reversed;
	fs_path_reset(p);

	ret = fs_path_add_path(p, from);

	return ret;
}

static void fs_path_unreverse(struct fs_path *p)
{
	char *tmp;
	int len;

	if (!p->reversed)
		return;

	tmp = p->start;
	len = p->end - p->start;
	p->start = p->buf;
	p->end = p->start + len;
	memmove(p->start, tmp, len + 1);
	p->reversed = 0;
}
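
/*
 * Illustrative sketch (not part of the original file): how the fs_path
 * helpers above are typically combined. A reversed path is built from
 * leaf to root and then unreversed, which is what get_cur_path() further
 * below does. The function name is made up for illustration only.
 */
static __maybe_unused int fs_path_example(void)
{
	struct fs_path *p;
	int ret;

	p = fs_path_alloc_reversed();
	if (!p)
		return -ENOMEM;

	/* Add components leaf-first; in reversed mode each one is prepended. */
	ret = fs_path_add(p, "file", 4);	/* p->start: "file" */
	if (!ret)
		ret = fs_path_add(p, "dir", 3);	/* p->start: "dir/file" */
	if (!ret) {
		/* Make p->start point at a normal, left-to-right path. */
		fs_path_unreverse(p);
		ASSERT(strcmp(p->start, "dir/file") == 0);
	}

	fs_path_free(p);
	return ret;
}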

static struct btrfs_path *alloc_path_for_send(void)
{
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return NULL;
	path->search_commit_root = 1;
	path->skip_locking = 1;
	path->need_commit_sem = 1;
	return path;
}

static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
{
	int ret;
	mm_segment_t old_fs;
	u32 pos = 0;

	old_fs = get_fs();
	set_fs(KERNEL_DS);

	while (pos < len) {
		ret = vfs_write(filp, (char *)buf + pos, len - pos, off);
		/* TODO handle that correctly */
		/*if (ret == -ERESTARTSYS) {
			continue;
		}*/
		if (ret < 0)
			goto out;
		if (ret == 0) {
			ret = -EIO;
			goto out;
		}
		pos += ret;
	}

	ret = 0;

out:
	set_fs(old_fs);
	return ret;
}

static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
{
	struct btrfs_tlv_header *hdr;
	int total_len = sizeof(*hdr) + len;
	int left = sctx->send_max_size - sctx->send_size;

	if (unlikely(left < total_len))
		return -EOVERFLOW;

	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
	hdr->tlv_type = cpu_to_le16(attr);
	hdr->tlv_len = cpu_to_le16(len);
	memcpy(hdr + 1, data, len);
	sctx->send_size += total_len;

	return 0;
}

#define TLV_PUT_DEFINE_INT(bits) \
	static int tlv_put_u##bits(struct send_ctx *sctx,		\
				   u##bits attr, u##bits value)		\
	{								\
		__le##bits __tmp = cpu_to_le##bits(value);		\
		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));	\
	}

TLV_PUT_DEFINE_INT(64)

static int tlv_put_string(struct send_ctx *sctx, u16 attr,
			  const char *str, int len)
{
	if (len == -1)
		len = strlen(str);
	return tlv_put(sctx, attr, str, len);
}

static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
			const u8 *uuid)
{
	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
}

static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
				  struct extent_buffer *eb,
				  struct btrfs_timespec *ts)
{
	struct btrfs_timespec bts;
	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
	return tlv_put(sctx, attr, &bts, sizeof(bts));
}


#define TLV_PUT(sctx, attrtype, attrlen, data) \
	do { \
		ret = tlv_put(sctx, attrtype, attrlen, data); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_INT(sctx, attrtype, bits, value) \
	do { \
		ret = tlv_put_u##bits(sctx, attrtype, value); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
#define TLV_PUT_STRING(sctx, attrtype, str, len) \
	do { \
		ret = tlv_put_string(sctx, attrtype, str, len); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_PATH(sctx, attrtype, p) \
	do { \
		ret = tlv_put_string(sctx, attrtype, p->start, \
				     p->end - p->start); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_UUID(sctx, attrtype, uuid) \
	do { \
		ret = tlv_put_uuid(sctx, attrtype, uuid); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
	do { \
		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

static int send_header(struct send_ctx *sctx)
{
	struct btrfs_stream_header hdr;

	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);

	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
			 &sctx->send_off);
}

/*
 * For each command/item we want to send to userspace, we call this function.
 */
static int begin_cmd(struct send_ctx *sctx, int cmd)
{
	struct btrfs_cmd_header *hdr;

	if (WARN_ON(!sctx->send_buf))
		return -EINVAL;

	BUG_ON(sctx->send_size);

	sctx->send_size += sizeof(*hdr);
	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->cmd = cpu_to_le16(cmd);

	return 0;
}

static int send_cmd(struct send_ctx *sctx)
{
	int ret;
	struct btrfs_cmd_header *hdr;
	u32 crc;

	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
	hdr->crc = 0;

	crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
	hdr->crc = cpu_to_le32(crc);

	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
			&sctx->send_off);

	sctx->total_send_size += sctx->send_size;
	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
	sctx->send_size = 0;

	return ret;
}
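
/*
 * Added note (not in the original): the stream layout that results from
 * send_header(), begin_cmd(), send_cmd() and tlv_put() above is:
 *
 *   stream header: magic (BTRFS_SEND_STREAM_MAGIC), __le32 version
 *   per command:   __le32 len   (payload length, command header excluded)
 *                  __le16 cmd
 *                  __le32 crc   (crc32c over the whole command, computed
 *                                with this field temporarily set to 0)
 *                  payload:     sequence of TLVs, each one being
 *                               __le16 tlv_type, __le16 tlv_len, data
 */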

/*
 * Sends a move instruction to user space
 */
static int send_rename(struct send_ctx *sctx,
		       struct fs_path *from, struct fs_path *to)
{
	int ret;

	verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a link instruction to user space
 */
static int send_link(struct send_ctx *sctx,
		     struct fs_path *path, struct fs_path *lnk)
{
	int ret;

	verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends an unlink instruction to user space
 */
static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
{
	int ret;

	verbose_printk("btrfs: send_unlink %s\n", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a rmdir instruction to user space
 */
static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
{
	int ret;

	verbose_printk("btrfs: send_rmdir %s\n", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Helper function to retrieve some fields from an inode item.
 */
static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
			    u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
			    u64 *gid, u64 *rdev)
{
	int ret;
	struct btrfs_inode_item *ii;
	struct btrfs_key key;

	key.objectid = ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		return ret;
	}

	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	if (size)
		*size = btrfs_inode_size(path->nodes[0], ii);
	if (gen)
		*gen = btrfs_inode_generation(path->nodes[0], ii);
	if (mode)
		*mode = btrfs_inode_mode(path->nodes[0], ii);
	if (uid)
		*uid = btrfs_inode_uid(path->nodes[0], ii);
	if (gid)
		*gid = btrfs_inode_gid(path->nodes[0], ii);
	if (rdev)
		*rdev = btrfs_inode_rdev(path->nodes[0], ii);

	return ret;
}

static int get_inode_info(struct btrfs_root *root,
			  u64 ino, u64 *size, u64 *gen,
			  u64 *mode, u64 *uid, u64 *gid,
			  u64 *rdev)
{
	struct btrfs_path *path;
	int ret;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;
	ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
			       rdev);
	btrfs_free_path(path);
	return ret;
}

typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
				   struct fs_path *p,
				   void *ctx);

/*
 * Helper function to iterate the entries in ONE btrfs_inode_ref or
 * btrfs_inode_extref.
 * The iterate callback may return a non-zero value to stop iteration. This can
 * be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the INODE_REF or INODE_EXTREF when called.
 */
static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
			     struct btrfs_key *found_key, int resolve,
			     iterate_inode_ref_t iterate, void *ctx)
{
	struct extent_buffer *eb = path->nodes[0];
	struct btrfs_item *item;
	struct btrfs_inode_ref *iref;
	struct btrfs_inode_extref *extref;
	struct btrfs_path *tmp_path;
	struct fs_path *p;
	u32 cur = 0;
	u32 total;
	int slot = path->slots[0];
	u32 name_len;
	char *start;
	int ret = 0;
	int num = 0;
	int index;
	u64 dir;
	unsigned long name_off;
	unsigned long elem_size;
	unsigned long ptr;

	p = fs_path_alloc_reversed();
	if (!p)
		return -ENOMEM;

	tmp_path = alloc_path_for_send();
	if (!tmp_path) {
		fs_path_free(p);
		return -ENOMEM;
	}


	if (found_key->type == BTRFS_INODE_REF_KEY) {
		ptr = (unsigned long)btrfs_item_ptr(eb, slot,
						    struct btrfs_inode_ref);
		item = btrfs_item_nr(slot);
		total = btrfs_item_size(eb, item);
		elem_size = sizeof(*iref);
	} else {
		ptr = btrfs_item_ptr_offset(eb, slot);
		total = btrfs_item_size_nr(eb, slot);
		elem_size = sizeof(*extref);
	}

	while (cur < total) {
		fs_path_reset(p);

		if (found_key->type == BTRFS_INODE_REF_KEY) {
			iref = (struct btrfs_inode_ref *)(ptr + cur);
			name_len = btrfs_inode_ref_name_len(eb, iref);
			name_off = (unsigned long)(iref + 1);
			index = btrfs_inode_ref_index(eb, iref);
			dir = found_key->offset;
		} else {
			extref = (struct btrfs_inode_extref *)(ptr + cur);
			name_len = btrfs_inode_extref_name_len(eb, extref);
			name_off = (unsigned long)&extref->name;
			index = btrfs_inode_extref_index(eb, extref);
			dir = btrfs_inode_extref_parent(eb, extref);
		}

		if (resolve) {
			start = btrfs_ref_to_path(root, tmp_path, name_len,
						  name_off, eb, dir,
						  p->buf, p->buf_len);
			if (IS_ERR(start)) {
				ret = PTR_ERR(start);
				goto out;
			}
			if (start < p->buf) {
				/* overflow, try again with a larger buffer */
				ret = fs_path_ensure_buf(p,
						p->buf_len + p->buf - start);
				if (ret < 0)
					goto out;
				start = btrfs_ref_to_path(root, tmp_path,
							  name_len, name_off,
							  eb, dir,
							  p->buf, p->buf_len);
				if (IS_ERR(start)) {
					ret = PTR_ERR(start);
					goto out;
				}
				BUG_ON(start < p->buf);
			}
			p->start = start;
		} else {
			ret = fs_path_add_from_extent_buffer(p, eb, name_off,
							     name_len);
			if (ret < 0)
				goto out;
		}

		cur += elem_size + name_len;
		ret = iterate(num, dir, index, p, ctx);
		if (ret)
			goto out;
		num++;
	}

out:
	btrfs_free_path(tmp_path);
	fs_path_free(p);
	return ret;
}

typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
				  const char *name, int name_len,
				  const char *data, int data_len,
				  u8 type, void *ctx);

/*
 * Helper function to iterate the entries in ONE btrfs_dir_item.
 * The iterate callback may return a non-zero value to stop iteration. This can
 * be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the dir item when called.
 */
static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
			    struct btrfs_key *found_key,
			    iterate_dir_item_t iterate, void *ctx)
{
	int ret = 0;
	struct extent_buffer *eb;
	struct btrfs_item *item;
	struct btrfs_dir_item *di;
	struct btrfs_key di_key;
	char *buf = NULL;
	const int buf_len = PATH_MAX;
	u32 name_len;
	u32 data_len;
	u32 cur;
	u32 len;
	u32 total;
	int slot;
	int num;
	u8 type;

	buf = kmalloc(buf_len, GFP_NOFS);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	eb = path->nodes[0];
	slot = path->slots[0];
	item = btrfs_item_nr(slot);
	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	cur = 0;
	len = 0;
	total = btrfs_item_size(eb, item);

	num = 0;
	while (cur < total) {
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		type = btrfs_dir_type(eb, di);
		btrfs_dir_item_key_to_cpu(eb, di, &di_key);

		/*
		 * Path too long
		 */
		if (name_len + data_len > buf_len) {
			ret = -ENAMETOOLONG;
			goto out;
		}

		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
				   name_len + data_len);

		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;

		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
			      data_len, type, ctx);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}

		num++;
	}

out:
	kfree(buf);
	return ret;
}

static int __copy_first_ref(int num, u64 dir, int index,
			    struct fs_path *p, void *ctx)
{
	int ret;
	struct fs_path *pt = ctx;

	ret = fs_path_copy(pt, p);
	if (ret < 0)
		return ret;

	/* we want the first only */
	return 1;
}
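
/*
 * Illustrative sketch (not part of the original file): a minimal
 * iterate_dir_item_t callback in the style of __copy_first_ref() above.
 * Returning 1 stops the iteration without an error, mirroring the
 * convention documented above. The name and the ctx convention (a
 * NUL-terminated string to look for) are made up for illustration.
 */
static __maybe_unused int __find_dir_entry_example(int num,
						   struct btrfs_key *di_key,
						   const char *name,
						   int name_len,
						   const char *data,
						   int data_len,
						   u8 type, void *ctx)
{
	const char *wanted = ctx;

	/* name is not NUL-terminated, so compare by length + memcmp. */
	if (name_len == strlen(wanted) && !memcmp(name, wanted, name_len))
		return 1;	/* found it, stop iterating */
	return 0;		/* keep going */
}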

/*
 * Retrieve the first path of an inode. If an inode has more than one
 * ref/hardlink, this is ignored.
 */
static int get_inode_path(struct btrfs_root *root,
			  u64 ino, struct fs_path *path)
{
	int ret;
	struct btrfs_key key, found_key;
	struct btrfs_path *p;

	p = alloc_path_for_send();
	if (!p)
		return -ENOMEM;

	fs_path_reset(path);

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		ret = 1;
		goto out;
	}
	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
	if (found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	ret = iterate_inode_ref(root, p, &found_key, 1,
				__copy_first_ref, path);
	if (ret < 0)
		goto out;
	ret = 0;

out:
	btrfs_free_path(p);
	return ret;
}

struct backref_ctx {
	struct send_ctx *sctx;

	struct btrfs_path *path;
	/* number of total found references */
	u64 found;

	/*
	 * used for clones found in send_root. clones found behind cur_objectid
	 * and cur_offset are not considered as allowed clones.
	 */
	u64 cur_objectid;
	u64 cur_offset;

	/* may be truncated in case it's the last extent in a file */
	u64 extent_len;

	/* Just to check for bugs in backref resolving */
	int found_itself;
};

static int __clone_root_cmp_bsearch(const void *key, const void *elt)
{
	u64 root = (u64)(uintptr_t)key;
	struct clone_root *cr = (struct clone_root *)elt;

	if (root < cr->root->objectid)
		return -1;
	if (root > cr->root->objectid)
		return 1;
	return 0;
}

static int __clone_root_cmp_sort(const void *e1, const void *e2)
{
	struct clone_root *cr1 = (struct clone_root *)e1;
	struct clone_root *cr2 = (struct clone_root *)e2;

	if (cr1->root->objectid < cr2->root->objectid)
		return -1;
	if (cr1->root->objectid > cr2->root->objectid)
		return 1;
	return 0;
}

/*
 * Called for every backref that is found for the current extent.
 * Results are collected in sctx->clone_roots->ino/offset/found_refs
 */
static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
{
	struct backref_ctx *bctx = ctx_;
	struct clone_root *found;
	int ret;
	u64 i_size;

	/* First check if the root is in the list of accepted clone sources */
	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
			bctx->sctx->clone_roots_cnt,
			sizeof(struct clone_root),
			__clone_root_cmp_bsearch);
	if (!found)
		return 0;

	if (found->root == bctx->sctx->send_root &&
	    ino == bctx->cur_objectid &&
	    offset == bctx->cur_offset) {
		bctx->found_itself = 1;
	}

	/*
	 * There are inodes that have extents that lie behind their i_size.
	 * Don't accept clones from these extents.
	 */
	ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
			       NULL, NULL, NULL);
	btrfs_release_path(bctx->path);
	if (ret < 0)
		return ret;

	if (offset + bctx->extent_len > i_size)
		return 0;

	/*
	 * Make sure we don't consider clones from send_root that are
	 * behind the current inode/offset.
	 */
	if (found->root == bctx->sctx->send_root) {
		/*
		 * TODO for the moment we don't accept clones from the inode
		 * that is currently being sent. We may change this when
		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
		 * file.
		 */
		if (ino >= bctx->cur_objectid)
			return 0;
#if 0
		if (ino > bctx->cur_objectid)
			return 0;
		if (offset + bctx->extent_len > bctx->cur_offset)
			return 0;
#endif
	}

	bctx->found++;
	found->found_refs++;
	if (ino < found->ino) {
		found->ino = ino;
		found->offset = offset;
	} else if (found->ino == ino) {
		/*
		 * same extent found more than once in the same file.
		 */
		if (found->offset > offset + bctx->extent_len)
			found->offset = offset;
	}

	return 0;
}

/*
 * Given an inode, offset and extent item, it finds a good clone for a clone
 * instruction. Returns -ENOENT when none could be found. The function makes
 * sure that the returned clone is usable at the point where sending is at the
 * moment. This means that no clones are accepted which lie behind the current
 * inode+offset.
 *
 * path must point to the extent item when called.
 */
static int find_extent_clone(struct send_ctx *sctx,
			     struct btrfs_path *path,
			     u64 ino, u64 data_offset,
			     u64 ino_size,
			     struct clone_root **found)
{
	int ret;
	int extent_type;
	u64 logical;
	u64 disk_byte;
	u64 num_bytes;
	u64 extent_item_pos;
	u64 flags = 0;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *eb = path->nodes[0];
	struct backref_ctx *backref_ctx = NULL;
	struct clone_root *cur_clone_root;
	struct btrfs_key found_key;
	struct btrfs_path *tmp_path;
	int compressed;
	u32 i;

	tmp_path = alloc_path_for_send();
	if (!tmp_path)
		return -ENOMEM;

	/* We only use this path under the commit sem */
	tmp_path->need_commit_sem = 0;

	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
	if (!backref_ctx) {
		ret = -ENOMEM;
		goto out;
	}

	backref_ctx->path = tmp_path;

	if (data_offset >= ino_size) {
		/*
		 * There may be extents that lie behind the file's size.
		 * I at least had this in combination with snapshotting while
		 * writing large files.
		 */
		ret = 0;
		goto out;
	}

	fi = btrfs_item_ptr(eb, path->slots[0],
			    struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);
	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		ret = -ENOENT;
		goto out;
	}
	compressed = btrfs_file_extent_compression(eb, fi);

	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
	if (disk_byte == 0) {
		ret = -ENOENT;
		goto out;
	}
	logical = disk_byte + btrfs_file_extent_offset(eb, fi);

	down_read(&sctx->send_root->fs_info->commit_root_sem);
	ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
				  &found_key, &flags);
	up_read(&sctx->send_root->fs_info->commit_root_sem);
	btrfs_release_path(tmp_path);

	if (ret < 0)
		goto out;
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ret = -EIO;
		goto out;
	}

	/*
	 * Setup the clone roots.
	 */
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		cur_clone_root = sctx->clone_roots + i;
		cur_clone_root->ino = (u64)-1;
		cur_clone_root->offset = 0;
		cur_clone_root->found_refs = 0;
	}

	backref_ctx->sctx = sctx;
	backref_ctx->found = 0;
	backref_ctx->cur_objectid = ino;
	backref_ctx->cur_offset = data_offset;
	backref_ctx->found_itself = 0;
	backref_ctx->extent_len = num_bytes;

	/*
	 * The last extent of a file may be too large due to page alignment.
	 * We need to adjust extent_len in this case so that the checks in
	 * __iterate_backrefs work.
	 */
	if (data_offset + num_bytes >= ino_size)
		backref_ctx->extent_len = ino_size - data_offset;

	/*
	 * Now collect all backrefs.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		extent_item_pos = logical - found_key.objectid;
	else
		extent_item_pos = 0;
	ret = iterate_extent_inodes(sctx->send_root->fs_info,
				    found_key.objectid, extent_item_pos, 1,
				    __iterate_backrefs, backref_ctx);

	if (ret < 0)
		goto out;

	if (!backref_ctx->found_itself) {
		/* found a bug in backref code? */
		ret = -EIO;
		btrfs_err(sctx->send_root->fs_info, "did not find backref in "
				"send_root. inode=%llu, offset=%llu, "
				"disk_byte=%llu found extent=%llu\n",
				ino, data_offset, disk_byte, found_key.objectid);
		goto out;
	}

	verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
			"ino=%llu, "
			"num_bytes=%llu, logical=%llu\n",
			data_offset, ino, num_bytes, logical);

	if (!backref_ctx->found)
		verbose_printk("btrfs: no clones found\n");

	cur_clone_root = NULL;
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		if (sctx->clone_roots[i].found_refs) {
			if (!cur_clone_root)
				cur_clone_root = sctx->clone_roots + i;
			else if (sctx->clone_roots[i].root == sctx->send_root)
				/* prefer clones from send_root over others */
				cur_clone_root = sctx->clone_roots + i;
		}

	}

	if (cur_clone_root) {
		if (compressed != BTRFS_COMPRESS_NONE) {
			/*
			 * Offsets given by iterate_extent_inodes() are relative
			 * to the start of the extent, we need to add logical
			 * offset from the file extent item.
			 * (See why at backref.c:check_extent_in_eb())
			 */
			cur_clone_root->offset += btrfs_file_extent_offset(eb,
									   fi);
		}
		*found = cur_clone_root;
		ret = 0;
	} else {
		ret = -ENOENT;
	}

out:
	btrfs_free_path(tmp_path);
	kfree(backref_ctx);
	return ret;
}

static int read_symlink(struct btrfs_root *root,
			u64 ino,
			struct fs_path *dest)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_file_extent_item *ei;
	u8 type;
	u8 compression;
	unsigned long off;
	int len;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret);

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_file_extent_item);
	type = btrfs_file_extent_type(path->nodes[0], ei);
	compression = btrfs_file_extent_compression(path->nodes[0], ei);
	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
	BUG_ON(compression);

	off = btrfs_file_extent_inline_start(ei);
	len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);

	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Helper function to generate a file name that is unique in the root of
 * send_root and parent_root. This is used to generate names for orphan inodes.
 */
static int gen_unique_name(struct send_ctx *sctx,
			   u64 ino, u64 gen,
			   struct fs_path *dest)
{
	int ret = 0;
	struct btrfs_path *path;
	struct btrfs_dir_item *di;
	char tmp[64];
	int len;
	u64 idx = 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	while (1) {
		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
			       ino, gen, idx);
		ASSERT(len < sizeof(tmp));

		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
					   path, BTRFS_FIRST_FREE_OBJECTID,
					   tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}

		if (!sctx->parent_root) {
			/* unique */
			ret = 0;
			break;
		}

		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
					   path, BTRFS_FIRST_FREE_OBJECTID,
					   tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}
		/* unique */
		break;
	}

	ret = fs_path_add(dest, tmp, strlen(tmp));

out:
	btrfs_free_path(path);
	return ret;
}

enum inode_state {
	inode_state_no_change,
	inode_state_will_create,
	inode_state_did_create,
	inode_state_will_delete,
	inode_state_did_delete,
};

static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;
	int left_ret;
	int right_ret;
	u64 left_gen;
	u64 right_gen;

	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
			     NULL, NULL);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	left_ret = ret;

	if (!sctx->parent_root) {
		right_ret = -ENOENT;
	} else {
		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
				     NULL, NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		right_ret = ret;
	}

	if (!left_ret && !right_ret) {
		if (left_gen == gen && right_gen == gen) {
			ret = inode_state_no_change;
		} else if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else if (!left_ret) {
		if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else {
			ret = -ENOENT;
		}
	} else if (!right_ret) {
		if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else {
		ret = -ENOENT;
	}

out:
	return ret;
}

static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;

	ret = get_cur_inode_state(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (ret == inode_state_no_change ||
	    ret == inode_state_did_create ||
	    ret == inode_state_will_delete)
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}
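
/*
 * Added note (not in the original): summary of the decision matrix that
 * get_cur_inode_state() implements, where "left" is send_root, "right"
 * is parent_root and "processed" means ino < sctx->send_progress:
 *
 *   gen matches in left and right -> inode_state_no_change
 *   gen matches in left only      -> did_create if processed, else will_create
 *   gen matches in right only     -> did_delete if processed, else will_delete
 *   no match anywhere             -> -ENOENT
 */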

/*
 * Helper function to lookup a dir item in a dir.
 */
static int lookup_dir_item_inode(struct btrfs_root *root,
				 u64 dir, const char *name, int name_len,
				 u64 *found_inode,
				 u8 *found_type)
{
	int ret = 0;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(NULL, root, path,
				   dir, name, name_len, 0);
	if (!di) {
		ret = -ENOENT;
		goto out;
	}
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		goto out;
	}
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
	*found_inode = key.objectid;
	*found_type = btrfs_dir_type(path->nodes[0], di);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent
 * dir, the generation of the parent dir and the name of the dir entry.
 */
static int get_first_ref(struct btrfs_root *root, u64 ino,
			 u64 *dir, u64 *dir_gen, struct fs_path *name)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	int len;
	u64 parent_dir;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
	if (ret < 0)
		goto out;
	if (!ret)
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
	if (ret || found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	if (found_key.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *iref;
		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_inode_ref);
		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
						     (unsigned long)(iref + 1),
						     len);
		parent_dir = found_key.offset;
	} else {
		struct btrfs_inode_extref *extref;
		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					struct btrfs_inode_extref);
		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
					(unsigned long)&extref->name, len);
		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
	}
	if (ret < 0)
		goto out;
	btrfs_release_path(path);

	ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL,
			     NULL, NULL);
	if (ret < 0)
		goto out;

	*dir = parent_dir;

out:
	btrfs_free_path(path);
	return ret;
}

static int is_first_ref(struct btrfs_root *root,
			u64 ino, u64 dir,
			const char *name, int name_len)
{
	int ret;
	struct fs_path *tmp_name;
	u64 tmp_dir;
	u64 tmp_dir_gen;

	tmp_name = fs_path_alloc();
	if (!tmp_name)
		return -ENOMEM;

	ret = get_first_ref(root, ino, &tmp_dir, &tmp_dir_gen, tmp_name);
	if (ret < 0)
		goto out;

	if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
		ret = 0;
		goto out;
	}

	ret = !memcmp(tmp_name->start, name, name_len);

out:
	fs_path_free(tmp_name);
	return ret;
}

/*
 * Used by process_recorded_refs to determine if a new ref would overwrite an
 * already existing ref. In case it detects an overwrite, it returns the
 * inode/gen in who_ino/who_gen.
 * When an overwrite is detected, process_recorded_refs does proper orphanizing
 * to make sure later references to the overwritten inode are possible.
 * Orphanizing is however only required for the first ref of an inode.
 * process_recorded_refs does an additional is_first_ref check to see if
 * orphanizing is really required.
 */
static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
			      const char *name, int name_len,
			      u64 *who_ino, u64 *who_gen)
{
	int ret = 0;
	u64 gen;
	u64 other_inode = 0;
	u8 other_type = 0;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	/*
	 * If we have a parent root we need to verify that the parent dir was
	 * not deleted and then re-created, if it was then we have no overwrite
	 * and we can just unlink this entry.
	 */
	if (sctx->parent_root) {
		ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}
		if (gen != dir_gen)
			goto out;
	}

	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
				    &other_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Check if the overwritten ref was already processed. If yes, the ref
	 * was already unlinked/moved, so we can safely assume that we will not
	 * overwrite anything at this point in time.
	 */
	if (other_inode > sctx->send_progress) {
		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
				     who_gen, NULL, NULL, NULL, NULL);
		if (ret < 0)
			goto out;

		ret = 1;
		*who_ino = other_inode;
	} else {
		ret = 0;
	}

out:
	return ret;
}

/*
 * Checks if the ref was overwritten by an already processed inode. This is
 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
 * thus the orphan name needs to be used.
 * process_recorded_refs also uses it to avoid unlinking of refs that were
 * overwritten.
 */
static int did_overwrite_ref(struct send_ctx *sctx,
			     u64 dir, u64 dir_gen,
			     u64 ino, u64 ino_gen,
			     const char *name, int name_len)
{
	int ret = 0;
	u64 gen;
	u64 ow_inode;
	u8 other_type;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	/* check if the ref was overwritten by another ref */
	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
				    &ow_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		/* was never and will never be overwritten */
		ret = 0;
		goto out;
	}

	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
			     NULL, NULL);
	if (ret < 0)
		goto out;

	if (ow_inode == ino && gen == ino_gen) {
		ret = 0;
		goto out;
	}

	/* we know that it is or will be overwritten. check this now */
	if (ow_inode < sctx->send_progress)
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}

/*
 * Same as did_overwrite_ref, but also checks if it is the first ref of an
 * inode that got overwritten. This is used by process_recorded_refs to
 * determine if it has to use the path as returned by get_cur_path or the
 * orphan name.
 */
static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret = 0;
	struct fs_path *name = NULL;
	u64 dir;
	u64 dir_gen;

	if (!sctx->parent_root)
		goto out;

	name = fs_path_alloc();
	if (!name)
		return -ENOMEM;

	ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
	if (ret < 0)
		goto out;

	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
				name->start, fs_path_len(name));

out:
	fs_path_free(name);
	return ret;
}

/*
 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
 * so we need to do some special handling in case we have clashes. This function
 * takes care of this with the help of name_cache_entry::radix_list.
 * In case of error, nce is kfreed.
 */
static int name_cache_insert(struct send_ctx *sctx,
			     struct name_cache_entry *nce)
{
	int ret = 0;
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
				     (unsigned long)nce->ino);
	if (!nce_head) {
		nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
		if (!nce_head) {
			kfree(nce);
			return -ENOMEM;
		}
		INIT_LIST_HEAD(nce_head);

		ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
		if (ret < 0) {
			kfree(nce_head);
			kfree(nce);
			return ret;
		}
	}
	list_add_tail(&nce->radix_list, nce_head);
	list_add_tail(&nce->list, &sctx->name_cache_list);
	sctx->name_cache_size++;

	return ret;
}

static void name_cache_delete(struct send_ctx *sctx,
			      struct name_cache_entry *nce)
{
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
				     (unsigned long)nce->ino);
	if (!nce_head) {
		btrfs_err(sctx->send_root->fs_info,
	      "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
			nce->ino, sctx->name_cache_size);
	}

	list_del(&nce->radix_list);
	list_del(&nce->list);
	sctx->name_cache_size--;

	/*
	 * We may not get to the final release of nce_head if the lookup fails
	 */
	if (nce_head && list_empty(nce_head)) {
		radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
		kfree(nce_head);
	}
}

static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
						  u64 ino, u64 gen)
{
	struct list_head *nce_head;
	struct name_cache_entry *cur;

	nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
	if (!nce_head)
		return NULL;

	list_for_each_entry(cur, nce_head, radix_list) {
		if (cur->ino == ino && cur->gen == gen)
			return cur;
	}
	return NULL;
}
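
/*
 * Added note (not in the original): on a 32bit kernel two inode numbers
 * that differ only in their upper 32 bits, e.g. ino 5 and ino
 * (5 + (1ULL << 32)), truncate to the same radix tree index. Both entries
 * then end up on the same nce_head list, which is why name_cache_search()
 * above re-checks cur->ino (and cur->gen) after the radix_tree_lookup().
 */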

/*
 * Removes the entry from the list and adds it back to the end. This marks the
 * entry as recently used so that name_cache_clean_unused does not remove it.
 */
static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
{
	list_del(&nce->list);
	list_add_tail(&nce->list, &sctx->name_cache_list);
}

/*
 * Remove some entries from the beginning of name_cache_list.
 */
static void name_cache_clean_unused(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
		return;

	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
		nce = list_entry(sctx->name_cache_list.next,
				 struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}

static void name_cache_free(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	while (!list_empty(&sctx->name_cache_list)) {
		nce = list_entry(sctx->name_cache_list.next,
				 struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}

/*
 * Used by get_cur_path for each ref up to the root.
 * Returns 0 if it succeeded.
 * Returns 1 if the inode is not existent or got overwritten. In that case, the
 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
 * Returns <0 in case of error.
 */
static int __get_cur_name_and_parent(struct send_ctx *sctx,
				     u64 ino, u64 gen,
				     u64 *parent_ino,
				     u64 *parent_gen,
				     struct fs_path *dest)
{
	int ret;
	int nce_ret;
	struct btrfs_path *path = NULL;
	struct name_cache_entry *nce = NULL;

	/*
	 * First check if we already did a call to this function with the same
	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
	 * return the cached result.
	 */
	nce = name_cache_search(sctx, ino, gen);
	if (nce) {
		if (ino < sctx->send_progress && nce->need_later_update) {
			name_cache_delete(sctx, nce);
			kfree(nce);
			nce = NULL;
		} else {
			name_cache_used(sctx, nce);
			*parent_ino = nce->parent_ino;
			*parent_gen = nce->parent_gen;
			ret = fs_path_add(dest, nce->name, nce->name_len);
			if (ret < 0)
				goto out;
			ret = nce->ret;
			goto out;
		}
	}

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	/*
	 * If the inode is not existent yet, add the orphan name and return 1.
	 * This should only happen for the parent dir that we determine in
	 * __record_new_ref
	 */
	ret = is_inode_existent(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (!ret) {
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
		goto out_cache;
	}

	/*
	 * Depending on whether the inode was already processed or not, use
	 * send_root or parent_root for ref lookup.
	 */
	if (ino < sctx->send_progress)
		ret = get_first_ref(sctx->send_root, ino,
				    parent_ino, parent_gen, dest);
	else
		ret = get_first_ref(sctx->parent_root, ino,
				    parent_ino, parent_gen, dest);
	if (ret < 0)
		goto out;

	/*
	 * Check if the ref was overwritten by an inode's ref that was processed
	 * earlier. If yes, treat as orphan and return 1.
	 */
	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
				dest->start, dest->end - dest->start);
	if (ret < 0)
		goto out;
	if (ret) {
		fs_path_reset(dest);
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
	}

out_cache:
	/*
	 * Store the result of the lookup in the name cache.
	 */
	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
	if (!nce) {
		ret = -ENOMEM;
		goto out;
	}

	nce->ino = ino;
	nce->gen = gen;
	nce->parent_ino = *parent_ino;
	nce->parent_gen = *parent_gen;
	nce->name_len = fs_path_len(dest);
	nce->ret = ret;
	strcpy(nce->name, dest->start);

	if (ino < sctx->send_progress)
		nce->need_later_update = 0;
	else
		nce->need_later_update = 1;

	nce_ret = name_cache_insert(sctx, nce);
	if (nce_ret < 0)
		ret = nce_ret;
	name_cache_clean_unused(sctx);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Magic happens here. This function returns the first ref to an inode as it
 * would look like while receiving the stream at this point in time.
 * We walk the path up to the root. For every inode in between, we check if it
 * was already processed/sent. If yes, we continue with the parent as found
 * in send_root. If not, we continue with the parent as found in parent_root.
 * If we encounter an inode that was deleted at this point in time, we use the
 * inode's "orphan" name instead of the real name and stop. Same with new
 * inodes that were not created yet and overwritten inodes/refs.
 *
 * When do we have orphan inodes:
 * 1. When an inode is freshly created and thus no valid refs are available yet
 * 2. When a directory lost all its refs (deleted) but still has dir items
 *    inside which were not processed yet (pending for move/delete). If anyone
 *    tried to get the path to the dir items, it would get a path inside that
 *    orphan directory.
 * 3. When an inode is moved around or gets new links, it may overwrite the ref
 *    of an unprocessed inode. If in that case the first ref would be
 *    overwritten, the overwritten inode gets "orphanized". Later when we
 *    process this overwritten inode, it is restored at a new place by moving
 *    the orphan inode.
 *
 * sctx->send_progress tells this function at which point in time receiving
 * would be.
 */
2162 */ 2163 static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, 2164 struct fs_path *dest) 2165 { 2166 int ret = 0; 2167 struct fs_path *name = NULL; 2168 u64 parent_inode = 0; 2169 u64 parent_gen = 0; 2170 int stop = 0; 2171 2172 name = fs_path_alloc(); 2173 if (!name) { 2174 ret = -ENOMEM; 2175 goto out; 2176 } 2177 2178 dest->reversed = 1; 2179 fs_path_reset(dest); 2180 2181 while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { 2182 fs_path_reset(name); 2183 2184 if (is_waiting_for_rm(sctx, ino)) { 2185 ret = gen_unique_name(sctx, ino, gen, name); 2186 if (ret < 0) 2187 goto out; 2188 ret = fs_path_add_path(dest, name); 2189 break; 2190 } 2191 2192 if (is_waiting_for_move(sctx, ino)) { 2193 ret = get_first_ref(sctx->parent_root, ino, 2194 &parent_inode, &parent_gen, name); 2195 } else { 2196 ret = __get_cur_name_and_parent(sctx, ino, gen, 2197 &parent_inode, 2198 &parent_gen, name); 2199 if (ret) 2200 stop = 1; 2201 } 2202 2203 if (ret < 0) 2204 goto out; 2205 2206 ret = fs_path_add_path(dest, name); 2207 if (ret < 0) 2208 goto out; 2209 2210 ino = parent_inode; 2211 gen = parent_gen; 2212 } 2213 2214 out: 2215 fs_path_free(name); 2216 if (!ret) 2217 fs_path_unreverse(dest); 2218 return ret; 2219 } 2220 2221 /* 2222 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace 2223 */ 2224 static int send_subvol_begin(struct send_ctx *sctx) 2225 { 2226 int ret; 2227 struct btrfs_root *send_root = sctx->send_root; 2228 struct btrfs_root *parent_root = sctx->parent_root; 2229 struct btrfs_path *path; 2230 struct btrfs_key key; 2231 struct btrfs_root_ref *ref; 2232 struct extent_buffer *leaf; 2233 char *name = NULL; 2234 int namelen; 2235 2236 path = btrfs_alloc_path(); 2237 if (!path) 2238 return -ENOMEM; 2239 2240 name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS); 2241 if (!name) { 2242 btrfs_free_path(path); 2243 return -ENOMEM; 2244 } 2245 2246 key.objectid = send_root->objectid; 2247 key.type = BTRFS_ROOT_BACKREF_KEY; 2248 key.offset = 0; 2249 2250 ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root, 2251 &key, path, 1, 0); 2252 if (ret < 0) 2253 goto out; 2254 if (ret) { 2255 ret = -ENOENT; 2256 goto out; 2257 } 2258 2259 leaf = path->nodes[0]; 2260 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2261 if (key.type != BTRFS_ROOT_BACKREF_KEY || 2262 key.objectid != send_root->objectid) { 2263 ret = -ENOENT; 2264 goto out; 2265 } 2266 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); 2267 namelen = btrfs_root_ref_name_len(leaf, ref); 2268 read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); 2269 btrfs_release_path(path); 2270 2271 if (parent_root) { 2272 ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); 2273 if (ret < 0) 2274 goto out; 2275 } else { 2276 ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL); 2277 if (ret < 0) 2278 goto out; 2279 } 2280 2281 TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); 2282 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, 2283 sctx->send_root->root_item.uuid); 2284 TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, 2285 le64_to_cpu(sctx->send_root->root_item.ctransid)); 2286 if (parent_root) { 2287 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 2288 sctx->parent_root->root_item.uuid); 2289 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 2290 le64_to_cpu(sctx->parent_root->root_item.ctransid)); 2291 } 2292 2293 ret = send_cmd(sctx); 2294 2295 tlv_put_failure: 2296 out: 2297 btrfs_free_path(path); 2298 kfree(name); 2299 return ret; 2300 } 2301 2302 static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) 2303 { 2304 int 
ret = 0; 2305 struct fs_path *p; 2306 2307 verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); 2308 2309 p = fs_path_alloc(); 2310 if (!p) 2311 return -ENOMEM; 2312 2313 ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE); 2314 if (ret < 0) 2315 goto out; 2316 2317 ret = get_cur_path(sctx, ino, gen, p); 2318 if (ret < 0) 2319 goto out; 2320 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2321 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size); 2322 2323 ret = send_cmd(sctx); 2324 2325 tlv_put_failure: 2326 out: 2327 fs_path_free(p); 2328 return ret; 2329 } 2330 2331 static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) 2332 { 2333 int ret = 0; 2334 struct fs_path *p; 2335 2336 verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); 2337 2338 p = fs_path_alloc(); 2339 if (!p) 2340 return -ENOMEM; 2341 2342 ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD); 2343 if (ret < 0) 2344 goto out; 2345 2346 ret = get_cur_path(sctx, ino, gen, p); 2347 if (ret < 0) 2348 goto out; 2349 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2350 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777); 2351 2352 ret = send_cmd(sctx); 2353 2354 tlv_put_failure: 2355 out: 2356 fs_path_free(p); 2357 return ret; 2358 } 2359 2360 static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) 2361 { 2362 int ret = 0; 2363 struct fs_path *p; 2364 2365 verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); 2366 2367 p = fs_path_alloc(); 2368 if (!p) 2369 return -ENOMEM; 2370 2371 ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN); 2372 if (ret < 0) 2373 goto out; 2374 2375 ret = get_cur_path(sctx, ino, gen, p); 2376 if (ret < 0) 2377 goto out; 2378 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2379 TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid); 2380 TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid); 2381 2382 ret = send_cmd(sctx); 2383 2384 tlv_put_failure: 2385 out: 2386 fs_path_free(p); 2387 return ret; 2388 } 2389 2390 static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) 2391 { 2392 int ret = 0; 2393 struct fs_path *p = NULL; 2394 struct btrfs_inode_item *ii; 2395 struct btrfs_path *path = NULL; 2396 struct extent_buffer *eb; 2397 struct btrfs_key key; 2398 int slot; 2399 2400 verbose_printk("btrfs: send_utimes %llu\n", ino); 2401 2402 p = fs_path_alloc(); 2403 if (!p) 2404 return -ENOMEM; 2405 2406 path = alloc_path_for_send(); 2407 if (!path) { 2408 ret = -ENOMEM; 2409 goto out; 2410 } 2411 2412 key.objectid = ino; 2413 key.type = BTRFS_INODE_ITEM_KEY; 2414 key.offset = 0; 2415 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2416 if (ret < 0) 2417 goto out; 2418 2419 eb = path->nodes[0]; 2420 slot = path->slots[0]; 2421 ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 2422 2423 ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES); 2424 if (ret < 0) 2425 goto out; 2426 2427 ret = get_cur_path(sctx, ino, gen, p); 2428 if (ret < 0) 2429 goto out; 2430 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2431 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, 2432 btrfs_inode_atime(ii)); 2433 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, 2434 btrfs_inode_mtime(ii)); 2435 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, 2436 btrfs_inode_ctime(ii)); 2437 /* TODO Add otime support when the otime patches get into upstream */ 2438 2439 ret = send_cmd(sctx); 2440 2441 tlv_put_failure: 2442 out: 2443 fs_path_free(p); 2444 btrfs_free_path(path); 2445 return ret; 2446 } 2447 2448 /* 2449 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. 
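 * (The concrete command, MKFILE, MKDIR, SYMLINK, MKNOD, MKFIFO or MKSOCK,
 * is picked from the inode mode below.)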
We don't have 2450 * a valid path yet because we have not processed the refs yet. So, the inode 2451 * is created as an orphan. 2452 */ 2453 static int send_create_inode(struct send_ctx *sctx, u64 ino) 2454 { 2455 int ret = 0; 2456 struct fs_path *p; 2457 int cmd; 2458 u64 gen; 2459 u64 mode; 2460 u64 rdev; 2461 2462 verbose_printk("btrfs: send_create_inode %llu\n", ino); 2463 2464 p = fs_path_alloc(); 2465 if (!p) 2466 return -ENOMEM; 2467 2468 if (ino != sctx->cur_ino) { 2469 ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, 2470 NULL, NULL, &rdev); 2471 if (ret < 0) 2472 goto out; 2473 } else { 2474 gen = sctx->cur_inode_gen; 2475 mode = sctx->cur_inode_mode; 2476 rdev = sctx->cur_inode_rdev; 2477 } 2478 2479 if (S_ISREG(mode)) { 2480 cmd = BTRFS_SEND_C_MKFILE; 2481 } else if (S_ISDIR(mode)) { 2482 cmd = BTRFS_SEND_C_MKDIR; 2483 } else if (S_ISLNK(mode)) { 2484 cmd = BTRFS_SEND_C_SYMLINK; 2485 } else if (S_ISCHR(mode) || S_ISBLK(mode)) { 2486 cmd = BTRFS_SEND_C_MKNOD; 2487 } else if (S_ISFIFO(mode)) { 2488 cmd = BTRFS_SEND_C_MKFIFO; 2489 } else if (S_ISSOCK(mode)) { 2490 cmd = BTRFS_SEND_C_MKSOCK; 2491 } else { 2492 printk(KERN_WARNING "btrfs: unexpected inode type %o\n", 2493 (int)(mode & S_IFMT)); 2494 ret = -ENOTSUPP; 2495 goto out; 2496 } 2497 2498 ret = begin_cmd(sctx, cmd); 2499 if (ret < 0) 2500 goto out; 2501 2502 ret = gen_unique_name(sctx, ino, gen, p); 2503 if (ret < 0) 2504 goto out; 2505 2506 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2507 TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino); 2508 2509 if (S_ISLNK(mode)) { 2510 fs_path_reset(p); 2511 ret = read_symlink(sctx->send_root, ino, p); 2512 if (ret < 0) 2513 goto out; 2514 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); 2515 } else if (S_ISCHR(mode) || S_ISBLK(mode) || 2516 S_ISFIFO(mode) || S_ISSOCK(mode)) { 2517 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev)); 2518 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode); 2519 } 2520 2521 ret = send_cmd(sctx); 2522 if (ret < 0) 2523 goto out; 2524 2525 2526 tlv_put_failure: 2527 out: 2528 fs_path_free(p); 2529 return ret; 2530 } 2531 2532 /* 2533 * We need some special handling for inodes that get processed before their 2534 * parent directory was created. See process_recorded_refs for details. 2535 * This function checks if we already created the dir out of order.
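 * Example (hypothetical inums): if dir 261 contains file 258, the file is
 * processed first and forces the dir to be created out of order. When
 * inode 261 is processed itself, the lookup below finds a dir item that
 * points to an already processed inode and the second create is skipped.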
2536 */ 2537 static int did_create_dir(struct send_ctx *sctx, u64 dir) 2538 { 2539 int ret = 0; 2540 struct btrfs_path *path = NULL; 2541 struct btrfs_key key; 2542 struct btrfs_key found_key; 2543 struct btrfs_key di_key; 2544 struct extent_buffer *eb; 2545 struct btrfs_dir_item *di; 2546 int slot; 2547 2548 path = alloc_path_for_send(); 2549 if (!path) { 2550 ret = -ENOMEM; 2551 goto out; 2552 } 2553 2554 key.objectid = dir; 2555 key.type = BTRFS_DIR_INDEX_KEY; 2556 key.offset = 0; 2557 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2558 if (ret < 0) 2559 goto out; 2560 2561 while (1) { 2562 eb = path->nodes[0]; 2563 slot = path->slots[0]; 2564 if (slot >= btrfs_header_nritems(eb)) { 2565 ret = btrfs_next_leaf(sctx->send_root, path); 2566 if (ret < 0) { 2567 goto out; 2568 } else if (ret > 0) { 2569 ret = 0; 2570 break; 2571 } 2572 continue; 2573 } 2574 2575 btrfs_item_key_to_cpu(eb, &found_key, slot); 2576 if (found_key.objectid != key.objectid || 2577 found_key.type != key.type) { 2578 ret = 0; 2579 goto out; 2580 } 2581 2582 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 2583 btrfs_dir_item_key_to_cpu(eb, di, &di_key); 2584 2585 if (di_key.type != BTRFS_ROOT_ITEM_KEY && 2586 di_key.objectid < sctx->send_progress) { 2587 ret = 1; 2588 goto out; 2589 } 2590 2591 path->slots[0]++; 2592 } 2593 2594 out: 2595 btrfs_free_path(path); 2596 return ret; 2597 } 2598 2599 /* 2600 * Only creates the inode if it is: 2601 * 1. Not a directory 2602 * 2. Or a directory which was not created already due to out of order 2603 * directories. See did_create_dir and process_recorded_refs for details. 2604 */ 2605 static int send_create_inode_if_needed(struct send_ctx *sctx) 2606 { 2607 int ret; 2608 2609 if (S_ISDIR(sctx->cur_inode_mode)) { 2610 ret = did_create_dir(sctx, sctx->cur_ino); 2611 if (ret < 0) 2612 goto out; 2613 if (ret) { 2614 ret = 0; 2615 goto out; 2616 } 2617 } 2618 2619 ret = send_create_inode(sctx, sctx->cur_ino); 2620 if (ret < 0) 2621 goto out; 2622 2623 out: 2624 return ret; 2625 } 2626 2627 struct recorded_ref { 2628 struct list_head list; 2629 char *dir_path; 2630 char *name; 2631 struct fs_path *full_path; 2632 u64 dir; 2633 u64 dir_gen; 2634 int dir_path_len; 2635 int name_len; 2636 }; 2637 2638 /* 2639 * We need to process new refs before deleted refs, but compare_tree gives us 2640 * everything mixed. So we first record all refs and later process them. 2641 * This function is a helper to record one ref. 
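 * E.g. a rename arrives from compare_tree as one new ref plus one deleted
 * ref for the same inode; recording both up front lets
 * process_recorded_refs apply them in a safe order.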
2642 */ 2643 static int __record_ref(struct list_head *head, u64 dir, 2644 u64 dir_gen, struct fs_path *path) 2645 { 2646 struct recorded_ref *ref; 2647 2648 ref = kmalloc(sizeof(*ref), GFP_NOFS); 2649 if (!ref) 2650 return -ENOMEM; 2651 2652 ref->dir = dir; 2653 ref->dir_gen = dir_gen; 2654 ref->full_path = path; 2655 2656 ref->name = (char *)kbasename(ref->full_path->start); 2657 ref->name_len = ref->full_path->end - ref->name; 2658 ref->dir_path = ref->full_path->start; 2659 if (ref->name == ref->full_path->start) 2660 ref->dir_path_len = 0; 2661 else 2662 ref->dir_path_len = ref->full_path->end - 2663 ref->full_path->start - 1 - ref->name_len; 2664 2665 list_add_tail(&ref->list, head); 2666 return 0; 2667 } 2668 2669 static int dup_ref(struct recorded_ref *ref, struct list_head *list) 2670 { 2671 struct recorded_ref *new; 2672 2673 new = kmalloc(sizeof(*ref), GFP_NOFS); 2674 if (!new) 2675 return -ENOMEM; 2676 2677 new->dir = ref->dir; 2678 new->dir_gen = ref->dir_gen; 2679 new->full_path = NULL; 2680 INIT_LIST_HEAD(&new->list); 2681 list_add_tail(&new->list, list); 2682 return 0; 2683 } 2684 2685 static void __free_recorded_refs(struct list_head *head) 2686 { 2687 struct recorded_ref *cur; 2688 2689 while (!list_empty(head)) { 2690 cur = list_entry(head->next, struct recorded_ref, list); 2691 fs_path_free(cur->full_path); 2692 list_del(&cur->list); 2693 kfree(cur); 2694 } 2695 } 2696 2697 static void free_recorded_refs(struct send_ctx *sctx) 2698 { 2699 __free_recorded_refs(&sctx->new_refs); 2700 __free_recorded_refs(&sctx->deleted_refs); 2701 } 2702 2703 /* 2704 * Renames/moves a file/dir to its orphan name. Used when the first 2705 * ref of an unprocessed inode gets overwritten and for all non-empty 2706 * directories. 2707 */ 2708 static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, 2709 struct fs_path *path) 2710 { 2711 int ret; 2712 struct fs_path *orphan; 2713 2714 orphan = fs_path_alloc(); 2715 if (!orphan) 2716 return -ENOMEM; 2717 2718 ret = gen_unique_name(sctx, ino, gen, orphan); 2719 if (ret < 0) 2720 goto out; 2721 2722 ret = send_rename(sctx, path, orphan); 2723 2724 out: 2725 fs_path_free(orphan); 2726 return ret; 2727 } 2728 2729 static struct orphan_dir_info * 2730 add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) 2731 { 2732 struct rb_node **p = &sctx->orphan_dirs.rb_node; 2733 struct rb_node *parent = NULL; 2734 struct orphan_dir_info *entry, *odi; 2735 2736 odi = kmalloc(sizeof(*odi), GFP_NOFS); 2737 if (!odi) 2738 return ERR_PTR(-ENOMEM); 2739 odi->ino = dir_ino; 2740 odi->gen = 0; 2741 2742 while (*p) { 2743 parent = *p; 2744 entry = rb_entry(parent, struct orphan_dir_info, node); 2745 if (dir_ino < entry->ino) { 2746 p = &(*p)->rb_left; 2747 } else if (dir_ino > entry->ino) { 2748 p = &(*p)->rb_right; 2749 } else { 2750 kfree(odi); 2751 return entry; 2752 } 2753 } 2754 2755 rb_link_node(&odi->node, parent, p); 2756 rb_insert_color(&odi->node, &sctx->orphan_dirs); 2757 return odi; 2758 } 2759 2760 static struct orphan_dir_info * 2761 get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) 2762 { 2763 struct rb_node *n = sctx->orphan_dirs.rb_node; 2764 struct orphan_dir_info *entry; 2765 2766 while (n) { 2767 entry = rb_entry(n, struct orphan_dir_info, node); 2768 if (dir_ino < entry->ino) 2769 n = n->rb_left; 2770 else if (dir_ino > entry->ino) 2771 n = n->rb_right; 2772 else 2773 return entry; 2774 } 2775 return NULL; 2776 } 2777 2778 static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) 2779 { 2780 struct orphan_dir_info *odi =
get_orphan_dir_info(sctx, dir_ino); 2781 2782 return odi != NULL; 2783 } 2784 2785 static void free_orphan_dir_info(struct send_ctx *sctx, 2786 struct orphan_dir_info *odi) 2787 { 2788 if (!odi) 2789 return; 2790 rb_erase(&odi->node, &sctx->orphan_dirs); 2791 kfree(odi); 2792 } 2793 2794 /* 2795 * Returns 1 if a directory can be removed at this point in time. 2796 * We check this by iterating all dir items and checking if the inode behind 2797 * the dir item was already processed. 2798 */ 2799 static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, 2800 u64 send_progress) 2801 { 2802 int ret = 0; 2803 struct btrfs_root *root = sctx->parent_root; 2804 struct btrfs_path *path; 2805 struct btrfs_key key; 2806 struct btrfs_key found_key; 2807 struct btrfs_key loc; 2808 struct btrfs_dir_item *di; 2809 2810 /* 2811 * Don't try to rmdir the top/root subvolume dir. 2812 */ 2813 if (dir == BTRFS_FIRST_FREE_OBJECTID) 2814 return 0; 2815 2816 path = alloc_path_for_send(); 2817 if (!path) 2818 return -ENOMEM; 2819 2820 key.objectid = dir; 2821 key.type = BTRFS_DIR_INDEX_KEY; 2822 key.offset = 0; 2823 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2824 if (ret < 0) 2825 goto out; 2826 2827 while (1) { 2828 struct waiting_dir_move *dm; 2829 2830 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 2831 ret = btrfs_next_leaf(root, path); 2832 if (ret < 0) 2833 goto out; 2834 else if (ret > 0) 2835 break; 2836 continue; 2837 } 2838 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2839 path->slots[0]); 2840 if (found_key.objectid != key.objectid || 2841 found_key.type != key.type) 2842 break; 2843 2844 di = btrfs_item_ptr(path->nodes[0], path->slots[0], 2845 struct btrfs_dir_item); 2846 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); 2847 2848 dm = get_waiting_dir_move(sctx, loc.objectid); 2849 if (dm) { 2850 struct orphan_dir_info *odi; 2851 2852 odi = add_orphan_dir_info(sctx, dir); 2853 if (IS_ERR(odi)) { 2854 ret = PTR_ERR(odi); 2855 goto out; 2856 } 2857 odi->gen = dir_gen; 2858 dm->rmdir_ino = dir; 2859 ret = 0; 2860 goto out; 2861 } 2862 2863 if (loc.objectid > send_progress) { 2864 ret = 0; 2865 goto out; 2866 } 2867 2868 path->slots[0]++; 2869 } 2870 2871 ret = 1; 2872 2873 out: 2874 btrfs_free_path(path); 2875 return ret; 2876 } 2877 2878 static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) 2879 { 2880 struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino); 2881 2882 return entry != NULL; 2883 } 2884 2885 static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) 2886 { 2887 struct rb_node **p = &sctx->waiting_dir_moves.rb_node; 2888 struct rb_node *parent = NULL; 2889 struct waiting_dir_move *entry, *dm; 2890 2891 dm = kmalloc(sizeof(*dm), GFP_NOFS); 2892 if (!dm) 2893 return -ENOMEM; 2894 dm->ino = ino; 2895 dm->rmdir_ino = 0; 2896 2897 while (*p) { 2898 parent = *p; 2899 entry = rb_entry(parent, struct waiting_dir_move, node); 2900 if (ino < entry->ino) { 2901 p = &(*p)->rb_left; 2902 } else if (ino > entry->ino) { 2903 p = &(*p)->rb_right; 2904 } else { 2905 kfree(dm); 2906 return -EEXIST; 2907 } 2908 } 2909 2910 rb_link_node(&dm->node, parent, p); 2911 rb_insert_color(&dm->node, &sctx->waiting_dir_moves); 2912 return 0; 2913 } 2914 2915 static struct waiting_dir_move * 2916 get_waiting_dir_move(struct send_ctx *sctx, u64 ino) 2917 { 2918 struct rb_node *n = sctx->waiting_dir_moves.rb_node; 2919 struct waiting_dir_move *entry; 2920 2921 while (n) { 2922 entry = rb_entry(n, struct waiting_dir_move, node); 2923 if (ino < entry->ino) 2924 
n = n->rb_left; 2925 else if (ino > entry->ino) 2926 n = n->rb_right; 2927 else 2928 return entry; 2929 } 2930 return NULL; 2931 } 2932 2933 static void free_waiting_dir_move(struct send_ctx *sctx, 2934 struct waiting_dir_move *dm) 2935 { 2936 if (!dm) 2937 return; 2938 rb_erase(&dm->node, &sctx->waiting_dir_moves); 2939 kfree(dm); 2940 } 2941 2942 static int add_pending_dir_move(struct send_ctx *sctx, 2943 u64 ino, 2944 u64 ino_gen, 2945 u64 parent_ino) 2946 { 2947 struct rb_node **p = &sctx->pending_dir_moves.rb_node; 2948 struct rb_node *parent = NULL; 2949 struct pending_dir_move *entry = NULL, *pm; 2950 struct recorded_ref *cur; 2951 int exists = 0; 2952 int ret; 2953 2954 pm = kmalloc(sizeof(*pm), GFP_NOFS); 2955 if (!pm) 2956 return -ENOMEM; 2957 pm->parent_ino = parent_ino; 2958 pm->ino = ino; 2959 pm->gen = ino_gen; 2960 INIT_LIST_HEAD(&pm->list); 2961 INIT_LIST_HEAD(&pm->update_refs); 2962 RB_CLEAR_NODE(&pm->node); 2963 2964 while (*p) { 2965 parent = *p; 2966 entry = rb_entry(parent, struct pending_dir_move, node); 2967 if (parent_ino < entry->parent_ino) { 2968 p = &(*p)->rb_left; 2969 } else if (parent_ino > entry->parent_ino) { 2970 p = &(*p)->rb_right; 2971 } else { 2972 exists = 1; 2973 break; 2974 } 2975 } 2976 2977 list_for_each_entry(cur, &sctx->deleted_refs, list) { 2978 ret = dup_ref(cur, &pm->update_refs); 2979 if (ret < 0) 2980 goto out; 2981 } 2982 list_for_each_entry(cur, &sctx->new_refs, list) { 2983 ret = dup_ref(cur, &pm->update_refs); 2984 if (ret < 0) 2985 goto out; 2986 } 2987 2988 ret = add_waiting_dir_move(sctx, pm->ino); 2989 if (ret) 2990 goto out; 2991 2992 if (exists) { 2993 list_add_tail(&pm->list, &entry->list); 2994 } else { 2995 rb_link_node(&pm->node, parent, p); 2996 rb_insert_color(&pm->node, &sctx->pending_dir_moves); 2997 } 2998 ret = 0; 2999 out: 3000 if (ret) { 3001 __free_recorded_refs(&pm->update_refs); 3002 kfree(pm); 3003 } 3004 return ret; 3005 } 3006 3007 static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, 3008 u64 parent_ino) 3009 { 3010 struct rb_node *n = sctx->pending_dir_moves.rb_node; 3011 struct pending_dir_move *entry; 3012 3013 while (n) { 3014 entry = rb_entry(n, struct pending_dir_move, node); 3015 if (parent_ino < entry->parent_ino) 3016 n = n->rb_left; 3017 else if (parent_ino > entry->parent_ino) 3018 n = n->rb_right; 3019 else 3020 return entry; 3021 } 3022 return NULL; 3023 } 3024 3025 static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) 3026 { 3027 struct fs_path *from_path = NULL; 3028 struct fs_path *to_path = NULL; 3029 struct fs_path *name = NULL; 3030 u64 orig_progress = sctx->send_progress; 3031 struct recorded_ref *cur; 3032 u64 parent_ino, parent_gen; 3033 struct waiting_dir_move *dm = NULL; 3034 u64 rmdir_ino = 0; 3035 int ret; 3036 3037 name = fs_path_alloc(); 3038 from_path = fs_path_alloc(); 3039 if (!name || !from_path) { 3040 ret = -ENOMEM; 3041 goto out; 3042 } 3043 3044 dm = get_waiting_dir_move(sctx, pm->ino); 3045 ASSERT(dm); 3046 rmdir_ino = dm->rmdir_ino; 3047 free_waiting_dir_move(sctx, dm); 3048 3049 ret = get_first_ref(sctx->parent_root, pm->ino, 3050 &parent_ino, &parent_gen, name); 3051 if (ret < 0) 3052 goto out; 3053 3054 if (parent_ino == sctx->cur_ino) { 3055 /* child only renamed, not moved */ 3056 ASSERT(parent_gen == sctx->cur_inode_gen); 3057 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, 3058 from_path); 3059 if (ret < 0) 3060 goto out; 3061 ret = fs_path_add_path(from_path, name); 3062 if (ret < 0) 3063 goto out; 3064 } 
else { 3065 /* child moved and maybe renamed too */ 3066 sctx->send_progress = pm->ino; 3067 ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); 3068 if (ret < 0) 3069 goto out; 3070 } 3071 3072 fs_path_free(name); 3073 name = NULL; 3074 3075 to_path = fs_path_alloc(); 3076 if (!to_path) { 3077 ret = -ENOMEM; 3078 goto out; 3079 } 3080 3081 sctx->send_progress = sctx->cur_ino + 1; 3082 ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); 3083 if (ret < 0) 3084 goto out; 3085 3086 ret = send_rename(sctx, from_path, to_path); 3087 if (ret < 0) 3088 goto out; 3089 3090 if (rmdir_ino) { 3091 struct orphan_dir_info *odi; 3092 3093 odi = get_orphan_dir_info(sctx, rmdir_ino); 3094 if (!odi) { 3095 /* already deleted */ 3096 goto finish; 3097 } 3098 ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); 3099 if (ret < 0) 3100 goto out; 3101 if (!ret) 3102 goto finish; 3103 3104 name = fs_path_alloc(); 3105 if (!name) { 3106 ret = -ENOMEM; 3107 goto out; 3108 } 3109 ret = get_cur_path(sctx, rmdir_ino, odi->gen, name); 3110 if (ret < 0) 3111 goto out; 3112 ret = send_rmdir(sctx, name); 3113 if (ret < 0) 3114 goto out; 3115 free_orphan_dir_info(sctx, odi); 3116 } 3117 3118 finish: 3119 ret = send_utimes(sctx, pm->ino, pm->gen); 3120 if (ret < 0) 3121 goto out; 3122 3123 /* 3124 * After rename/move, need to update the utimes of both new parent(s) 3125 * and old parent(s). 3126 */ 3127 list_for_each_entry(cur, &pm->update_refs, list) { 3128 if (cur->dir == rmdir_ino) 3129 continue; 3130 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3131 if (ret < 0) 3132 goto out; 3133 } 3134 3135 out: 3136 fs_path_free(name); 3137 fs_path_free(from_path); 3138 fs_path_free(to_path); 3139 sctx->send_progress = orig_progress; 3140 3141 return ret; 3142 } 3143 3144 static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m) 3145 { 3146 if (!list_empty(&m->list)) 3147 list_del(&m->list); 3148 if (!RB_EMPTY_NODE(&m->node)) 3149 rb_erase(&m->node, &sctx->pending_dir_moves); 3150 __free_recorded_refs(&m->update_refs); 3151 kfree(m); 3152 } 3153 3154 static void tail_append_pending_moves(struct pending_dir_move *moves, 3155 struct list_head *stack) 3156 { 3157 if (list_empty(&moves->list)) { 3158 list_add_tail(&moves->list, stack); 3159 } else { 3160 LIST_HEAD(list); 3161 list_splice_init(&moves->list, &list); 3162 list_add_tail(&moves->list, stack); 3163 list_splice_tail(&list, stack); 3164 } 3165 } 3166 3167 static int apply_children_dir_moves(struct send_ctx *sctx) 3168 { 3169 struct pending_dir_move *pm; 3170 struct list_head stack; 3171 u64 parent_ino = sctx->cur_ino; 3172 int ret = 0; 3173 3174 pm = get_pending_dir_moves(sctx, parent_ino); 3175 if (!pm) 3176 return 0; 3177 3178 INIT_LIST_HEAD(&stack); 3179 tail_append_pending_moves(pm, &stack); 3180 3181 while (!list_empty(&stack)) { 3182 pm = list_first_entry(&stack, struct pending_dir_move, list); 3183 parent_ino = pm->ino; 3184 ret = apply_dir_move(sctx, pm); 3185 free_pending_move(sctx, pm); 3186 if (ret) 3187 goto out; 3188 pm = get_pending_dir_moves(sctx, parent_ino); 3189 if (pm) 3190 tail_append_pending_moves(pm, &stack); 3191 } 3192 return 0; 3193 3194 out: 3195 while (!list_empty(&stack)) { 3196 pm = list_first_entry(&stack, struct pending_dir_move, list); 3197 free_pending_move(sctx, pm); 3198 } 3199 return ret; 3200 } 3201 3202 static int wait_for_parent_move(struct send_ctx *sctx, 3203 struct recorded_ref *parent_ref) 3204 { 3205 int ret; 3206 u64 ino = parent_ref->dir; 3207 u64 parent_ino_before, parent_ino_after; 3208 
u64 old_gen; 3209 struct fs_path *path_before = NULL; 3210 struct fs_path *path_after = NULL; 3211 int len1, len2; 3212 int register_upper_dirs; 3213 u64 gen; 3214 3215 if (is_waiting_for_move(sctx, ino)) 3216 return 1; 3217 3218 if (parent_ref->dir <= sctx->cur_ino) 3219 return 0; 3220 3221 ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, 3222 NULL, NULL, NULL, NULL); 3223 if (ret == -ENOENT) 3224 return 0; 3225 else if (ret < 0) 3226 return ret; 3227 3228 if (parent_ref->dir_gen != old_gen) 3229 return 0; 3230 3231 path_before = fs_path_alloc(); 3232 if (!path_before) 3233 return -ENOMEM; 3234 3235 ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, 3236 NULL, path_before); 3237 if (ret == -ENOENT) { 3238 ret = 0; 3239 goto out; 3240 } else if (ret < 0) { 3241 goto out; 3242 } 3243 3244 path_after = fs_path_alloc(); 3245 if (!path_after) { 3246 ret = -ENOMEM; 3247 goto out; 3248 } 3249 3250 ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, 3251 &gen, path_after); 3252 if (ret == -ENOENT) { 3253 ret = 0; 3254 goto out; 3255 } else if (ret < 0) { 3256 goto out; 3257 } 3258 3259 len1 = fs_path_len(path_before); 3260 len2 = fs_path_len(path_after); 3261 if (parent_ino_before != parent_ino_after || len1 != len2 || 3262 memcmp(path_before->start, path_after->start, len1)) { 3263 ret = 1; 3264 goto out; 3265 } 3266 ret = 0; 3267 3268 /* 3269 * Ok, our new most direct ancestor has a higher inode number but 3270 * wasn't moved/renamed. So maybe some of the new ancestors higher in 3271 * the hierarchy have a higher inode number too *and* were renamed 3272 * or moved - in this case we need to wait for the ancestor's rename 3273 * or move operation before we can do the move/rename for the current 3274 * inode. 3275 */ 3276 register_upper_dirs = 0; 3277 ino = parent_ino_after; 3278 again: 3279 while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) { 3280 u64 parent_gen; 3281 3282 fs_path_reset(path_before); 3283 fs_path_reset(path_after); 3284 3285 ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, 3286 &parent_gen, path_after); 3287 if (ret < 0) 3288 goto out; 3289 ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, 3290 NULL, path_before); 3291 if (ret == -ENOENT) { 3292 ret = 0; 3293 break; 3294 } else if (ret < 0) { 3295 goto out; 3296 } 3297 3298 len1 = fs_path_len(path_before); 3299 len2 = fs_path_len(path_after); 3300 if (parent_ino_before != parent_ino_after || len1 != len2 || 3301 memcmp(path_before->start, path_after->start, len1)) { 3302 ret = 1; 3303 if (register_upper_dirs) { 3304 break; 3305 } else { 3306 register_upper_dirs = 1; 3307 ino = parent_ref->dir; 3308 gen = parent_ref->dir_gen; 3309 goto again; 3310 } 3311 } else if (register_upper_dirs) { 3312 ret = add_pending_dir_move(sctx, ino, gen, 3313 parent_ino_after); 3314 if (ret < 0 && ret != -EEXIST) 3315 goto out; 3316 } 3317 3318 ino = parent_ino_after; 3319 gen = parent_gen; 3320 } 3321 3322 out: 3323 fs_path_free(path_before); 3324 fs_path_free(path_after); 3325 3326 return ret; 3327 } 3328 3329 /* 3330 * This does all the move/link/unlink/rmdir magic.
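 * Roughly: first all new refs are applied (creating parent dirs out of
 * order and orphanizing overwritten inodes where needed), then deleted
 * refs are turned into unlinks/rmdirs, and finally the utimes of all
 * affected parent dirs are updated via the check_dirs list.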
3331 */ 3332 static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) 3333 { 3334 int ret = 0; 3335 struct recorded_ref *cur; 3336 struct recorded_ref *cur2; 3337 struct list_head check_dirs; 3338 struct fs_path *valid_path = NULL; 3339 u64 ow_inode = 0; 3340 u64 ow_gen; 3341 int did_overwrite = 0; 3342 int is_orphan = 0; 3343 u64 last_dir_ino_rm = 0; 3344 3345 verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); 3346 3347 /* 3348 * This should never happen as the root dir always has the same ref 3349 * which is always '..' 3350 */ 3351 BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); 3352 INIT_LIST_HEAD(&check_dirs); 3353 3354 valid_path = fs_path_alloc(); 3355 if (!valid_path) { 3356 ret = -ENOMEM; 3357 goto out; 3358 } 3359 3360 /* 3361 * First, check if the first ref of the current inode was overwritten 3362 * before. If yes, we know that the current inode was already orphanized 3363 * and thus use the orphan name. If not, we can use get_cur_path to 3364 * get the path of the first ref as it would look like while receiving 3365 * at this point in time. 3366 * New inodes are always orphan at the beginning, so we force the use of 3367 * the orphan name in this case. 3368 * The first ref is stored in valid_path and will be updated if it 3369 * gets moved around. 3370 */ 3371 if (!sctx->cur_inode_new) { 3372 ret = did_overwrite_first_ref(sctx, sctx->cur_ino, 3373 sctx->cur_inode_gen); 3374 if (ret < 0) 3375 goto out; 3376 if (ret) 3377 did_overwrite = 1; 3378 } 3379 if (sctx->cur_inode_new || did_overwrite) { 3380 ret = gen_unique_name(sctx, sctx->cur_ino, 3381 sctx->cur_inode_gen, valid_path); 3382 if (ret < 0) 3383 goto out; 3384 is_orphan = 1; 3385 } else { 3386 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, 3387 valid_path); 3388 if (ret < 0) 3389 goto out; 3390 } 3391 3392 list_for_each_entry(cur, &sctx->new_refs, list) { 3393 /* 3394 * We may have refs where the parent directory does not exist 3395 * yet. This happens if the parent directory's inum is higher 3396 * than the current inum. To handle this case, we create the 3397 * parent directory out of order. But we need to check if this 3398 * did already happen before due to other refs in the same dir. 3399 */ 3400 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); 3401 if (ret < 0) 3402 goto out; 3403 if (ret == inode_state_will_create) { 3404 ret = 0; 3405 /* 3406 * First check if any of the current inode's refs did 3407 * already create the dir. 3408 */ 3409 list_for_each_entry(cur2, &sctx->new_refs, list) { 3410 if (cur == cur2) 3411 break; 3412 if (cur2->dir == cur->dir) { 3413 ret = 1; 3414 break; 3415 } 3416 } 3417 3418 /* 3419 * If that did not happen, check if a previous inode 3420 * did already create the dir. 3421 */ 3422 if (!ret) 3423 ret = did_create_dir(sctx, cur->dir); 3424 if (ret < 0) 3425 goto out; 3426 if (!ret) { 3427 ret = send_create_inode(sctx, cur->dir); 3428 if (ret < 0) 3429 goto out; 3430 } 3431 } 3432 3433 /* 3434 * Check if this new ref would overwrite the first ref of 3435 * another unprocessed inode. If yes, orphanize the 3436 * overwritten inode. If we find an overwritten ref that is 3437 * not the first ref, simply unlink it.
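		 * Hypothetical example: if the new ref "a/x" is, in the
		 * parent snapshot, the first ref of the still unprocessed
		 * inode 270, then inode 270 is renamed to its orphan name
		 * first; if "a/x" is merely one of inode 270's additional
		 * hard links, a plain unlink of that name is enough.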
3438 */ 3439 ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3440 cur->name, cur->name_len, 3441 &ow_inode, &ow_gen); 3442 if (ret < 0) 3443 goto out; 3444 if (ret) { 3445 ret = is_first_ref(sctx->parent_root, 3446 ow_inode, cur->dir, cur->name, 3447 cur->name_len); 3448 if (ret < 0) 3449 goto out; 3450 if (ret) { 3451 ret = orphanize_inode(sctx, ow_inode, ow_gen, 3452 cur->full_path); 3453 if (ret < 0) 3454 goto out; 3455 } else { 3456 ret = send_unlink(sctx, cur->full_path); 3457 if (ret < 0) 3458 goto out; 3459 } 3460 } 3461 3462 /* 3463 * link/move the ref to the new place. If we have an orphan 3464 * inode, move it and update valid_path. If not, link or move 3465 * it depending on the inode mode. 3466 */ 3467 if (is_orphan) { 3468 ret = send_rename(sctx, valid_path, cur->full_path); 3469 if (ret < 0) 3470 goto out; 3471 is_orphan = 0; 3472 ret = fs_path_copy(valid_path, cur->full_path); 3473 if (ret < 0) 3474 goto out; 3475 } else { 3476 if (S_ISDIR(sctx->cur_inode_mode)) { 3477 /* 3478 * Dirs can't be linked, so move it. For moved 3479 * dirs, we always have one new and one deleted 3480 * ref. The deleted ref is ignored later. 3481 */ 3482 ret = wait_for_parent_move(sctx, cur); 3483 if (ret < 0) 3484 goto out; 3485 if (ret) { 3486 ret = add_pending_dir_move(sctx, 3487 sctx->cur_ino, 3488 sctx->cur_inode_gen, 3489 cur->dir); 3490 *pending_move = 1; 3491 } else { 3492 ret = send_rename(sctx, valid_path, 3493 cur->full_path); 3494 if (!ret) 3495 ret = fs_path_copy(valid_path, 3496 cur->full_path); 3497 } 3498 if (ret < 0) 3499 goto out; 3500 } else { 3501 ret = send_link(sctx, cur->full_path, 3502 valid_path); 3503 if (ret < 0) 3504 goto out; 3505 } 3506 } 3507 ret = dup_ref(cur, &check_dirs); 3508 if (ret < 0) 3509 goto out; 3510 } 3511 3512 if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) { 3513 /* 3514 * Check if we can already rmdir the directory. If not, 3515 * orphanize it. For every dir item inside that gets deleted 3516 * later, we do this check again and rmdir it then if possible. 3517 * See the use of check_dirs for more details. 3518 */ 3519 ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen, 3520 sctx->cur_ino); 3521 if (ret < 0) 3522 goto out; 3523 if (ret) { 3524 ret = send_rmdir(sctx, valid_path); 3525 if (ret < 0) 3526 goto out; 3527 } else if (!is_orphan) { 3528 ret = orphanize_inode(sctx, sctx->cur_ino, 3529 sctx->cur_inode_gen, valid_path); 3530 if (ret < 0) 3531 goto out; 3532 is_orphan = 1; 3533 } 3534 3535 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3536 ret = dup_ref(cur, &check_dirs); 3537 if (ret < 0) 3538 goto out; 3539 } 3540 } else if (S_ISDIR(sctx->cur_inode_mode) && 3541 !list_empty(&sctx->deleted_refs)) { 3542 /* 3543 * We have a moved dir. Add the old parent to check_dirs 3544 */ 3545 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, 3546 list); 3547 ret = dup_ref(cur, &check_dirs); 3548 if (ret < 0) 3549 goto out; 3550 } else if (!S_ISDIR(sctx->cur_inode_mode)) { 3551 /* 3552 * We have a non dir inode. Go through all deleted refs and 3553 * unlink them if they were not already overwritten by other 3554 * inodes. 
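		 * (If a deleted ref was already overwritten, the name now
		 * belongs to another inode on the receiving side and must
		 * not be unlinked again.)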
3555 */ 3556 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3557 ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3558 sctx->cur_ino, sctx->cur_inode_gen, 3559 cur->name, cur->name_len); 3560 if (ret < 0) 3561 goto out; 3562 if (!ret) { 3563 ret = send_unlink(sctx, cur->full_path); 3564 if (ret < 0) 3565 goto out; 3566 } 3567 ret = dup_ref(cur, &check_dirs); 3568 if (ret < 0) 3569 goto out; 3570 } 3571 /* 3572 * If the inode is still orphan, unlink the orphan. This may 3573 * happen when a previous inode did overwrite the first ref 3574 * of this inode and no new refs were added for the current 3575 * inode. Unlinking does not mean that the inode is deleted in 3576 * all cases. There may still be links to this inode in other 3577 * places. 3578 */ 3579 if (is_orphan) { 3580 ret = send_unlink(sctx, valid_path); 3581 if (ret < 0) 3582 goto out; 3583 } 3584 } 3585 3586 /* 3587 * We did collect all parent dirs where cur_inode was once located. We 3588 * now go through all these dirs and check if they are pending for 3589 * deletion and if it's finally possible to perform the rmdir now. 3590 * We also update the inode stats of the parent dirs here. 3591 */ 3592 list_for_each_entry(cur, &check_dirs, list) { 3593 /* 3594 * In case we had refs into dirs that were not processed yet, 3595 * we don't need to do the utime and rmdir logic for these dirs. 3596 * The dir will be processed later. 3597 */ 3598 if (cur->dir > sctx->cur_ino) 3599 continue; 3600 3601 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); 3602 if (ret < 0) 3603 goto out; 3604 3605 if (ret == inode_state_did_create || 3606 ret == inode_state_no_change) { 3607 /* TODO delayed utimes */ 3608 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3609 if (ret < 0) 3610 goto out; 3611 } else if (ret == inode_state_did_delete && 3612 cur->dir != last_dir_ino_rm) { 3613 ret = can_rmdir(sctx, cur->dir, cur->dir_gen, 3614 sctx->cur_ino); 3615 if (ret < 0) 3616 goto out; 3617 if (ret) { 3618 ret = get_cur_path(sctx, cur->dir, 3619 cur->dir_gen, valid_path); 3620 if (ret < 0) 3621 goto out; 3622 ret = send_rmdir(sctx, valid_path); 3623 if (ret < 0) 3624 goto out; 3625 last_dir_ino_rm = cur->dir; 3626 } 3627 } 3628 } 3629 3630 ret = 0; 3631 3632 out: 3633 __free_recorded_refs(&check_dirs); 3634 free_recorded_refs(sctx); 3635 fs_path_free(valid_path); 3636 return ret; 3637 } 3638 3639 static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, 3640 struct fs_path *name, void *ctx, struct list_head *refs) 3641 { 3642 int ret = 0; 3643 struct send_ctx *sctx = ctx; 3644 struct fs_path *p; 3645 u64 gen; 3646 3647 p = fs_path_alloc(); 3648 if (!p) 3649 return -ENOMEM; 3650 3651 ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL, 3652 NULL, NULL); 3653 if (ret < 0) 3654 goto out; 3655 3656 ret = get_cur_path(sctx, dir, gen, p); 3657 if (ret < 0) 3658 goto out; 3659 ret = fs_path_add_path(p, name); 3660 if (ret < 0) 3661 goto out; 3662 3663 ret = __record_ref(refs, dir, gen, p); 3664 3665 out: 3666 if (ret) 3667 fs_path_free(p); 3668 return ret; 3669 } 3670 3671 static int __record_new_ref(int num, u64 dir, int index, 3672 struct fs_path *name, 3673 void *ctx) 3674 { 3675 struct send_ctx *sctx = ctx; 3676 return record_ref(sctx->send_root, num, dir, index, name, 3677 ctx, &sctx->new_refs); 3678 } 3679 3680 3681 static int __record_deleted_ref(int num, u64 dir, int index, 3682 struct fs_path *name, 3683 void *ctx) 3684 { 3685 struct send_ctx *sctx = ctx; 3686 return record_ref(sctx->parent_root, num, dir, index, name, 
3687 ctx, &sctx->deleted_refs); 3688 } 3689 3690 static int record_new_ref(struct send_ctx *sctx) 3691 { 3692 int ret; 3693 3694 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 3695 sctx->cmp_key, 0, __record_new_ref, sctx); 3696 if (ret < 0) 3697 goto out; 3698 ret = 0; 3699 3700 out: 3701 return ret; 3702 } 3703 3704 static int record_deleted_ref(struct send_ctx *sctx) 3705 { 3706 int ret; 3707 3708 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 3709 sctx->cmp_key, 0, __record_deleted_ref, sctx); 3710 if (ret < 0) 3711 goto out; 3712 ret = 0; 3713 3714 out: 3715 return ret; 3716 } 3717 3718 struct find_ref_ctx { 3719 u64 dir; 3720 u64 dir_gen; 3721 struct btrfs_root *root; 3722 struct fs_path *name; 3723 int found_idx; 3724 }; 3725 3726 static int __find_iref(int num, u64 dir, int index, 3727 struct fs_path *name, 3728 void *ctx_) 3729 { 3730 struct find_ref_ctx *ctx = ctx_; 3731 u64 dir_gen; 3732 int ret; 3733 3734 if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && 3735 strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { 3736 /* 3737 * To avoid doing extra lookups we'll only do this if everything 3738 * else matches. 3739 */ 3740 ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL, 3741 NULL, NULL, NULL); 3742 if (ret) 3743 return ret; 3744 if (dir_gen != ctx->dir_gen) 3745 return 0; 3746 ctx->found_idx = num; 3747 return 1; 3748 } 3749 return 0; 3750 } 3751 3752 static int find_iref(struct btrfs_root *root, 3753 struct btrfs_path *path, 3754 struct btrfs_key *key, 3755 u64 dir, u64 dir_gen, struct fs_path *name) 3756 { 3757 int ret; 3758 struct find_ref_ctx ctx; 3759 3760 ctx.dir = dir; 3761 ctx.name = name; 3762 ctx.dir_gen = dir_gen; 3763 ctx.found_idx = -1; 3764 ctx.root = root; 3765 3766 ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); 3767 if (ret < 0) 3768 return ret; 3769 3770 if (ctx.found_idx == -1) 3771 return -ENOENT; 3772 3773 return ctx.found_idx; 3774 } 3775 3776 static int __record_changed_new_ref(int num, u64 dir, int index, 3777 struct fs_path *name, 3778 void *ctx) 3779 { 3780 u64 dir_gen; 3781 int ret; 3782 struct send_ctx *sctx = ctx; 3783 3784 ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL, 3785 NULL, NULL, NULL); 3786 if (ret) 3787 return ret; 3788 3789 ret = find_iref(sctx->parent_root, sctx->right_path, 3790 sctx->cmp_key, dir, dir_gen, name); 3791 if (ret == -ENOENT) 3792 ret = __record_new_ref(num, dir, index, name, sctx); 3793 else if (ret > 0) 3794 ret = 0; 3795 3796 return ret; 3797 } 3798 3799 static int __record_changed_deleted_ref(int num, u64 dir, int index, 3800 struct fs_path *name, 3801 void *ctx) 3802 { 3803 u64 dir_gen; 3804 int ret; 3805 struct send_ctx *sctx = ctx; 3806 3807 ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL, 3808 NULL, NULL, NULL); 3809 if (ret) 3810 return ret; 3811 3812 ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, 3813 dir, dir_gen, name); 3814 if (ret == -ENOENT) 3815 ret = __record_deleted_ref(num, dir, index, name, sctx); 3816 else if (ret > 0) 3817 ret = 0; 3818 3819 return ret; 3820 } 3821 3822 static int record_changed_ref(struct send_ctx *sctx) 3823 { 3824 int ret = 0; 3825 3826 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 3827 sctx->cmp_key, 0, __record_changed_new_ref, sctx); 3828 if (ret < 0) 3829 goto out; 3830 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 3831 sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); 3832 if (ret < 0) 3833 goto out; 3834 ret = 0; 
3835 3836 out: 3837 return ret; 3838 } 3839 3840 /* 3841 * Record and process all refs at once. Needed when an inode changes the 3842 * generation number, which means that it was deleted and recreated. 3843 */ 3844 static int process_all_refs(struct send_ctx *sctx, 3845 enum btrfs_compare_tree_result cmd) 3846 { 3847 int ret; 3848 struct btrfs_root *root; 3849 struct btrfs_path *path; 3850 struct btrfs_key key; 3851 struct btrfs_key found_key; 3852 struct extent_buffer *eb; 3853 int slot; 3854 iterate_inode_ref_t cb; 3855 int pending_move = 0; 3856 3857 path = alloc_path_for_send(); 3858 if (!path) 3859 return -ENOMEM; 3860 3861 if (cmd == BTRFS_COMPARE_TREE_NEW) { 3862 root = sctx->send_root; 3863 cb = __record_new_ref; 3864 } else if (cmd == BTRFS_COMPARE_TREE_DELETED) { 3865 root = sctx->parent_root; 3866 cb = __record_deleted_ref; 3867 } else { 3868 btrfs_err(sctx->send_root->fs_info, 3869 "Wrong command %d in process_all_refs", cmd); 3870 ret = -EINVAL; 3871 goto out; 3872 } 3873 3874 key.objectid = sctx->cmp_key->objectid; 3875 key.type = BTRFS_INODE_REF_KEY; 3876 key.offset = 0; 3877 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3878 if (ret < 0) 3879 goto out; 3880 3881 while (1) { 3882 eb = path->nodes[0]; 3883 slot = path->slots[0]; 3884 if (slot >= btrfs_header_nritems(eb)) { 3885 ret = btrfs_next_leaf(root, path); 3886 if (ret < 0) 3887 goto out; 3888 else if (ret > 0) 3889 break; 3890 continue; 3891 } 3892 3893 btrfs_item_key_to_cpu(eb, &found_key, slot); 3894 3895 if (found_key.objectid != key.objectid || 3896 (found_key.type != BTRFS_INODE_REF_KEY && 3897 found_key.type != BTRFS_INODE_EXTREF_KEY)) 3898 break; 3899 3900 ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); 3901 if (ret < 0) 3902 goto out; 3903 3904 path->slots[0]++; 3905 } 3906 btrfs_release_path(path); 3907 3908 ret = process_recorded_refs(sctx, &pending_move); 3909 /* Only applicable to an incremental send. */ 3910 ASSERT(pending_move == 0); 3911 3912 out: 3913 btrfs_free_path(path); 3914 return ret; 3915 } 3916 3917 static int send_set_xattr(struct send_ctx *sctx, 3918 struct fs_path *path, 3919 const char *name, int name_len, 3920 const char *data, int data_len) 3921 { 3922 int ret = 0; 3923 3924 ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR); 3925 if (ret < 0) 3926 goto out; 3927 3928 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 3929 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); 3930 TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len); 3931 3932 ret = send_cmd(sctx); 3933 3934 tlv_put_failure: 3935 out: 3936 return ret; 3937 } 3938 3939 static int send_remove_xattr(struct send_ctx *sctx, 3940 struct fs_path *path, 3941 const char *name, int name_len) 3942 { 3943 int ret = 0; 3944 3945 ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR); 3946 if (ret < 0) 3947 goto out; 3948 3949 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 3950 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); 3951 3952 ret = send_cmd(sctx); 3953 3954 tlv_put_failure: 3955 out: 3956 return ret; 3957 } 3958 3959 static int __process_new_xattr(int num, struct btrfs_key *di_key, 3960 const char *name, int name_len, 3961 const char *data, int data_len, 3962 u8 type, void *ctx) 3963 { 3964 int ret; 3965 struct send_ctx *sctx = ctx; 3966 struct fs_path *p; 3967 posix_acl_xattr_header dummy_acl; 3968 3969 p = fs_path_alloc(); 3970 if (!p) 3971 return -ENOMEM; 3972 3973 /* 3974 * This hack is needed because empty ACLs are stored as zero byte 3975 * data in xattrs. The problem is that receiving these zero byte 3976 * ACLs will fail later. To fix this, we send a dummy ACL list that 3977 * only contains the version number and no entries. 3978 */ 3979 if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) || 3980 !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) { 3981 if (data_len == 0) { 3982 dummy_acl.a_version = 3983 cpu_to_le32(POSIX_ACL_XATTR_VERSION); 3984 data = (char *)&dummy_acl; 3985 data_len = sizeof(dummy_acl); 3986 } 3987 } 3988 3989 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 3990 if (ret < 0) 3991 goto out; 3992 3993 ret = send_set_xattr(sctx, p, name, name_len, data, data_len); 3994 3995 out: 3996 fs_path_free(p); 3997 return ret; 3998 } 3999 4000 static int __process_deleted_xattr(int num, struct btrfs_key *di_key, 4001 const char *name, int name_len, 4002 const char *data, int data_len, 4003 u8 type, void *ctx) 4004 { 4005 int ret; 4006 struct send_ctx *sctx = ctx; 4007 struct fs_path *p; 4008 4009 p = fs_path_alloc(); 4010 if (!p) 4011 return -ENOMEM; 4012 4013 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4014 if (ret < 0) 4015 goto out; 4016 4017 ret = send_remove_xattr(sctx, p, name, name_len); 4018 4019 out: 4020 fs_path_free(p); 4021 return ret; 4022 } 4023 4024 static int process_new_xattr(struct send_ctx *sctx) 4025 { 4026 int ret = 0; 4027 4028 ret = iterate_dir_item(sctx->send_root, sctx->left_path, 4029 sctx->cmp_key, __process_new_xattr, sctx); 4030 4031 return ret; 4032 } 4033 4034 static int process_deleted_xattr(struct send_ctx *sctx) 4035 { 4036 int ret; 4037 4038 ret = iterate_dir_item(sctx->parent_root, sctx->right_path, 4039 sctx->cmp_key, __process_deleted_xattr, sctx); 4040 4041 return ret; 4042 } 4043 4044 struct find_xattr_ctx { 4045 const char *name; 4046 int name_len; 4047 int found_idx; 4048 char *found_data; 4049 int found_data_len; 4050 }; 4051 4052 static int __find_xattr(int num, struct btrfs_key *di_key, 4053 const char *name, int name_len, 4054 const char *data, int data_len, 4055 u8 type, void *vctx) 4056 { 4057 struct find_xattr_ctx *ctx = vctx; 4058 4059 if (name_len == ctx->name_len && 4060 strncmp(name, ctx->name, name_len) == 0) { 4061 ctx->found_idx = num; 4062 ctx->found_data_len = data_len; 4063 ctx->found_data = kmemdup(data, data_len, GFP_NOFS); 4064 if (!ctx->found_data) 4065 return -ENOMEM; 4066 return 1; 4067 } 4068 return 0; 4069 } 4070 4071 static int find_xattr(struct btrfs_root *root, 4072 struct btrfs_path *path, 4073 struct btrfs_key *key, 4074 const char *name, int name_len, 4075 char **data, int *data_len) 4076 { 4077 int ret; 4078 struct find_xattr_ctx ctx; 4079 4080 ctx.name = name; 4081 ctx.name_len = name_len; 4082 ctx.found_idx = -1; 4083 ctx.found_data = NULL; 4084 ctx.found_data_len = 0; 4085 4086 ret = iterate_dir_item(root, path, key, __find_xattr, &ctx); 4087 if (ret < 0) 4088 return ret; 4089 4090 if (ctx.found_idx == -1) 4091 return -ENOENT; 4092 if (data) { 4093 *data = ctx.found_data; 4094 *data_len = ctx.found_data_len; 4095 } else { 4096 kfree(ctx.found_data); 4097 } 4098 return ctx.found_idx; 4099 } 4100 4101 4102 static int __process_changed_new_xattr(int num, struct btrfs_key *di_key, 4103 const char *name, int name_len, 4104 const char *data, int data_len, 4105 u8 type, void *ctx) 4106 { 4107 int ret; 4108 struct send_ctx *sctx = ctx; 4109 char *found_data = NULL; 4110 int found_data_len = 0; 4111 4112 ret = find_xattr(sctx->parent_root, sctx->right_path, 4113 sctx->cmp_key, name, name_len, &found_data,
4114 &found_data_len); 4115 if (ret == -ENOENT) { 4116 ret = __process_new_xattr(num, di_key, name, name_len, data, 4117 data_len, type, ctx); 4118 } else if (ret >= 0) { 4119 if (data_len != found_data_len || 4120 memcmp(data, found_data, data_len)) { 4121 ret = __process_new_xattr(num, di_key, name, name_len, 4122 data, data_len, type, ctx); 4123 } else { 4124 ret = 0; 4125 } 4126 } 4127 4128 kfree(found_data); 4129 return ret; 4130 } 4131 4132 static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key, 4133 const char *name, int name_len, 4134 const char *data, int data_len, 4135 u8 type, void *ctx) 4136 { 4137 int ret; 4138 struct send_ctx *sctx = ctx; 4139 4140 ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key, 4141 name, name_len, NULL, NULL); 4142 if (ret == -ENOENT) 4143 ret = __process_deleted_xattr(num, di_key, name, name_len, data, 4144 data_len, type, ctx); 4145 else if (ret >= 0) 4146 ret = 0; 4147 4148 return ret; 4149 } 4150 4151 static int process_changed_xattr(struct send_ctx *sctx) 4152 { 4153 int ret = 0; 4154 4155 ret = iterate_dir_item(sctx->send_root, sctx->left_path, 4156 sctx->cmp_key, __process_changed_new_xattr, sctx); 4157 if (ret < 0) 4158 goto out; 4159 ret = iterate_dir_item(sctx->parent_root, sctx->right_path, 4160 sctx->cmp_key, __process_changed_deleted_xattr, sctx); 4161 4162 out: 4163 return ret; 4164 } 4165 4166 static int process_all_new_xattrs(struct send_ctx *sctx) 4167 { 4168 int ret; 4169 struct btrfs_root *root; 4170 struct btrfs_path *path; 4171 struct btrfs_key key; 4172 struct btrfs_key found_key; 4173 struct extent_buffer *eb; 4174 int slot; 4175 4176 path = alloc_path_for_send(); 4177 if (!path) 4178 return -ENOMEM; 4179 4180 root = sctx->send_root; 4181 4182 key.objectid = sctx->cmp_key->objectid; 4183 key.type = BTRFS_XATTR_ITEM_KEY; 4184 key.offset = 0; 4185 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4186 if (ret < 0) 4187 goto out; 4188 4189 while (1) { 4190 eb = path->nodes[0]; 4191 slot = path->slots[0]; 4192 if (slot >= btrfs_header_nritems(eb)) { 4193 ret = btrfs_next_leaf(root, path); 4194 if (ret < 0) { 4195 goto out; 4196 } else if (ret > 0) { 4197 ret = 0; 4198 break; 4199 } 4200 continue; 4201 } 4202 4203 btrfs_item_key_to_cpu(eb, &found_key, slot); 4204 if (found_key.objectid != key.objectid || 4205 found_key.type != key.type) { 4206 ret = 0; 4207 goto out; 4208 } 4209 4210 ret = iterate_dir_item(root, path, &found_key, 4211 __process_new_xattr, sctx); 4212 if (ret < 0) 4213 goto out; 4214 4215 path->slots[0]++; 4216 } 4217 4218 out: 4219 btrfs_free_path(path); 4220 return ret; 4221 } 4222 4223 static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) 4224 { 4225 struct btrfs_root *root = sctx->send_root; 4226 struct btrfs_fs_info *fs_info = root->fs_info; 4227 struct inode *inode; 4228 struct page *page; 4229 char *addr; 4230 struct btrfs_key key; 4231 pgoff_t index = offset >> PAGE_CACHE_SHIFT; 4232 pgoff_t last_index; 4233 unsigned pg_offset = offset & ~PAGE_CACHE_MASK; 4234 ssize_t ret = 0; 4235 4236 key.objectid = sctx->cur_ino; 4237 key.type = BTRFS_INODE_ITEM_KEY; 4238 key.offset = 0; 4239 4240 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 4241 if (IS_ERR(inode)) 4242 return PTR_ERR(inode); 4243 4244 /* Clamp the read to the inode's i_size. */ 4245 if (offset + len > i_size_read(inode)) { 4246 if (offset > i_size_read(inode)) 4247 len = 0; 4248 else 4249 len = i_size_read(inode) - offset; 4250 } 4251 if (len == 0) 4252 goto out; 4253 4254 last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; 4255 4256 /*
initial readahead */ 4256 memset(&sctx->ra, 0, sizeof(struct file_ra_state)); 4257 file_ra_state_init(&sctx->ra, inode->i_mapping); 4258 btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index, 4259 last_index - index + 1); 4260 4261 while (index <= last_index) { 4262 unsigned cur_len = min_t(unsigned, len, 4263 PAGE_CACHE_SIZE - pg_offset); 4264 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 4265 if (!page) { 4266 ret = -ENOMEM; 4267 break; 4268 } 4269 4270 if (!PageUptodate(page)) { 4271 btrfs_readpage(NULL, page); 4272 lock_page(page); 4273 if (!PageUptodate(page)) { 4274 unlock_page(page); 4275 page_cache_release(page); 4276 ret = -EIO; 4277 break; 4278 } 4279 } 4280 4281 addr = kmap(page); 4282 memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len); 4283 kunmap(page); 4284 unlock_page(page); 4285 page_cache_release(page); 4286 index++; 4287 pg_offset = 0; 4288 len -= cur_len; 4289 ret += cur_len; 4290 } 4291 out: 4292 iput(inode); 4293 return ret; 4294 } 4295 4296 /* 4297 * Read some bytes from the current inode/file and send a write command to 4298 * user space. 4299 */ 4300 static int send_write(struct send_ctx *sctx, u64 offset, u32 len) 4301 { 4302 int ret = 0; 4303 struct fs_path *p; 4304 ssize_t num_read = 0; 4305 4306 p = fs_path_alloc(); 4307 if (!p) 4308 return -ENOMEM; 4309 4310 verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); 4311 4312 num_read = fill_read_buf(sctx, offset, len); 4313 if (num_read <= 0) { 4314 if (num_read < 0) 4315 ret = num_read; 4316 goto out; 4317 } 4318 4319 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4320 if (ret < 0) 4321 goto out; 4322 4323 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4324 if (ret < 0) 4325 goto out; 4326 4327 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4328 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4329 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read); 4330 4331 ret = send_cmd(sctx); 4332 4333 tlv_put_failure: 4334 out: 4335 fs_path_free(p); 4336 if (ret < 0) 4337 return ret; 4338 return num_read; 4339 } 4340 4341 /* 4342 * Send a clone command to user space. 
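 * The clone source is identified by the UUID and ctransid of its root plus
 * a path and offset inside it, so the receiver can resolve it in a
 * previously received snapshot, or in the file that is currently being
 * built when the source lives in the send root itself.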
4343 */ 4344 static int send_clone(struct send_ctx *sctx, 4345 u64 offset, u32 len, 4346 struct clone_root *clone_root) 4347 { 4348 int ret = 0; 4349 struct fs_path *p; 4350 u64 gen; 4351 4352 verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " 4353 "clone_inode=%llu, clone_offset=%llu\n", offset, len, 4354 clone_root->root->objectid, clone_root->ino, 4355 clone_root->offset); 4356 4357 p = fs_path_alloc(); 4358 if (!p) 4359 return -ENOMEM; 4360 4361 ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE); 4362 if (ret < 0) 4363 goto out; 4364 4365 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4366 if (ret < 0) 4367 goto out; 4368 4369 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4370 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); 4371 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4372 4373 if (clone_root->root == sctx->send_root) { 4374 ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, 4375 &gen, NULL, NULL, NULL, NULL); 4376 if (ret < 0) 4377 goto out; 4378 ret = get_cur_path(sctx, clone_root->ino, gen, p); 4379 } else { 4380 ret = get_inode_path(clone_root->root, clone_root->ino, p); 4381 } 4382 if (ret < 0) 4383 goto out; 4384 4385 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 4386 clone_root->root->root_item.uuid); 4387 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 4388 le64_to_cpu(clone_root->root->root_item.ctransid)); 4389 TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); 4390 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, 4391 clone_root->offset); 4392 4393 ret = send_cmd(sctx); 4394 4395 tlv_put_failure: 4396 out: 4397 fs_path_free(p); 4398 return ret; 4399 } 4400 4401 /* 4402 * Send an update extent command to user space. 4403 */ 4404 static int send_update_extent(struct send_ctx *sctx, 4405 u64 offset, u32 len) 4406 { 4407 int ret = 0; 4408 struct fs_path *p; 4409 4410 p = fs_path_alloc(); 4411 if (!p) 4412 return -ENOMEM; 4413 4414 ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT); 4415 if (ret < 0) 4416 goto out; 4417 4418 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4419 if (ret < 0) 4420 goto out; 4421 4422 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4423 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4424 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); 4425 4426 ret = send_cmd(sctx); 4427 4428 tlv_put_failure: 4429 out: 4430 fs_path_free(p); 4431 return ret; 4432 } 4433 4434 static int send_hole(struct send_ctx *sctx, u64 end) 4435 { 4436 struct fs_path *p = NULL; 4437 u64 offset = sctx->cur_inode_last_extent; 4438 u64 len; 4439 int ret = 0; 4440 4441 p = fs_path_alloc(); 4442 if (!p) 4443 return -ENOMEM; 4444 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4445 if (ret < 0) 4446 goto tlv_put_failure; 4447 memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); 4448 while (offset < end) { 4449 len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); 4450 4451 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4452 if (ret < 0) 4453 break; 4454 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4455 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4456 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); 4457 ret = send_cmd(sctx); 4458 if (ret < 0) 4459 break; 4460 offset += len; 4461 } 4462 tlv_put_failure: 4463 fs_path_free(p); 4464 return ret; 4465 } 4466 4467 static int send_write_or_clone(struct send_ctx *sctx, 4468 struct btrfs_path *path, 4469 struct btrfs_key *key, 4470 struct clone_root *clone_root) 4471 { 4472 int ret = 0; 4473 struct btrfs_file_extent_item *ei; 4474 u64 offset = key->offset; 4475 u64 pos = 0; 
4476 u64 len; 4477 u32 l; 4478 u8 type; 4479 u64 bs = sctx->send_root->fs_info->sb->s_blocksize; 4480 4481 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 4482 struct btrfs_file_extent_item); 4483 type = btrfs_file_extent_type(path->nodes[0], ei); 4484 if (type == BTRFS_FILE_EXTENT_INLINE) { 4485 len = btrfs_file_extent_inline_len(path->nodes[0], 4486 path->slots[0], ei); 4487 /* 4488 * it is possible the inline item won't cover the whole page, 4489 * but there may be items after this page. Make 4490 * sure to send the whole thing 4491 */ 4492 len = PAGE_CACHE_ALIGN(len); 4493 } else { 4494 len = btrfs_file_extent_num_bytes(path->nodes[0], ei); 4495 } 4496 4497 if (offset + len > sctx->cur_inode_size) 4498 len = sctx->cur_inode_size - offset; 4499 if (len == 0) { 4500 ret = 0; 4501 goto out; 4502 } 4503 4504 if (clone_root && IS_ALIGNED(offset + len, bs)) { 4505 ret = send_clone(sctx, offset, len, clone_root); 4506 } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) { 4507 ret = send_update_extent(sctx, offset, len); 4508 } else { 4509 while (pos < len) { 4510 l = len - pos; 4511 if (l > BTRFS_SEND_READ_SIZE) 4512 l = BTRFS_SEND_READ_SIZE; 4513 ret = send_write(sctx, pos + offset, l); 4514 if (ret < 0) 4515 goto out; 4516 if (!ret) 4517 break; 4518 pos += ret; 4519 } 4520 ret = 0; 4521 } 4522 out: 4523 return ret; 4524 } 4525 4526 static int is_extent_unchanged(struct send_ctx *sctx, 4527 struct btrfs_path *left_path, 4528 struct btrfs_key *ekey) 4529 { 4530 int ret = 0; 4531 struct btrfs_key key; 4532 struct btrfs_path *path = NULL; 4533 struct extent_buffer *eb; 4534 int slot; 4535 struct btrfs_key found_key; 4536 struct btrfs_file_extent_item *ei; 4537 u64 left_disknr; 4538 u64 right_disknr; 4539 u64 left_offset; 4540 u64 right_offset; 4541 u64 left_offset_fixed; 4542 u64 left_len; 4543 u64 right_len; 4544 u64 left_gen; 4545 u64 right_gen; 4546 u8 left_type; 4547 u8 right_type; 4548 4549 path = alloc_path_for_send(); 4550 if (!path) 4551 return -ENOMEM; 4552 4553 eb = left_path->nodes[0]; 4554 slot = left_path->slots[0]; 4555 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 4556 left_type = btrfs_file_extent_type(eb, ei); 4557 4558 if (left_type != BTRFS_FILE_EXTENT_REG) { 4559 ret = 0; 4560 goto out; 4561 } 4562 left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 4563 left_len = btrfs_file_extent_num_bytes(eb, ei); 4564 left_offset = btrfs_file_extent_offset(eb, ei); 4565 left_gen = btrfs_file_extent_generation(eb, ei); 4566 4567 /* 4568 * Following comments will refer to these graphics. L is the left 4569 * extents which we are checking at the moment. 1-8 are the right 4570 * extents that we iterate. 4571 * 4572 * |-----L-----| 4573 * |-1-|-2a-|-3-|-4-|-5-|-6-| 4574 * 4575 * |-----L-----| 4576 * |--1--|-2b-|...(same as above) 4577 * 4578 * Alternative situation. Happens on files where extents got split. 4579 * |-----L-----| 4580 * |-----------7-----------|-6-| 4581 * 4582 * Alternative situation. Happens on files which got larger. 4583 * |-----L-----| 4584 * |-8-| 4585 * Nothing follows after 8. 4586 */ 4587 4588 key.objectid = ekey->objectid; 4589 key.type = BTRFS_EXTENT_DATA_KEY; 4590 key.offset = ekey->offset; 4591 ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0); 4592 if (ret < 0) 4593 goto out; 4594 if (ret) { 4595 ret = 0; 4596 goto out; 4597 } 4598 4599 /* 4600 * Handle special case where the right side has no extents at all. 
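 * If the key found below has a different objectid or type, the parent
 * root has no file extent items in this range at all; the left extent
 * then only counts as unchanged if it is a hole (disk_bytenr == 0).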
4601 */ 4602 eb = path->nodes[0]; 4603 slot = path->slots[0]; 4604 btrfs_item_key_to_cpu(eb, &found_key, slot); 4605 if (found_key.objectid != key.objectid || 4606 found_key.type != key.type) { 4607 /* If we're a hole then just pretend nothing changed */ 4608 ret = (left_disknr) ? 0 : 1; 4609 goto out; 4610 } 4611 4612 /* 4613 * We're now on 2a, 2b or 7. 4614 */ 4615 key = found_key; 4616 while (key.offset < ekey->offset + left_len) { 4617 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 4618 right_type = btrfs_file_extent_type(eb, ei); 4619 if (right_type != BTRFS_FILE_EXTENT_REG) { 4620 ret = 0; 4621 goto out; 4622 } 4623 4624 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 4625 right_len = btrfs_file_extent_num_bytes(eb, ei); 4626 right_offset = btrfs_file_extent_offset(eb, ei); 4627 right_gen = btrfs_file_extent_generation(eb, ei); 4628 4629 /* 4630 * Are we at extent 8? If yes, we know the extent is changed. 4631 * This may only happen on the first iteration. 4632 */ 4633 if (found_key.offset + right_len <= ekey->offset) { 4634 /* If we're a hole just pretend nothing changed */ 4635 ret = (left_disknr) ? 0 : 1; 4636 goto out; 4637 } 4638 4639 left_offset_fixed = left_offset; 4640 if (key.offset < ekey->offset) { 4641 /* Fix the right offset for 2a and 7. */ 4642 right_offset += ekey->offset - key.offset; 4643 } else { 4644 /* Fix the left offset for all behind 2a and 2b */ 4645 left_offset_fixed += key.offset - ekey->offset; 4646 } 4647 4648 /* 4649 * Check if we have the same extent. 4650 */ 4651 if (left_disknr != right_disknr || 4652 left_offset_fixed != right_offset || 4653 left_gen != right_gen) { 4654 ret = 0; 4655 goto out; 4656 } 4657 4658 /* 4659 * Go to the next extent. 4660 */ 4661 ret = btrfs_next_item(sctx->parent_root, path); 4662 if (ret < 0) 4663 goto out; 4664 if (!ret) { 4665 eb = path->nodes[0]; 4666 slot = path->slots[0]; 4667 btrfs_item_key_to_cpu(eb, &found_key, slot); 4668 } 4669 if (ret || found_key.objectid != key.objectid || 4670 found_key.type != key.type) { 4671 key.offset += right_len; 4672 break; 4673 } 4674 if (found_key.offset != key.offset + right_len) { 4675 ret = 0; 4676 goto out; 4677 } 4678 key = found_key; 4679 } 4680 4681 /* 4682 * We're now behind the left extent (treat as unchanged) or at the end 4683 * of the right side (treat as changed). 
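 * In terms of the graphics above: if key.offset reached the end of L,
 * every overlapping right extent matched; otherwise the right side
 * ended before we could cover all of L.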
4684 */ 4685 if (key.offset >= ekey->offset + left_len) 4686 ret = 1; 4687 else 4688 ret = 0; 4689 4690 4691 out: 4692 btrfs_free_path(path); 4693 return ret; 4694 } 4695 4696 static int get_last_extent(struct send_ctx *sctx, u64 offset) 4697 { 4698 struct btrfs_path *path; 4699 struct btrfs_root *root = sctx->send_root; 4700 struct btrfs_file_extent_item *fi; 4701 struct btrfs_key key; 4702 u64 extent_end; 4703 u8 type; 4704 int ret; 4705 4706 path = alloc_path_for_send(); 4707 if (!path) 4708 return -ENOMEM; 4709 4710 sctx->cur_inode_last_extent = 0; 4711 4712 key.objectid = sctx->cur_ino; 4713 key.type = BTRFS_EXTENT_DATA_KEY; 4714 key.offset = offset; 4715 ret = btrfs_search_slot_for_read(root, &key, path, 0, 1); 4716 if (ret < 0) 4717 goto out; 4718 ret = 0; 4719 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 4720 if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY) 4721 goto out; 4722 4723 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 4724 struct btrfs_file_extent_item); 4725 type = btrfs_file_extent_type(path->nodes[0], fi); 4726 if (type == BTRFS_FILE_EXTENT_INLINE) { 4727 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 4728 path->slots[0], fi); 4729 extent_end = ALIGN(key.offset + size, 4730 sctx->send_root->sectorsize); 4731 } else { 4732 extent_end = key.offset + 4733 btrfs_file_extent_num_bytes(path->nodes[0], fi); 4734 } 4735 sctx->cur_inode_last_extent = extent_end; 4736 out: 4737 btrfs_free_path(path); 4738 return ret; 4739 } 4740 4741 static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, 4742 struct btrfs_key *key) 4743 { 4744 struct btrfs_file_extent_item *fi; 4745 u64 extent_end; 4746 u8 type; 4747 int ret = 0; 4748 4749 if (sctx->cur_ino != key->objectid || !need_send_hole(sctx)) 4750 return 0; 4751 4752 if (sctx->cur_inode_last_extent == (u64)-1) { 4753 ret = get_last_extent(sctx, key->offset - 1); 4754 if (ret) 4755 return ret; 4756 } 4757 4758 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 4759 struct btrfs_file_extent_item); 4760 type = btrfs_file_extent_type(path->nodes[0], fi); 4761 if (type == BTRFS_FILE_EXTENT_INLINE) { 4762 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 4763 path->slots[0], fi); 4764 extent_end = ALIGN(key->offset + size, 4765 sctx->send_root->sectorsize); 4766 } else { 4767 extent_end = key->offset + 4768 btrfs_file_extent_num_bytes(path->nodes[0], fi); 4769 } 4770 4771 if (path->slots[0] == 0 && 4772 sctx->cur_inode_last_extent < key->offset) { 4773 /* 4774 * We might have skipped entire leafs that contained only 4775 * file extent items for our current inode. These leafs have 4776 * a generation number smaller (older) than the one in the 4777 * current leaf and the leaf our last extent came from, and 4778 * are located between these 2 leafs. 
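 * Re-read the end of the last extent before key->offset in that case,
 * so the hole check below does not send zeroes for a range that those
 * skipped leafs actually cover.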
4779 */ 4780 ret = get_last_extent(sctx, key->offset - 1); 4781 if (ret) 4782 return ret; 4783 } 4784 4785 if (sctx->cur_inode_last_extent < key->offset) 4786 ret = send_hole(sctx, key->offset); 4787 sctx->cur_inode_last_extent = extent_end; 4788 return ret; 4789 } 4790 4791 static int process_extent(struct send_ctx *sctx, 4792 struct btrfs_path *path, 4793 struct btrfs_key *key) 4794 { 4795 struct clone_root *found_clone = NULL; 4796 int ret = 0; 4797 4798 if (S_ISLNK(sctx->cur_inode_mode)) 4799 return 0; 4800 4801 if (sctx->parent_root && !sctx->cur_inode_new) { 4802 ret = is_extent_unchanged(sctx, path, key); 4803 if (ret < 0) 4804 goto out; 4805 if (ret) { 4806 ret = 0; 4807 goto out_hole; 4808 } 4809 } else { 4810 struct btrfs_file_extent_item *ei; 4811 u8 type; 4812 4813 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 4814 struct btrfs_file_extent_item); 4815 type = btrfs_file_extent_type(path->nodes[0], ei); 4816 if (type == BTRFS_FILE_EXTENT_PREALLOC || 4817 type == BTRFS_FILE_EXTENT_REG) { 4818 /* 4819 * The send spec does not have a prealloc command yet, 4820 * so just leave a hole for prealloc'ed extents until 4821 * we have enough commands queued up to justify rev'ing 4822 * the send spec. 4823 */ 4824 if (type == BTRFS_FILE_EXTENT_PREALLOC) { 4825 ret = 0; 4826 goto out; 4827 } 4828 4829 /* Have a hole, just skip it. */ 4830 if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) { 4831 ret = 0; 4832 goto out; 4833 } 4834 } 4835 } 4836 4837 ret = find_extent_clone(sctx, path, key->objectid, key->offset, 4838 sctx->cur_inode_size, &found_clone); 4839 if (ret != -ENOENT && ret < 0) 4840 goto out; 4841 4842 ret = send_write_or_clone(sctx, path, key, found_clone); 4843 if (ret) 4844 goto out; 4845 out_hole: 4846 ret = maybe_send_hole(sctx, path, key); 4847 out: 4848 return ret; 4849 } 4850 4851 static int process_all_extents(struct send_ctx *sctx) 4852 { 4853 int ret; 4854 struct btrfs_root *root; 4855 struct btrfs_path *path; 4856 struct btrfs_key key; 4857 struct btrfs_key found_key; 4858 struct extent_buffer *eb; 4859 int slot; 4860 4861 root = sctx->send_root; 4862 path = alloc_path_for_send(); 4863 if (!path) 4864 return -ENOMEM; 4865 4866 key.objectid = sctx->cmp_key->objectid; 4867 key.type = BTRFS_EXTENT_DATA_KEY; 4868 key.offset = 0; 4869 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4870 if (ret < 0) 4871 goto out; 4872 4873 while (1) { 4874 eb = path->nodes[0]; 4875 slot = path->slots[0]; 4876 4877 if (slot >= btrfs_header_nritems(eb)) { 4878 ret = btrfs_next_leaf(root, path); 4879 if (ret < 0) { 4880 goto out; 4881 } else if (ret > 0) { 4882 ret = 0; 4883 break; 4884 } 4885 continue; 4886 } 4887 4888 btrfs_item_key_to_cpu(eb, &found_key, slot); 4889 4890 if (found_key.objectid != key.objectid || 4891 found_key.type != key.type) { 4892 ret = 0; 4893 goto out; 4894 } 4895 4896 ret = process_extent(sctx, path, &found_key); 4897 if (ret < 0) 4898 goto out; 4899 4900 path->slots[0]++; 4901 } 4902 4903 out: 4904 btrfs_free_path(path); 4905 return ret; 4906 } 4907 4908 static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end, 4909 int *pending_move, 4910 int *refs_processed) 4911 { 4912 int ret = 0; 4913 4914 if (sctx->cur_ino == 0) 4915 goto out; 4916 if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid && 4917 sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY) 4918 goto out; 4919 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) 4920 goto out; 4921 4922 ret = process_recorded_refs(sctx, pending_move); 4923 if (ret < 0) 
4924 goto out; 4925 4926 *refs_processed = 1; 4927 out: 4928 return ret; 4929 } 4930 4931 static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) 4932 { 4933 int ret = 0; 4934 u64 left_mode; 4935 u64 left_uid; 4936 u64 left_gid; 4937 u64 right_mode; 4938 u64 right_uid; 4939 u64 right_gid; 4940 int need_chmod = 0; 4941 int need_chown = 0; 4942 int pending_move = 0; 4943 int refs_processed = 0; 4944 4945 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, 4946 &refs_processed); 4947 if (ret < 0) 4948 goto out; 4949 4950 /* 4951 * We have processed the refs and thus need to advance send_progress. 4952 * Now, calls to get_cur_xxx will take the updated refs of the current 4953 * inode into account. 4954 * 4955 * On the other hand, if our current inode is a directory and couldn't 4956 * be moved/renamed because its parent was renamed/moved too and it has 4957 * a higher inode number, we can only move/rename our current inode 4958 * after we moved/renamed its parent. Therefore in this case operate on 4959 * the old path (pre move/rename) of our current inode, and the 4960 * move/rename will be performed later. 4961 */ 4962 if (refs_processed && !pending_move) 4963 sctx->send_progress = sctx->cur_ino + 1; 4964 4965 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted) 4966 goto out; 4967 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino) 4968 goto out; 4969 4970 ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, 4971 &left_mode, &left_uid, &left_gid, NULL); 4972 if (ret < 0) 4973 goto out; 4974 4975 if (!sctx->parent_root || sctx->cur_inode_new) { 4976 need_chown = 1; 4977 if (!S_ISLNK(sctx->cur_inode_mode)) 4978 need_chmod = 1; 4979 } else { 4980 ret = get_inode_info(sctx->parent_root, sctx->cur_ino, 4981 NULL, NULL, &right_mode, &right_uid, 4982 &right_gid, NULL); 4983 if (ret < 0) 4984 goto out; 4985 4986 if (left_uid != right_uid || left_gid != right_gid) 4987 need_chown = 1; 4988 if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode) 4989 need_chmod = 1; 4990 } 4991 4992 if (S_ISREG(sctx->cur_inode_mode)) { 4993 if (need_send_hole(sctx)) { 4994 if (sctx->cur_inode_last_extent == (u64)-1 || 4995 sctx->cur_inode_last_extent < 4996 sctx->cur_inode_size) { 4997 ret = get_last_extent(sctx, (u64)-1); 4998 if (ret) 4999 goto out; 5000 } 5001 if (sctx->cur_inode_last_extent < 5002 sctx->cur_inode_size) { 5003 ret = send_hole(sctx, sctx->cur_inode_size); 5004 if (ret) 5005 goto out; 5006 } 5007 } 5008 ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5009 sctx->cur_inode_size); 5010 if (ret < 0) 5011 goto out; 5012 } 5013 5014 if (need_chown) { 5015 ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5016 left_uid, left_gid); 5017 if (ret < 0) 5018 goto out; 5019 } 5020 if (need_chmod) { 5021 ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5022 left_mode); 5023 if (ret < 0) 5024 goto out; 5025 } 5026 5027 /* 5028 * If other directory inodes depended on our current directory 5029 * inode's move/rename, now do their move/rename operations. 5030 */ 5031 if (!is_waiting_for_move(sctx, sctx->cur_ino)) { 5032 ret = apply_children_dir_moves(sctx); 5033 if (ret) 5034 goto out; 5035 /* 5036 * Need to send that every time, no matter if it actually 5037 * changed between the two trees as we have done changes to 5038 * the inode before. If our inode is a directory and it's 5039 * waiting to be moved/renamed, we will send its utimes when 5040 * it's moved/renamed, therefore we don't need to do it here. 
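 * Advancing send_progress before send_utimes makes get_cur_path
 * resolve the inode's updated (post move/rename) path.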
5041 */ 5042 sctx->send_progress = sctx->cur_ino + 1; 5043 ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); 5044 if (ret < 0) 5045 goto out; 5046 } 5047 5048 out: 5049 return ret; 5050 } 5051 5052 static int changed_inode(struct send_ctx *sctx, 5053 enum btrfs_compare_tree_result result) 5054 { 5055 int ret = 0; 5056 struct btrfs_key *key = sctx->cmp_key; 5057 struct btrfs_inode_item *left_ii = NULL; 5058 struct btrfs_inode_item *right_ii = NULL; 5059 u64 left_gen = 0; 5060 u64 right_gen = 0; 5061 5062 sctx->cur_ino = key->objectid; 5063 sctx->cur_inode_new_gen = 0; 5064 sctx->cur_inode_last_extent = (u64)-1; 5065 5066 /* 5067 * Set send_progress to current inode. This will tell all get_cur_xxx 5068 * functions that the current inode's refs are not updated yet. Later, 5069 * when process_recorded_refs is finished, it is set to cur_ino + 1. 5070 */ 5071 sctx->send_progress = sctx->cur_ino; 5072 5073 if (result == BTRFS_COMPARE_TREE_NEW || 5074 result == BTRFS_COMPARE_TREE_CHANGED) { 5075 left_ii = btrfs_item_ptr(sctx->left_path->nodes[0], 5076 sctx->left_path->slots[0], 5077 struct btrfs_inode_item); 5078 left_gen = btrfs_inode_generation(sctx->left_path->nodes[0], 5079 left_ii); 5080 } else { 5081 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], 5082 sctx->right_path->slots[0], 5083 struct btrfs_inode_item); 5084 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], 5085 right_ii); 5086 } 5087 if (result == BTRFS_COMPARE_TREE_CHANGED) { 5088 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], 5089 sctx->right_path->slots[0], 5090 struct btrfs_inode_item); 5091 5092 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], 5093 right_ii); 5094 5095 /* 5096 * The cur_ino = root dir case is special here. We can't treat 5097 * the inode as deleted+reused because it would generate a 5098 * stream that tries to delete/mkdir the root dir. 5099 */ 5100 if (left_gen != right_gen && 5101 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) 5102 sctx->cur_inode_new_gen = 1; 5103 } 5104 5105 if (result == BTRFS_COMPARE_TREE_NEW) { 5106 sctx->cur_inode_gen = left_gen; 5107 sctx->cur_inode_new = 1; 5108 sctx->cur_inode_deleted = 0; 5109 sctx->cur_inode_size = btrfs_inode_size( 5110 sctx->left_path->nodes[0], left_ii); 5111 sctx->cur_inode_mode = btrfs_inode_mode( 5112 sctx->left_path->nodes[0], left_ii); 5113 sctx->cur_inode_rdev = btrfs_inode_rdev( 5114 sctx->left_path->nodes[0], left_ii); 5115 if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) 5116 ret = send_create_inode_if_needed(sctx); 5117 } else if (result == BTRFS_COMPARE_TREE_DELETED) { 5118 sctx->cur_inode_gen = right_gen; 5119 sctx->cur_inode_new = 0; 5120 sctx->cur_inode_deleted = 1; 5121 sctx->cur_inode_size = btrfs_inode_size( 5122 sctx->right_path->nodes[0], right_ii); 5123 sctx->cur_inode_mode = btrfs_inode_mode( 5124 sctx->right_path->nodes[0], right_ii); 5125 } else if (result == BTRFS_COMPARE_TREE_CHANGED) { 5126 /* 5127 * We need to do some special handling in case the inode was 5128 * reported as changed with a changed generation number. This 5129 * means that the original inode was deleted and new inode 5130 * reused the same inum. So we have to treat the old inode as 5131 * deleted and the new one as new. 5132 */ 5133 if (sctx->cur_inode_new_gen) { 5134 /* 5135 * First, process the inode as if it was deleted. 
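 * process_all_refs below treats every ref of the old (right side)
 * inode as deleted, which removes or orphanizes its old paths.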
5136 */
5137 sctx->cur_inode_gen = right_gen;
5138 sctx->cur_inode_new = 0;
5139 sctx->cur_inode_deleted = 1;
5140 sctx->cur_inode_size = btrfs_inode_size(
5141 sctx->right_path->nodes[0], right_ii);
5142 sctx->cur_inode_mode = btrfs_inode_mode(
5143 sctx->right_path->nodes[0], right_ii);
5144 ret = process_all_refs(sctx,
5145 BTRFS_COMPARE_TREE_DELETED);
5146 if (ret < 0)
5147 goto out;
5148
5149 /*
5150 * Now process the inode as if it was new.
5151 */
5152 sctx->cur_inode_gen = left_gen;
5153 sctx->cur_inode_new = 1;
5154 sctx->cur_inode_deleted = 0;
5155 sctx->cur_inode_size = btrfs_inode_size(
5156 sctx->left_path->nodes[0], left_ii);
5157 sctx->cur_inode_mode = btrfs_inode_mode(
5158 sctx->left_path->nodes[0], left_ii);
5159 sctx->cur_inode_rdev = btrfs_inode_rdev(
5160 sctx->left_path->nodes[0], left_ii);
5161 ret = send_create_inode_if_needed(sctx);
5162 if (ret < 0)
5163 goto out;
5164
5165 ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
5166 if (ret < 0)
5167 goto out;
5168 /*
5169 * Advance send_progress now as we did not get into
5170 * process_recorded_refs_if_needed in the new_gen case.
5171 */
5172 sctx->send_progress = sctx->cur_ino + 1;
5173
5174 /*
5175 * Now process all extents and xattrs of the inode as if
5176 * they were all new.
5177 */
5178 ret = process_all_extents(sctx);
5179 if (ret < 0)
5180 goto out;
5181 ret = process_all_new_xattrs(sctx);
5182 if (ret < 0)
5183 goto out;
5184 } else {
5185 sctx->cur_inode_gen = left_gen;
5186 sctx->cur_inode_new = 0;
5187 sctx->cur_inode_new_gen = 0;
5188 sctx->cur_inode_deleted = 0;
5189 sctx->cur_inode_size = btrfs_inode_size(
5190 sctx->left_path->nodes[0], left_ii);
5191 sctx->cur_inode_mode = btrfs_inode_mode(
5192 sctx->left_path->nodes[0], left_ii);
5193 }
5194 }
5195
5196 out:
5197 return ret;
5198 }
5199
5200 /*
5201 * We have to process new refs before deleted refs, but compare_trees gives us
5202 * the new and deleted refs mixed. To fix this, we record the new/deleted refs
5203 * first and later process them in process_recorded_refs.
5204 * For the cur_inode_new_gen case, we skip recording completely because
5205 * changed_inode already initiated processing of refs. The reason for this is
5206 * that in this case, compare_tree actually compares the refs of 2 different
5207 * inodes. To fix this, process_all_refs is used in changed_inode to handle all
5208 * refs of the right tree as deleted and all refs of the left tree as new.
5209 */
5210 static int changed_ref(struct send_ctx *sctx,
5211 enum btrfs_compare_tree_result result)
5212 {
5213 int ret = 0;
5214
5215 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5216
5217 if (!sctx->cur_inode_new_gen &&
5218 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
5219 if (result == BTRFS_COMPARE_TREE_NEW)
5220 ret = record_new_ref(sctx);
5221 else if (result == BTRFS_COMPARE_TREE_DELETED)
5222 ret = record_deleted_ref(sctx);
5223 else if (result == BTRFS_COMPARE_TREE_CHANGED)
5224 ret = record_changed_ref(sctx);
5225 }
5226
5227 return ret;
5228 }
5229
5230 /*
5231 * Process new/deleted/changed xattrs. We skip processing in the
5232 * cur_inode_new_gen case because changed_inode already initiated processing
5233 * of xattrs.
The reason is the same as in changed_ref.
5234 */
5235 static int changed_xattr(struct send_ctx *sctx,
5236 enum btrfs_compare_tree_result result)
5237 {
5238 int ret = 0;
5239
5240 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5241
5242 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5243 if (result == BTRFS_COMPARE_TREE_NEW)
5244 ret = process_new_xattr(sctx);
5245 else if (result == BTRFS_COMPARE_TREE_DELETED)
5246 ret = process_deleted_xattr(sctx);
5247 else if (result == BTRFS_COMPARE_TREE_CHANGED)
5248 ret = process_changed_xattr(sctx);
5249 }
5250
5251 return ret;
5252 }
5253
5254 /*
5255 * Process new/deleted/changed extents. We skip processing in the
5256 * cur_inode_new_gen case because changed_inode already initiated processing
5257 * of extents. The reason is the same as in changed_ref.
5258 */
5259 static int changed_extent(struct send_ctx *sctx,
5260 enum btrfs_compare_tree_result result)
5261 {
5262 int ret = 0;
5263
5264 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
5265
5266 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
5267 if (result != BTRFS_COMPARE_TREE_DELETED)
5268 ret = process_extent(sctx, sctx->left_path,
5269 sctx->cmp_key);
5270 }
5271
5272 return ret;
5273 }
5274
5275 static int dir_changed(struct send_ctx *sctx, u64 dir)
5276 {
5277 u64 orig_gen, new_gen;
5278 int ret;
5279
5280 ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
5281 NULL, NULL);
5282 if (ret)
5283 return ret;
5284
5285 ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
5286 NULL, NULL, NULL);
5287 if (ret)
5288 return ret;
5289
5290 return (orig_gen != new_gen) ? 1 : 0;
5291 }
5292
5293 static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
5294 struct btrfs_key *key)
5295 {
5296 struct btrfs_inode_extref *extref;
5297 struct extent_buffer *leaf;
5298 u64 dirid = 0, last_dirid = 0;
5299 unsigned long ptr;
5300 u32 item_size;
5301 u32 cur_offset = 0;
5302 int ref_name_len;
5303 int ret = 0;
5304
5305 /* Easy case, just check this one dirid */
5306 if (key->type == BTRFS_INODE_REF_KEY) {
5307 dirid = key->offset;
5308
5309 ret = dir_changed(sctx, dirid);
5310 goto out;
5311 }
5312
5313 leaf = path->nodes[0];
5314 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
5315 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
5316 while (cur_offset < item_size) {
5317 extref = (struct btrfs_inode_extref *)(ptr +
5318 cur_offset);
5319 dirid = btrfs_inode_extref_parent(leaf, extref);
5320 ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
5321 cur_offset += ref_name_len + sizeof(*extref);
5322 if (dirid == last_dirid)
5323 continue;
5324 ret = dir_changed(sctx, dirid);
5325 if (ret)
5326 break;
5327 last_dirid = dirid;
5328 }
5329 out:
5330 return ret;
5331 }
5332
5333 /*
5334 * Updates compare-related fields in sctx and simply forwards to the actual
5335 * changed_xxx functions.
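 * Items reported as BTRFS_COMPARE_TREE_SAME are not simply skipped:
 * a ref item may point to a parent dir whose generation changed
 * (detected via compare_refs), and an unchanged file extent item may
 * still require a hole to be sent first (maybe_send_hole).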
5336 */ 5337 static int changed_cb(struct btrfs_root *left_root, 5338 struct btrfs_root *right_root, 5339 struct btrfs_path *left_path, 5340 struct btrfs_path *right_path, 5341 struct btrfs_key *key, 5342 enum btrfs_compare_tree_result result, 5343 void *ctx) 5344 { 5345 int ret = 0; 5346 struct send_ctx *sctx = ctx; 5347 5348 if (result == BTRFS_COMPARE_TREE_SAME) { 5349 if (key->type == BTRFS_INODE_REF_KEY || 5350 key->type == BTRFS_INODE_EXTREF_KEY) { 5351 ret = compare_refs(sctx, left_path, key); 5352 if (!ret) 5353 return 0; 5354 if (ret < 0) 5355 return ret; 5356 } else if (key->type == BTRFS_EXTENT_DATA_KEY) { 5357 return maybe_send_hole(sctx, left_path, key); 5358 } else { 5359 return 0; 5360 } 5361 result = BTRFS_COMPARE_TREE_CHANGED; 5362 ret = 0; 5363 } 5364 5365 sctx->left_path = left_path; 5366 sctx->right_path = right_path; 5367 sctx->cmp_key = key; 5368 5369 ret = finish_inode_if_needed(sctx, 0); 5370 if (ret < 0) 5371 goto out; 5372 5373 /* Ignore non-FS objects */ 5374 if (key->objectid == BTRFS_FREE_INO_OBJECTID || 5375 key->objectid == BTRFS_FREE_SPACE_OBJECTID) 5376 goto out; 5377 5378 if (key->type == BTRFS_INODE_ITEM_KEY) 5379 ret = changed_inode(sctx, result); 5380 else if (key->type == BTRFS_INODE_REF_KEY || 5381 key->type == BTRFS_INODE_EXTREF_KEY) 5382 ret = changed_ref(sctx, result); 5383 else if (key->type == BTRFS_XATTR_ITEM_KEY) 5384 ret = changed_xattr(sctx, result); 5385 else if (key->type == BTRFS_EXTENT_DATA_KEY) 5386 ret = changed_extent(sctx, result); 5387 5388 out: 5389 return ret; 5390 } 5391 5392 static int full_send_tree(struct send_ctx *sctx) 5393 { 5394 int ret; 5395 struct btrfs_root *send_root = sctx->send_root; 5396 struct btrfs_key key; 5397 struct btrfs_key found_key; 5398 struct btrfs_path *path; 5399 struct extent_buffer *eb; 5400 int slot; 5401 5402 path = alloc_path_for_send(); 5403 if (!path) 5404 return -ENOMEM; 5405 5406 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 5407 key.type = BTRFS_INODE_ITEM_KEY; 5408 key.offset = 0; 5409 5410 ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); 5411 if (ret < 0) 5412 goto out; 5413 if (ret) 5414 goto out_finish; 5415 5416 while (1) { 5417 eb = path->nodes[0]; 5418 slot = path->slots[0]; 5419 btrfs_item_key_to_cpu(eb, &found_key, slot); 5420 5421 ret = changed_cb(send_root, NULL, path, NULL, 5422 &found_key, BTRFS_COMPARE_TREE_NEW, sctx); 5423 if (ret < 0) 5424 goto out; 5425 5426 key.objectid = found_key.objectid; 5427 key.type = found_key.type; 5428 key.offset = found_key.offset + 1; 5429 5430 ret = btrfs_next_item(send_root, path); 5431 if (ret < 0) 5432 goto out; 5433 if (ret) { 5434 ret = 0; 5435 break; 5436 } 5437 } 5438 5439 out_finish: 5440 ret = finish_inode_if_needed(sctx, 1); 5441 5442 out: 5443 btrfs_free_path(path); 5444 return ret; 5445 } 5446 5447 static int send_subvol(struct send_ctx *sctx) 5448 { 5449 int ret; 5450 5451 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) { 5452 ret = send_header(sctx); 5453 if (ret < 0) 5454 goto out; 5455 } 5456 5457 ret = send_subvol_begin(sctx); 5458 if (ret < 0) 5459 goto out; 5460 5461 if (sctx->parent_root) { 5462 ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, 5463 changed_cb, sctx); 5464 if (ret < 0) 5465 goto out; 5466 ret = finish_inode_if_needed(sctx, 1); 5467 if (ret < 0) 5468 goto out; 5469 } else { 5470 ret = full_send_tree(sctx); 5471 if (ret < 0) 5472 goto out; 5473 } 5474 5475 out: 5476 free_recorded_refs(sctx); 5477 return ret; 5478 } 5479 5480 static void btrfs_root_dec_send_in_progress(struct 
btrfs_root *root)
5481 {
5482 spin_lock(&root->root_item_lock);
5483 root->send_in_progress--;
5484 /*
5485 * Not much left to do, we don't know why it's unbalanced and
5486 * can't blindly reset it to 0.
5487 */
5488 if (root->send_in_progress < 0)
5489 btrfs_err(root->fs_info,
5490 "send_in_progress unbalanced %d root %llu\n",
5491 root->send_in_progress, root->root_key.objectid);
5492 spin_unlock(&root->root_item_lock);
5493 }
5494
5495 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
5496 {
5497 int ret = 0;
5498 struct btrfs_root *send_root;
5499 struct btrfs_root *clone_root;
5500 struct btrfs_fs_info *fs_info;
5501 struct btrfs_ioctl_send_args *arg = NULL;
5502 struct btrfs_key key;
5503 struct send_ctx *sctx = NULL;
5504 u32 i;
5505 u64 *clone_sources_tmp = NULL;
5506 int clone_sources_to_rollback = 0;
5507 int sort_clone_roots = 0;
5508 int index;
5509
5510 if (!capable(CAP_SYS_ADMIN))
5511 return -EPERM;
5512
5513 send_root = BTRFS_I(file_inode(mnt_file))->root;
5514 fs_info = send_root->fs_info;
5515
5516 /*
5517 * The subvolume must remain read-only during send; protect against
5518 * making it RW.
5519 */
5520 spin_lock(&send_root->root_item_lock);
5521 send_root->send_in_progress++;
5522 spin_unlock(&send_root->root_item_lock);
5523
5524 /*
5525 * This is done when we lookup the root; it should already be complete
5526 * by the time we get here.
5527 */
5528 WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
5529
5530 /*
5531 * Userspace tools do the checks and warn the user if it's
5532 * not RO.
5533 */
5534 if (!btrfs_root_readonly(send_root)) {
5535 ret = -EPERM;
5536 goto out;
5537 }
5538
5539 arg = memdup_user(arg_, sizeof(*arg));
5540 if (IS_ERR(arg)) {
5541 ret = PTR_ERR(arg);
5542 arg = NULL;
5543 goto out;
5544 }
5545
5546 if (!access_ok(VERIFY_READ, arg->clone_sources,
5547 sizeof(*arg->clone_sources) *
5548 arg->clone_sources_count)) {
5549 ret = -EFAULT;
5550 goto out;
5551 }
5552
5553 if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
5554 ret = -EINVAL;
5555 goto out;
5556 }
5557
5558 sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
5559 if (!sctx) {
5560 ret = -ENOMEM;
5561 goto out;
5562 }
5563
5564 INIT_LIST_HEAD(&sctx->new_refs);
5565 INIT_LIST_HEAD(&sctx->deleted_refs);
5566 INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
5567 INIT_LIST_HEAD(&sctx->name_cache_list);
5568
5569 sctx->flags = arg->flags;
5570
5571 sctx->send_filp = fget(arg->send_fd);
5572 if (!sctx->send_filp) {
5573 ret = -EBADF;
5574 goto out;
5575 }
5576
5577 sctx->send_root = send_root;
5578 sctx->clone_roots_cnt = arg->clone_sources_count;
5579
5580 sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
5581 sctx->send_buf = vmalloc(sctx->send_max_size);
5582 if (!sctx->send_buf) {
5583 ret = -ENOMEM;
5584 goto out;
5585 }
5586
5587 sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
5588 if (!sctx->read_buf) {
5589 ret = -ENOMEM;
5590 goto out;
5591 }
5592
5593 sctx->pending_dir_moves = RB_ROOT;
5594 sctx->waiting_dir_moves = RB_ROOT;
5595 sctx->orphan_dirs = RB_ROOT;
5596
5597 sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
5598 (arg->clone_sources_count + 1));
5599 if (!sctx->clone_roots) {
5600 ret = -ENOMEM;
5601 goto out;
5602 }
5603
5604 if (arg->clone_sources_count) {
5605 clone_sources_tmp = vmalloc(arg->clone_sources_count *
5606 sizeof(*arg->clone_sources));
5607 if (!clone_sources_tmp) {
5608 ret = -ENOMEM;
5609 goto out;
5610 }
5611
5612 ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
5613 arg->clone_sources_count *
5614 sizeof(*arg->clone_sources));
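/* copy_from_user returns the number of bytes not copied, hence the -EFAULT below */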
5615 if (ret) { 5616 ret = -EFAULT; 5617 goto out; 5618 } 5619 5620 for (i = 0; i < arg->clone_sources_count; i++) { 5621 key.objectid = clone_sources_tmp[i]; 5622 key.type = BTRFS_ROOT_ITEM_KEY; 5623 key.offset = (u64)-1; 5624 5625 index = srcu_read_lock(&fs_info->subvol_srcu); 5626 5627 clone_root = btrfs_read_fs_root_no_name(fs_info, &key); 5628 if (IS_ERR(clone_root)) { 5629 srcu_read_unlock(&fs_info->subvol_srcu, index); 5630 ret = PTR_ERR(clone_root); 5631 goto out; 5632 } 5633 clone_sources_to_rollback = i + 1; 5634 spin_lock(&clone_root->root_item_lock); 5635 clone_root->send_in_progress++; 5636 if (!btrfs_root_readonly(clone_root)) { 5637 spin_unlock(&clone_root->root_item_lock); 5638 srcu_read_unlock(&fs_info->subvol_srcu, index); 5639 ret = -EPERM; 5640 goto out; 5641 } 5642 spin_unlock(&clone_root->root_item_lock); 5643 srcu_read_unlock(&fs_info->subvol_srcu, index); 5644 5645 sctx->clone_roots[i].root = clone_root; 5646 } 5647 vfree(clone_sources_tmp); 5648 clone_sources_tmp = NULL; 5649 } 5650 5651 if (arg->parent_root) { 5652 key.objectid = arg->parent_root; 5653 key.type = BTRFS_ROOT_ITEM_KEY; 5654 key.offset = (u64)-1; 5655 5656 index = srcu_read_lock(&fs_info->subvol_srcu); 5657 5658 sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); 5659 if (IS_ERR(sctx->parent_root)) { 5660 srcu_read_unlock(&fs_info->subvol_srcu, index); 5661 ret = PTR_ERR(sctx->parent_root); 5662 goto out; 5663 } 5664 5665 spin_lock(&sctx->parent_root->root_item_lock); 5666 sctx->parent_root->send_in_progress++; 5667 if (!btrfs_root_readonly(sctx->parent_root)) { 5668 spin_unlock(&sctx->parent_root->root_item_lock); 5669 srcu_read_unlock(&fs_info->subvol_srcu, index); 5670 ret = -EPERM; 5671 goto out; 5672 } 5673 spin_unlock(&sctx->parent_root->root_item_lock); 5674 5675 srcu_read_unlock(&fs_info->subvol_srcu, index); 5676 } 5677 5678 /* 5679 * Clones from send_root are allowed, but only if the clone source 5680 * is behind the current send position. This is checked while searching 5681 * for possible clone sources. 
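 * The send root itself is appended as the last clone root here so that
 * the array can be sorted and bsearch'ed like any other clone source.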
5682 */ 5683 sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root; 5684 5685 /* We do a bsearch later */ 5686 sort(sctx->clone_roots, sctx->clone_roots_cnt, 5687 sizeof(*sctx->clone_roots), __clone_root_cmp_sort, 5688 NULL); 5689 sort_clone_roots = 1; 5690 5691 current->journal_info = (void *)BTRFS_SEND_TRANS_STUB; 5692 ret = send_subvol(sctx); 5693 current->journal_info = NULL; 5694 if (ret < 0) 5695 goto out; 5696 5697 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) { 5698 ret = begin_cmd(sctx, BTRFS_SEND_C_END); 5699 if (ret < 0) 5700 goto out; 5701 ret = send_cmd(sctx); 5702 if (ret < 0) 5703 goto out; 5704 } 5705 5706 out: 5707 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)); 5708 while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) { 5709 struct rb_node *n; 5710 struct pending_dir_move *pm; 5711 5712 n = rb_first(&sctx->pending_dir_moves); 5713 pm = rb_entry(n, struct pending_dir_move, node); 5714 while (!list_empty(&pm->list)) { 5715 struct pending_dir_move *pm2; 5716 5717 pm2 = list_first_entry(&pm->list, 5718 struct pending_dir_move, list); 5719 free_pending_move(sctx, pm2); 5720 } 5721 free_pending_move(sctx, pm); 5722 } 5723 5724 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)); 5725 while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) { 5726 struct rb_node *n; 5727 struct waiting_dir_move *dm; 5728 5729 n = rb_first(&sctx->waiting_dir_moves); 5730 dm = rb_entry(n, struct waiting_dir_move, node); 5731 rb_erase(&dm->node, &sctx->waiting_dir_moves); 5732 kfree(dm); 5733 } 5734 5735 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs)); 5736 while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) { 5737 struct rb_node *n; 5738 struct orphan_dir_info *odi; 5739 5740 n = rb_first(&sctx->orphan_dirs); 5741 odi = rb_entry(n, struct orphan_dir_info, node); 5742 free_orphan_dir_info(sctx, odi); 5743 } 5744 5745 if (sort_clone_roots) { 5746 for (i = 0; i < sctx->clone_roots_cnt; i++) 5747 btrfs_root_dec_send_in_progress( 5748 sctx->clone_roots[i].root); 5749 } else { 5750 for (i = 0; sctx && i < clone_sources_to_rollback; i++) 5751 btrfs_root_dec_send_in_progress( 5752 sctx->clone_roots[i].root); 5753 5754 btrfs_root_dec_send_in_progress(send_root); 5755 } 5756 if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) 5757 btrfs_root_dec_send_in_progress(sctx->parent_root); 5758 5759 kfree(arg); 5760 vfree(clone_sources_tmp); 5761 5762 if (sctx) { 5763 if (sctx->send_filp) 5764 fput(sctx->send_filp); 5765 5766 vfree(sctx->clone_roots); 5767 vfree(sctx->send_buf); 5768 vfree(sctx->read_buf); 5769 5770 name_cache_free(sctx); 5771 5772 kfree(sctx); 5773 } 5774 5775 return ret; 5776 } 5777