/*
 * Copyright (C) 2012 Alexander Block.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/bsearch.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sort.h>
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/radix-tree.h>
#include <linux/vmalloc.h>
#include <linux/string.h>

#include "send.h"
#include "backref.h"
#include "hash.h"
#include "locking.h"
#include "disk-io.h"
#include "btrfs_inode.h"
#include "transaction.h"

static int g_verbose = 0;

#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__)

/*
 * A fs_path is a helper to dynamically build path names with unknown size.
 * It reallocates the internal buffer on demand.
 * It allows fast adding of path elements on the right side (normal path) and
 * fast adding to the left side (reversed path). A reversed path can also be
 * unreversed if needed.
 */
struct fs_path {
	union {
		struct {
			char *start;
			char *end;

			char *buf;
			unsigned short buf_len:15;
			unsigned short reversed:1;
			char inline_buf[];
		};
		/*
		 * Average path length does not exceed 200 bytes, we'll have
		 * better packing in the slab and higher chance to satisfy
		 * an allocation later during send.
		 */
		char pad[256];
	};
};
#define FS_PATH_INLINE_SIZE \
	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))


/* reused for each extent */
struct clone_root {
	struct btrfs_root *root;
	u64 ino;
	u64 offset;

	u64 found_refs;
};

#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)
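/*
 * Editor's sketch (informative only, not used by the code): because of
 * the union with pad[256], sizeof(struct fs_path) is exactly 256 bytes
 * and inline_buf occupies whatever space is left after the bookkeeping
 * members, so:
 *
 *	FS_PATH_INLINE_SIZE == 256 - offsetof(struct fs_path, inline_buf)
 *
 * Paths up to that length never leave the slab object; only longer
 * paths fall back to a separately allocated buffer, see
 * fs_path_ensure_buf() below.
 */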
struct send_ctx {
	struct file *send_filp;
	loff_t send_off;
	char *send_buf;
	u32 send_size;
	u32 send_max_size;
	u64 total_send_size;
	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */

	struct btrfs_root *send_root;
	struct btrfs_root *parent_root;
	struct clone_root *clone_roots;
	int clone_roots_cnt;

	/* current state of the compare_tree call */
	struct btrfs_path *left_path;
	struct btrfs_path *right_path;
	struct btrfs_key *cmp_key;

	/*
	 * Info about the currently processed inode. In case of deleted
	 * inodes, these are the values from the deleted inode.
	 */
	u64 cur_ino;
	u64 cur_inode_gen;
	int cur_inode_new;
	int cur_inode_new_gen;
	int cur_inode_deleted;
	u64 cur_inode_size;
	u64 cur_inode_mode;
	u64 cur_inode_rdev;
	u64 cur_inode_last_extent;

	u64 send_progress;

	struct list_head new_refs;
	struct list_head deleted_refs;

	struct radix_tree_root name_cache;
	struct list_head name_cache_list;
	int name_cache_size;

	struct file_ra_state ra;

	char *read_buf;

	/*
	 * We process inodes by their increasing order, so if before an
	 * incremental send we reverse the parent/child relationship of
	 * directories such that a directory with a lower inode number was
	 * the parent of a directory with a higher inode number, and the one
	 * becoming the new parent got renamed too, we can't rename/move the
	 * directory with lower inode number when we finish processing it - we
	 * must process the directory with higher inode number first, then
	 * rename/move it and then rename/move the directory with lower inode
	 * number. Example follows.
	 *
	 * Tree state when the first send was performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |
	 *         |
	 *         |-- c           (ino 259)
	 *         |   |-- d       (ino 260)
	 *         |
	 *         |-- c2          (ino 261)
	 *
	 * Tree state when the second (incremental) send is performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |-- c2          (ino 261)
	 *             |-- d2      (ino 260)
	 *                 |-- cc  (ino 259)
	 *
	 * The sequence of steps that lead to the second state was:
	 *
	 * mv /a/b/c/d /a/b/c2/d2
	 * mv /a/b/c /a/b/c2/d2/cc
	 *
	 * "c" has lower inode number, but we can't move it (2nd mv operation)
	 * before we move "d", which has higher inode number.
	 *
	 * So we just memorize which move/rename operations must be performed
	 * later when their respective parent is processed and moved/renamed.
	 */

	/* Indexed by parent directory inode number. */
	struct rb_root pending_dir_moves;

	/*
	 * Reverse index, indexed by the inode number of a directory that
	 * is waiting for the move/rename of its immediate parent before its
	 * own move/rename can be performed.
	 */
	struct rb_root waiting_dir_moves;

	/*
	 * A directory that is going to be rm'ed might have a child directory
	 * which is in the pending directory moves index above. In this case,
	 * the directory can only be removed after the move/rename of its child
	 * is performed. Example:
	 *
	 * Parent snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- c/           (ino 259)
	 *         |   |-- x/       (ino 260)
	 *         |
	 *         |-- y/           (ino 261)
	 *
	 * Send snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- YY/          (ino 261)
	 *              |-- x/      (ino 260)
	 *
	 * Sequence of steps that lead to the send snapshot:
	 * rm -f /a/b/c/foo.txt
	 * mv /a/b/y /a/b/YY
	 * mv /a/b/c/x /a/b/YY
	 * rmdir /a/b/c
	 *
	 * When the child is processed, its move/rename is delayed until its
	 * parent is processed (as explained above), but all other operations
	 * like update utimes, chown, chgrp, etc, are performed and the paths
	 * that it uses for those operations must use the orphanized name of
	 * its parent (the directory we're going to rm later), so we need to
	 * memorize that name.
	 *
	 * Indexed by the inode number of the directory to be deleted.
	 */
	struct rb_root orphan_dirs;
};
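/*
 * Editor's summary of how the three rb-trees above relate (informative
 * only; the structs they hold are defined right below):
 *
 *	pending_dir_moves: parent dir ino -> struct pending_dir_move,
 *	                   moves replayed once that parent was processed
 *	waiting_dir_moves: ino -> struct waiting_dir_move, set while the
 *	                   inode's own move must wait for its parent
 *	orphan_dirs:       ino -> struct orphan_dir_info, directories
 *	                   whose rmdir must wait for a pending child move
 */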
struct pending_dir_move {
	struct rb_node node;
	struct list_head list;
	u64 parent_ino;
	u64 ino;
	u64 gen;
	bool is_orphan;
	struct list_head update_refs;
};

struct waiting_dir_move {
	struct rb_node node;
	u64 ino;
	/*
	 * There might be some directory that could not be removed because it
	 * was waiting for this directory inode to be moved first. Therefore
	 * after this directory is moved, we can try to rmdir the inode
	 * rmdir_ino.
	 */
	u64 rmdir_ino;
	bool orphanized;
};

struct orphan_dir_info {
	struct rb_node node;
	u64 ino;
	u64 gen;
};

struct name_cache_entry {
	struct list_head list;
	/*
	 * radix_tree has only 32bit entries but we need to handle 64bit inums.
	 * We use the lower 32bit of the 64bit inum to store it in the tree. If
	 * more than one inum would fall into the same entry, we use radix_list
	 * to store the additional entries. radix_list is also used to store
	 * entries where two entries have the same inum but different
	 * generations.
	 */
	struct list_head radix_list;
	u64 ino;
	u64 gen;
	u64 parent_ino;
	u64 parent_gen;
	int ret;
	int need_later_update;
	int name_len;
	char name[];
};

static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);

static struct waiting_dir_move *
get_waiting_dir_move(struct send_ctx *sctx, u64 ino);

static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);

static int need_send_hole(struct send_ctx *sctx)
{
	return (sctx->parent_root && !sctx->cur_inode_new &&
		!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
		S_ISREG(sctx->cur_inode_mode));
}

static void fs_path_reset(struct fs_path *p)
{
	if (p->reversed) {
		p->start = p->buf + p->buf_len - 1;
		p->end = p->start;
		*p->start = 0;
	} else {
		p->start = p->buf;
		p->end = p->start;
		*p->start = 0;
	}
}

static struct fs_path *fs_path_alloc(void)
{
	struct fs_path *p;

	p = kmalloc(sizeof(*p), GFP_NOFS);
	if (!p)
		return NULL;
	p->reversed = 0;
	p->buf = p->inline_buf;
	p->buf_len = FS_PATH_INLINE_SIZE;
	fs_path_reset(p);
	return p;
}

static struct fs_path *fs_path_alloc_reversed(void)
{
	struct fs_path *p;

	p = fs_path_alloc();
	if (!p)
		return NULL;
	p->reversed = 1;
	fs_path_reset(p);
	return p;
}

static void fs_path_free(struct fs_path *p)
{
	if (!p)
		return;
	if (p->buf != p->inline_buf)
		kfree(p->buf);
	kfree(p);
}

static int fs_path_len(struct fs_path *p)
{
	return p->end - p->start;
}
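/*
 * Minimal usage sketch for the fs_path helpers above (editor's
 * illustration, assuming a caller that returns int):
 *
 *	struct fs_path *p = fs_path_alloc();
 *
 *	if (!p)
 *		return -ENOMEM;
 *	ret = fs_path_add(p, "foo", 3);		p->start is now "foo"
 *	if (!ret)
 *		ret = fs_path_add(p, "bar", 3);	p->start is now "foo/bar"
 *	...
 *	fs_path_free(p);
 *
 * fs_path_add() and the other building helpers follow a bit further
 * below.
 */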
static int fs_path_ensure_buf(struct fs_path *p, int len)
{
	char *tmp_buf;
	int path_len;
	int old_buf_len;

	len++;

	if (p->buf_len >= len)
		return 0;

	if (len > PATH_MAX) {
		WARN_ON(1);
		return -ENOMEM;
	}

	path_len = p->end - p->start;
	old_buf_len = p->buf_len;

	/*
	 * First time the inline_buf does not suffice
	 */
	if (p->buf == p->inline_buf) {
		tmp_buf = kmalloc(len, GFP_NOFS);
		if (tmp_buf)
			memcpy(tmp_buf, p->buf, old_buf_len);
	} else {
		tmp_buf = krealloc(p->buf, len, GFP_NOFS);
	}
	if (!tmp_buf)
		return -ENOMEM;
	p->buf = tmp_buf;
	/*
	 * The real size of the buffer is bigger, this will let the fast path
	 * happen most of the time
	 */
	p->buf_len = ksize(p->buf);

	if (p->reversed) {
		tmp_buf = p->buf + old_buf_len - path_len - 1;
		p->end = p->buf + p->buf_len - 1;
		p->start = p->end - path_len;
		memmove(p->start, tmp_buf, path_len + 1);
	} else {
		p->start = p->buf;
		p->end = p->start + path_len;
	}
	return 0;
}

static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
				   char **prepared)
{
	int ret;
	int new_len;

	new_len = p->end - p->start + name_len;
	if (p->start != p->end)
		new_len++;
	ret = fs_path_ensure_buf(p, new_len);
	if (ret < 0)
		goto out;

	if (p->reversed) {
		if (p->start != p->end)
			*--p->start = '/';
		p->start -= name_len;
		*prepared = p->start;
	} else {
		if (p->start != p->end)
			*p->end++ = '/';
		*prepared = p->end;
		p->end += name_len;
		*p->end = 0;
	}

out:
	return ret;
}

static int fs_path_add(struct fs_path *p, const char *name, int name_len)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, name_len, &prepared);
	if (ret < 0)
		goto out;
	memcpy(prepared, name, name_len);

out:
	return ret;
}

static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
	if (ret < 0)
		goto out;
	memcpy(prepared, p2->start, p2->end - p2->start);

out:
	return ret;
}

static int fs_path_add_from_extent_buffer(struct fs_path *p,
					  struct extent_buffer *eb,
					  unsigned long off, int len)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, len, &prepared);
	if (ret < 0)
		goto out;

	read_extent_buffer(eb, prepared, off, len);

out:
	return ret;
}

static int fs_path_copy(struct fs_path *p, struct fs_path *from)
{
	int ret;

	p->reversed = from->reversed;
	fs_path_reset(p);

	ret = fs_path_add_path(p, from);

	return ret;
}


static void fs_path_unreverse(struct fs_path *p)
{
	char *tmp;
	int len;

	if (!p->reversed)
		return;

	tmp = p->start;
	len = p->end - p->start;
	p->start = p->buf;
	p->end = p->start + len;
	memmove(p->start, tmp, len + 1);
	p->reversed = 0;
}

static struct btrfs_path *alloc_path_for_send(void)
{
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return NULL;
	path->search_commit_root = 1;
	path->skip_locking = 1;
	path->need_commit_sem = 1;
	return path;
}

static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
{
	int ret;
	mm_segment_t old_fs;
	u32 pos = 0;

	old_fs = get_fs();
	set_fs(KERNEL_DS);

	while (pos < len) {
		ret = vfs_write(filp, (__force const char __user *)buf + pos,
				len - pos, off);
		/* TODO handle that correctly */
		/*if (ret == -ERESTARTSYS) {
			continue;
		}*/
		if (ret < 0)
			goto out;
		if (ret == 0) {
			ret = -EIO;
			goto out;
		}
		pos += ret;
	}

	ret = 0;

out:
	set_fs(old_fs);
	return ret;
}
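/*
 * Editor's illustration of the reversed mode implemented above: a
 * reversed fs_path grows to the left, which is what get_cur_path()
 * needs when resolving a path bottom-up.
 *
 *	p = fs_path_alloc_reversed();
 *	fs_path_add(p, "c", 1);		p->start is "c"
 *	fs_path_add(p, "b", 1);		p->start is "b/c"
 *	fs_path_add(p, "a", 1);		p->start is "a/b/c"
 *	fs_path_unreverse(p);		moves the string to p->buf
 *
 * Each new component lands in front of the previous ones, so no final
 * string reversal is ever needed.
 */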
static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
{
	struct btrfs_tlv_header *hdr;
	int total_len = sizeof(*hdr) + len;
	int left = sctx->send_max_size - sctx->send_size;

	if (unlikely(left < total_len))
		return -EOVERFLOW;

	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
	hdr->tlv_type = cpu_to_le16(attr);
	hdr->tlv_len = cpu_to_le16(len);
	memcpy(hdr + 1, data, len);
	sctx->send_size += total_len;

	return 0;
}

#define TLV_PUT_DEFINE_INT(bits) \
	static int tlv_put_u##bits(struct send_ctx *sctx,		\
				   u##bits attr, u##bits value)		\
	{								\
		__le##bits __tmp = cpu_to_le##bits(value);		\
		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));	\
	}

TLV_PUT_DEFINE_INT(64)

static int tlv_put_string(struct send_ctx *sctx, u16 attr,
			  const char *str, int len)
{
	if (len == -1)
		len = strlen(str);
	return tlv_put(sctx, attr, str, len);
}

static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
			const u8 *uuid)
{
	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
}

static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
				  struct extent_buffer *eb,
				  struct btrfs_timespec *ts)
{
	struct btrfs_timespec bts;
	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
	return tlv_put(sctx, attr, &bts, sizeof(bts));
}


#define TLV_PUT(sctx, attrtype, attrlen, data) \
	do { \
		ret = tlv_put(sctx, attrtype, attrlen, data); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_INT(sctx, attrtype, bits, value) \
	do { \
		ret = tlv_put_u##bits(sctx, attrtype, value); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
#define TLV_PUT_STRING(sctx, attrtype, str, len) \
	do { \
		ret = tlv_put_string(sctx, attrtype, str, len); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_PATH(sctx, attrtype, p) \
	do { \
		ret = tlv_put_string(sctx, attrtype, p->start, \
				     p->end - p->start); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_UUID(sctx, attrtype, uuid) \
	do { \
		ret = tlv_put_uuid(sctx, attrtype, uuid); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
	do { \
		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

static int send_header(struct send_ctx *sctx)
{
	struct btrfs_stream_header hdr;

	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);

	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
					&sctx->send_off);
}
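/*
 * Editor's sketch of the resulting stream layout (informative only,
 * derived from send_header() above and begin_cmd()/send_cmd() below):
 *
 *	+--------------------------------------+
 *	| btrfs_stream_header (magic, version) |
 *	+--------------------------------------+
 *	| btrfs_cmd_header (len, cmd, crc)     |  \  one per
 *	| tlv_header + data, tlv_header + ...  |  /  command
 *	+--------------------------------------+
 *	| btrfs_cmd_header ...                 |
 *	+--------------------------------------+
 *
 * The crc in each command header covers the whole command, with the
 * crc field itself taken as zero while computing it (see send_cmd()).
 */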
/*
 * For each command/item we want to send to userspace, we call this function.
 */
static int begin_cmd(struct send_ctx *sctx, int cmd)
{
	struct btrfs_cmd_header *hdr;

	if (WARN_ON(!sctx->send_buf))
		return -EINVAL;

	BUG_ON(sctx->send_size);

	sctx->send_size += sizeof(*hdr);
	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->cmd = cpu_to_le16(cmd);

	return 0;
}

static int send_cmd(struct send_ctx *sctx)
{
	int ret;
	struct btrfs_cmd_header *hdr;
	u32 crc;

	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
	hdr->crc = 0;

	crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
	hdr->crc = cpu_to_le32(crc);

	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
					&sctx->send_off);

	sctx->total_send_size += sctx->send_size;
	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
	sctx->send_size = 0;

	return ret;
}

/*
 * Sends a move instruction to user space
 */
static int send_rename(struct send_ctx *sctx,
		     struct fs_path *from, struct fs_path *to)
{
	int ret;

	verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a link instruction to user space
 */
static int send_link(struct send_ctx *sctx,
		     struct fs_path *path, struct fs_path *lnk)
{
	int ret;

	verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends an unlink instruction to user space
 */
static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
{
	int ret;

	verbose_printk("btrfs: send_unlink %s\n", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a rmdir instruction to user space
 */
static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
{
	int ret;

	verbose_printk("btrfs: send_rmdir %s\n", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}
/*
 * Helper function to retrieve some fields from an inode item.
 */
static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
			  u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
			  u64 *gid, u64 *rdev)
{
	int ret;
	struct btrfs_inode_item *ii;
	struct btrfs_key key;

	key.objectid = ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		return ret;
	}

	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
			struct btrfs_inode_item);
	if (size)
		*size = btrfs_inode_size(path->nodes[0], ii);
	if (gen)
		*gen = btrfs_inode_generation(path->nodes[0], ii);
	if (mode)
		*mode = btrfs_inode_mode(path->nodes[0], ii);
	if (uid)
		*uid = btrfs_inode_uid(path->nodes[0], ii);
	if (gid)
		*gid = btrfs_inode_gid(path->nodes[0], ii);
	if (rdev)
		*rdev = btrfs_inode_rdev(path->nodes[0], ii);

	return ret;
}

static int get_inode_info(struct btrfs_root *root,
			  u64 ino, u64 *size, u64 *gen,
			  u64 *mode, u64 *uid, u64 *gid,
			  u64 *rdev)
{
	struct btrfs_path *path;
	int ret;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;
	ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
			       rdev);
	btrfs_free_path(path);
	return ret;
}

typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
				   struct fs_path *p,
				   void *ctx);
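/*
 * Editor's sketch of a minimal iterate_inode_ref_t callback (purely
 * illustrative; __copy_first_ref() further below is a real one).
 * Returning 1 stops the iteration without being treated as an error:
 *
 *	static int __print_refs(int num, u64 dir, int index,
 *				struct fs_path *p, void *ctx)
 *	{
 *		printk(KERN_DEBUG "ref %d: dir=%llu name=%s\n",
 *		       num, dir, p->start);
 *		return 0;	keep going; 1 stops, <0 is an error
 *	}
 */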
/*
 * Helper function to iterate the entries in ONE btrfs_inode_ref or
 * btrfs_inode_extref.
 * The iterate callback may return a non zero value to stop iteration. This can
 * be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the INODE_REF or INODE_EXTREF when called.
 */
static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
			     struct btrfs_key *found_key, int resolve,
			     iterate_inode_ref_t iterate, void *ctx)
{
	struct extent_buffer *eb = path->nodes[0];
	struct btrfs_item *item;
	struct btrfs_inode_ref *iref;
	struct btrfs_inode_extref *extref;
	struct btrfs_path *tmp_path;
	struct fs_path *p;
	u32 cur = 0;
	u32 total;
	int slot = path->slots[0];
	u32 name_len;
	char *start;
	int ret = 0;
	int num = 0;
	int index;
	u64 dir;
	unsigned long name_off;
	unsigned long elem_size;
	unsigned long ptr;

	p = fs_path_alloc_reversed();
	if (!p)
		return -ENOMEM;

	tmp_path = alloc_path_for_send();
	if (!tmp_path) {
		fs_path_free(p);
		return -ENOMEM;
	}


	if (found_key->type == BTRFS_INODE_REF_KEY) {
		ptr = (unsigned long)btrfs_item_ptr(eb, slot,
						    struct btrfs_inode_ref);
		item = btrfs_item_nr(slot);
		total = btrfs_item_size(eb, item);
		elem_size = sizeof(*iref);
	} else {
		ptr = btrfs_item_ptr_offset(eb, slot);
		total = btrfs_item_size_nr(eb, slot);
		elem_size = sizeof(*extref);
	}

	while (cur < total) {
		fs_path_reset(p);

		if (found_key->type == BTRFS_INODE_REF_KEY) {
			iref = (struct btrfs_inode_ref *)(ptr + cur);
			name_len = btrfs_inode_ref_name_len(eb, iref);
			name_off = (unsigned long)(iref + 1);
			index = btrfs_inode_ref_index(eb, iref);
			dir = found_key->offset;
		} else {
			extref = (struct btrfs_inode_extref *)(ptr + cur);
			name_len = btrfs_inode_extref_name_len(eb, extref);
			name_off = (unsigned long)&extref->name;
			index = btrfs_inode_extref_index(eb, extref);
			dir = btrfs_inode_extref_parent(eb, extref);
		}

		if (resolve) {
			start = btrfs_ref_to_path(root, tmp_path, name_len,
						  name_off, eb, dir,
						  p->buf, p->buf_len);
			if (IS_ERR(start)) {
				ret = PTR_ERR(start);
				goto out;
			}
			if (start < p->buf) {
				/* overflow, try again with larger buffer */
				ret = fs_path_ensure_buf(p,
						p->buf_len + p->buf - start);
				if (ret < 0)
					goto out;
				start = btrfs_ref_to_path(root, tmp_path,
							  name_len, name_off,
							  eb, dir,
							  p->buf, p->buf_len);
				if (IS_ERR(start)) {
					ret = PTR_ERR(start);
					goto out;
				}
				BUG_ON(start < p->buf);
			}
			p->start = start;
		} else {
			ret = fs_path_add_from_extent_buffer(p, eb, name_off,
							     name_len);
			if (ret < 0)
				goto out;
		}

		cur += elem_size + name_len;
		ret = iterate(num, dir, index, p, ctx);
		if (ret)
			goto out;
		num++;
	}

out:
	btrfs_free_path(tmp_path);
	fs_path_free(p);
	return ret;
}

typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
				  const char *name, int name_len,
				  const char *data, int data_len,
				  u8 type, void *ctx);
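/*
 * Editor's sketch of the two item layouts handled above (informative
 * only). Both pack several variable-sized entries into one item:
 *
 *	INODE_REF item (parent dir is the item key's offset):
 *	  [btrfs_inode_ref][name][btrfs_inode_ref][name]...
 *
 *	INODE_EXTREF item (parent dir stored per entry):
 *	  [btrfs_inode_extref incl. parent][name]...
 *
 * which is why the loop advances by elem_size + name_len.
 */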
/*
 * Helper function to iterate the entries in ONE btrfs_dir_item.
 * The iterate callback may return a non zero value to stop iteration. This can
 * be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the dir item when called.
 */
static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
			    struct btrfs_key *found_key,
			    iterate_dir_item_t iterate, void *ctx)
{
	int ret = 0;
	struct extent_buffer *eb;
	struct btrfs_item *item;
	struct btrfs_dir_item *di;
	struct btrfs_key di_key;
	char *buf = NULL;
	int buf_len;
	u32 name_len;
	u32 data_len;
	u32 cur;
	u32 len;
	u32 total;
	int slot;
	int num;
	u8 type;

	/*
	 * Start with a small buffer (1 page). If later we end up needing more
	 * space, which can happen for xattrs on a fs with a leaf size greater
	 * than the page size, attempt to increase the buffer. Typically xattr
	 * values are small.
	 */
	buf_len = PATH_MAX;
	buf = kmalloc(buf_len, GFP_NOFS);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	eb = path->nodes[0];
	slot = path->slots[0];
	item = btrfs_item_nr(slot);
	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	cur = 0;
	len = 0;
	total = btrfs_item_size(eb, item);

	num = 0;
	while (cur < total) {
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		type = btrfs_dir_type(eb, di);
		btrfs_dir_item_key_to_cpu(eb, di, &di_key);

		if (type == BTRFS_FT_XATTR) {
			if (name_len > XATTR_NAME_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
			if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) {
				ret = -E2BIG;
				goto out;
			}
		} else {
			/*
			 * Path too long
			 */
			if (name_len + data_len > PATH_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
		}

		if (name_len + data_len > buf_len) {
			buf_len = name_len + data_len;
			if (is_vmalloc_addr(buf)) {
				vfree(buf);
				buf = NULL;
			} else {
				char *tmp = krealloc(buf, buf_len,
						GFP_NOFS | __GFP_NOWARN);

				if (!tmp)
					kfree(buf);
				buf = tmp;
			}
			if (!buf) {
				buf = vmalloc(buf_len);
				if (!buf) {
					ret = -ENOMEM;
					goto out;
				}
			}
		}

		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
				name_len + data_len);

		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;

		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
				data_len, type, ctx);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}

		num++;
	}

out:
	kvfree(buf);
	return ret;
}

static int __copy_first_ref(int num, u64 dir, int index,
			    struct fs_path *p, void *ctx)
{
	int ret;
	struct fs_path *pt = ctx;

	ret = fs_path_copy(pt, p);
	if (ret < 0)
		return ret;

	/* we want the first only */
	return 1;
}
/*
 * Retrieve the first path of an inode. If an inode has more than one
 * ref/hardlink, this is ignored.
 */
static int get_inode_path(struct btrfs_root *root,
			  u64 ino, struct fs_path *path)
{
	int ret;
	struct btrfs_key key, found_key;
	struct btrfs_path *p;

	p = alloc_path_for_send();
	if (!p)
		return -ENOMEM;

	fs_path_reset(path);

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		ret = 1;
		goto out;
	}
	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
	if (found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	ret = iterate_inode_ref(root, p, &found_key, 1,
				__copy_first_ref, path);
	if (ret < 0)
		goto out;
	ret = 0;

out:
	btrfs_free_path(p);
	return ret;
}

struct backref_ctx {
	struct send_ctx *sctx;

	struct btrfs_path *path;
	/* number of total found references */
	u64 found;

	/*
	 * used for clones found in send_root. clones found behind cur_objectid
	 * and cur_offset are not considered as allowed clones.
	 */
	u64 cur_objectid;
	u64 cur_offset;

	/* may be truncated in case it's the last extent in a file */
	u64 extent_len;

	/* data offset in the file extent item */
	u64 data_offset;

	/* Just to check for bugs in backref resolving */
	int found_itself;
};

static int __clone_root_cmp_bsearch(const void *key, const void *elt)
{
	u64 root = (u64)(uintptr_t)key;
	struct clone_root *cr = (struct clone_root *)elt;

	if (root < cr->root->objectid)
		return -1;
	if (root > cr->root->objectid)
		return 1;
	return 0;
}

static int __clone_root_cmp_sort(const void *e1, const void *e2)
{
	struct clone_root *cr1 = (struct clone_root *)e1;
	struct clone_root *cr2 = (struct clone_root *)e2;

	if (cr1->root->objectid < cr2->root->objectid)
		return -1;
	if (cr1->root->objectid > cr2->root->objectid)
		return 1;
	return 0;
}
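/*
 * Editor's note: the two comparators above form the usual sort()/
 * bsearch() pair over the clone_roots array. The array is sorted once
 * during send setup (not part of this excerpt), roughly:
 *
 *	sort(sctx->clone_roots, sctx->clone_roots_cnt,
 *	     sizeof(*sctx->clone_roots), __clone_root_cmp_sort, NULL);
 *
 * and __iterate_backrefs() below then looks up each backref's root
 * with bsearch() and __clone_root_cmp_bsearch.
 */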
/*
 * Called for every backref that is found for the current extent.
 * Results are collected in sctx->clone_roots->ino/offset/found_refs
 */
static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
{
	struct backref_ctx *bctx = ctx_;
	struct clone_root *found;
	int ret;
	u64 i_size;

	/* First check if the root is in the list of accepted clone sources */
	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
			bctx->sctx->clone_roots_cnt,
			sizeof(struct clone_root),
			__clone_root_cmp_bsearch);
	if (!found)
		return 0;

	if (found->root == bctx->sctx->send_root &&
	    ino == bctx->cur_objectid &&
	    offset == bctx->cur_offset) {
		bctx->found_itself = 1;
	}

	/*
	 * There are inodes that have extents that lie behind their i_size.
	 * Don't accept clones from these extents.
	 */
	ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
			       NULL, NULL, NULL);
	btrfs_release_path(bctx->path);
	if (ret < 0)
		return ret;

	if (offset + bctx->data_offset + bctx->extent_len > i_size)
		return 0;

	/*
	 * Make sure we don't consider clones from send_root that are
	 * behind the current inode/offset.
	 */
	if (found->root == bctx->sctx->send_root) {
		/*
		 * TODO for the moment we don't accept clones from the inode
		 * that is currently being sent. We may change this when
		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
		 * file.
		 */
		if (ino >= bctx->cur_objectid)
			return 0;
#if 0
		if (ino > bctx->cur_objectid)
			return 0;
		if (offset + bctx->extent_len > bctx->cur_offset)
			return 0;
#endif
	}

	bctx->found++;
	found->found_refs++;
	if (ino < found->ino) {
		found->ino = ino;
		found->offset = offset;
	} else if (found->ino == ino) {
		/*
		 * same extent found more than once in the same file.
		 */
		if (found->offset > offset + bctx->extent_len)
			found->offset = offset;
	}

	return 0;
}

/*
 * Given an inode, offset and extent item, it finds a good clone for a clone
 * instruction. Returns -ENOENT when none could be found. The function makes
 * sure that the returned clone is usable at the point where sending is at the
 * moment. This means, that no clones are accepted which lie behind the current
 * inode+offset.
 *
 * path must point to the extent item when called.
 */
static int find_extent_clone(struct send_ctx *sctx,
			     struct btrfs_path *path,
			     u64 ino, u64 data_offset,
			     u64 ino_size,
			     struct clone_root **found)
{
	int ret;
	int extent_type;
	u64 logical;
	u64 disk_byte;
	u64 num_bytes;
	u64 extent_item_pos;
	u64 flags = 0;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *eb = path->nodes[0];
	struct backref_ctx *backref_ctx = NULL;
	struct clone_root *cur_clone_root;
	struct btrfs_key found_key;
	struct btrfs_path *tmp_path;
	int compressed;
	u32 i;

	tmp_path = alloc_path_for_send();
	if (!tmp_path)
		return -ENOMEM;

	/* We only use this path under the commit sem */
	tmp_path->need_commit_sem = 0;

	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
	if (!backref_ctx) {
		ret = -ENOMEM;
		goto out;
	}

	backref_ctx->path = tmp_path;

	if (data_offset >= ino_size) {
		/*
		 * There may be extents that lie behind the file's size.
		 * I at least had this in combination with snapshotting while
		 * writing large files.
		 */
		ret = 0;
		goto out;
	}

	fi = btrfs_item_ptr(eb, path->slots[0],
			struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);
	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		ret = -ENOENT;
		goto out;
	}
	compressed = btrfs_file_extent_compression(eb, fi);

	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
	if (disk_byte == 0) {
		ret = -ENOENT;
		goto out;
	}
	logical = disk_byte + btrfs_file_extent_offset(eb, fi);

	down_read(&sctx->send_root->fs_info->commit_root_sem);
	ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
				  &found_key, &flags);
	up_read(&sctx->send_root->fs_info->commit_root_sem);
	btrfs_release_path(tmp_path);

	if (ret < 0)
		goto out;
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ret = -EIO;
		goto out;
	}
	/*
	 * Setup the clone roots.
	 */
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		cur_clone_root = sctx->clone_roots + i;
		cur_clone_root->ino = (u64)-1;
		cur_clone_root->offset = 0;
		cur_clone_root->found_refs = 0;
	}

	backref_ctx->sctx = sctx;
	backref_ctx->found = 0;
	backref_ctx->cur_objectid = ino;
	backref_ctx->cur_offset = data_offset;
	backref_ctx->found_itself = 0;
	backref_ctx->extent_len = num_bytes;
	/*
	 * For non-compressed extents iterate_extent_inodes() gives us extent
	 * offsets that already take into account the data offset, but not for
	 * compressed extents, since the offset is logical and not relative to
	 * the physical extent locations. We must take this into account to
	 * avoid sending clone offsets that go beyond the source file's size,
	 * which would result in the clone ioctl failing with -EINVAL on the
	 * receiving end.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		backref_ctx->data_offset = 0;
	else
		backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi);

	/*
	 * The last extent of a file may be too large due to page alignment.
	 * We need to adjust extent_len in this case so that the checks in
	 * __iterate_backrefs work.
	 */
	if (data_offset + num_bytes >= ino_size)
		backref_ctx->extent_len = ino_size - data_offset;

	/*
	 * Now collect all backrefs.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		extent_item_pos = logical - found_key.objectid;
	else
		extent_item_pos = 0;
	ret = iterate_extent_inodes(sctx->send_root->fs_info,
				    found_key.objectid, extent_item_pos, 1,
				    __iterate_backrefs, backref_ctx);

	if (ret < 0)
		goto out;
	if (!backref_ctx->found_itself) {
		/* found a bug in backref code? */
		ret = -EIO;
		btrfs_err(sctx->send_root->fs_info, "did not find backref in "
				"send_root. inode=%llu, offset=%llu, "
				"disk_byte=%llu found extent=%llu",
				ino, data_offset, disk_byte, found_key.objectid);
		goto out;
	}

	verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
			"ino=%llu, "
			"num_bytes=%llu, logical=%llu\n",
			data_offset, ino, num_bytes, logical);

	if (!backref_ctx->found)
		verbose_printk("btrfs: no clones found\n");

	cur_clone_root = NULL;
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		if (sctx->clone_roots[i].found_refs) {
			if (!cur_clone_root)
				cur_clone_root = sctx->clone_roots + i;
			else if (sctx->clone_roots[i].root == sctx->send_root)
				/* prefer clones from send_root over others */
				cur_clone_root = sctx->clone_roots + i;
		}

	}

	if (cur_clone_root) {
		*found = cur_clone_root;
		ret = 0;
	} else {
		ret = -ENOENT;
	}

out:
	btrfs_free_path(tmp_path);
	kfree(backref_ctx);
	return ret;
}

static int read_symlink(struct btrfs_root *root,
			u64 ino,
			struct fs_path *dest)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_file_extent_item *ei;
	u8 type;
	u8 compression;
	unsigned long off;
	int len;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret);

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			struct btrfs_file_extent_item);
	type = btrfs_file_extent_type(path->nodes[0], ei);
	compression = btrfs_file_extent_compression(path->nodes[0], ei);
	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
	BUG_ON(compression);

	off = btrfs_file_extent_inline_start(ei);
	len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);

	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);

out:
	btrfs_free_path(path);
	return ret;
}
/*
 * Helper function to generate a file name that is unique in the root of
 * send_root and parent_root. This is used to generate names for orphan inodes.
 */
static int gen_unique_name(struct send_ctx *sctx,
			   u64 ino, u64 gen,
			   struct fs_path *dest)
{
	int ret = 0;
	struct btrfs_path *path;
	struct btrfs_dir_item *di;
	char tmp[64];
	int len;
	u64 idx = 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	while (1) {
		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
				ino, gen, idx);
		ASSERT(len < sizeof(tmp));

		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
				path, BTRFS_FIRST_FREE_OBJECTID,
				tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}

		if (!sctx->parent_root) {
			/* unique */
			ret = 0;
			break;
		}

		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
				path, BTRFS_FIRST_FREE_OBJECTID,
				tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}
		/* unique */
		break;
	}

	ret = fs_path_add(dest, tmp, strlen(tmp));

out:
	btrfs_free_path(path);
	return ret;
}

enum inode_state {
	inode_state_no_change,
	inode_state_will_create,
	inode_state_did_create,
	inode_state_will_delete,
	inode_state_did_delete,
};

static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;
	int left_ret;
	int right_ret;
	u64 left_gen;
	u64 right_gen;

	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
			NULL, NULL);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	left_ret = ret;

	if (!sctx->parent_root) {
		right_ret = -ENOENT;
	} else {
		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
				NULL, NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		right_ret = ret;
	}

	if (!left_ret && !right_ret) {
		if (left_gen == gen && right_gen == gen) {
			ret = inode_state_no_change;
		} else if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else if (!left_ret) {
		if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else {
			ret = -ENOENT;
		}
	} else if (!right_ret) {
		if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else {
		ret = -ENOENT;
	}

out:
	return ret;
}

static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;

	ret = get_cur_inode_state(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (ret == inode_state_no_change ||
	    ret == inode_state_did_create ||
	    ret == inode_state_will_delete)
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}
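/*
 * Editor's summary of get_cur_inode_state() as a table ("L"/"R" mean
 * the ino with matching gen exists in send_root/parent_root, "done"
 * means ino < sctx->send_progress, i.e. already processed; gen
 * mismatches degrade to the matching side or -ENOENT):
 *
 *	L  R  ->  no_change
 *	L  -  ->  did_create if done, else will_create
 *	-  R  ->  did_delete if done, else will_delete
 *	-  -  ->  -ENOENT
 *
 * is_inode_existent() then reports 1 exactly for the states in which
 * the inode exists on the receiving side right now: no_change,
 * did_create and will_delete.
 */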
/*
 * Helper function to lookup a dir item in a dir.
 */
static int lookup_dir_item_inode(struct btrfs_root *root,
				 u64 dir, const char *name, int name_len,
				 u64 *found_inode,
				 u8 *found_type)
{
	int ret = 0;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(NULL, root, path,
			dir, name, name_len, 0);
	if (!di) {
		ret = -ENOENT;
		goto out;
	}
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		goto out;
	}
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
	if (key.type == BTRFS_ROOT_ITEM_KEY) {
		ret = -ENOENT;
		goto out;
	}
	*found_inode = key.objectid;
	*found_type = btrfs_dir_type(path->nodes[0], di);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent dir,
 * generation of the parent dir and the name of the dir entry.
 */
static int get_first_ref(struct btrfs_root *root, u64 ino,
			 u64 *dir, u64 *dir_gen, struct fs_path *name)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	int len;
	u64 parent_dir;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
	if (ret < 0)
		goto out;
	if (!ret)
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				path->slots[0]);
	if (ret || found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	if (found_key.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *iref;
		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_inode_ref);
		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
						     (unsigned long)(iref + 1),
						     len);
		parent_dir = found_key.offset;
	} else {
		struct btrfs_inode_extref *extref;
		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					struct btrfs_inode_extref);
		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
					(unsigned long)&extref->name, len);
		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
	}
	if (ret < 0)
		goto out;
	btrfs_release_path(path);

	if (dir_gen) {
		ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0)
			goto out;
	}

	*dir = parent_dir;

out:
	btrfs_free_path(path);
	return ret;
}

static int is_first_ref(struct btrfs_root *root,
			u64 ino, u64 dir,
			const char *name, int name_len)
{
	int ret;
	struct fs_path *tmp_name;
	u64 tmp_dir;

	tmp_name = fs_path_alloc();
	if (!tmp_name)
		return -ENOMEM;

	ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
	if (ret < 0)
		goto out;

	if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
		ret = 0;
		goto out;
	}

	ret = !memcmp(tmp_name->start, name, name_len);

out:
	fs_path_free(tmp_name);
	return ret;
}
/*
 * Used by process_recorded_refs to determine if a new ref would overwrite an
 * already existing ref. In case it detects an overwrite, it returns the
 * inode/gen in who_ino/who_gen.
 * When an overwrite is detected, process_recorded_refs does proper orphanizing
 * to make sure later references to the overwritten inode are possible.
 * Orphanizing is however only required for the first ref of an inode.
 * process_recorded_refs does an additional is_first_ref check to see if
 * orphanizing is really required.
 */
static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
			      const char *name, int name_len,
			      u64 *who_ino, u64 *who_gen)
{
	int ret = 0;
	u64 gen;
	u64 other_inode = 0;
	u8 other_type = 0;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	/*
	 * If we have a parent root we need to verify that the parent dir was
	 * not deleted and then re-created, if it was then we have no overwrite
	 * and we can just unlink this entry.
	 */
	if (sctx->parent_root) {
		ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}
		if (gen != dir_gen)
			goto out;
	}

	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
			&other_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Check if the overwritten ref was already processed. If yes, the ref
	 * was already unlinked/moved, so we can safely assume that we will not
	 * overwrite anything at this point in time.
	 */
	if (other_inode > sctx->send_progress) {
		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
				who_gen, NULL, NULL, NULL, NULL);
		if (ret < 0)
			goto out;

		ret = 1;
		*who_ino = other_inode;
	} else {
		ret = 0;
	}

out:
	return ret;
}

/*
 * Checks if the ref was overwritten by an already processed inode. This is
 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
 * thus the orphan name needs to be used.
 * process_recorded_refs also uses it to avoid unlinking of refs that were
 * overwritten.
 */
static int did_overwrite_ref(struct send_ctx *sctx,
			    u64 dir, u64 dir_gen,
			    u64 ino, u64 ino_gen,
			    const char *name, int name_len)
{
	int ret = 0;
	u64 gen;
	u64 ow_inode;
	u8 other_type;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	/* check if the ref was overwritten by another ref */
	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
			&ow_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		/* was never and will never be overwritten */
		ret = 0;
		goto out;
	}

	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
			NULL, NULL);
	if (ret < 0)
		goto out;

	if (ow_inode == ino && gen == ino_gen) {
		ret = 0;
		goto out;
	}
	/*
	 * We know that it is or will be overwritten. Check this now.
	 * The current inode being processed might have been the one that
	 * caused inode 'ino' to be orphanized, therefore check if ow_inode
	 * matches the current inode being processed.
	 */
	if ((ow_inode < sctx->send_progress) ||
	    (ino != sctx->cur_ino && ow_inode == sctx->cur_ino &&
	     gen == sctx->cur_inode_gen))
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}

/*
 * Same as did_overwrite_ref, but also checks if it is the first ref of an inode
 * that got overwritten. This is used by process_recorded_refs to determine
 * if it has to use the path as returned by get_cur_path or the orphan name.
 */
static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret = 0;
	struct fs_path *name = NULL;
	u64 dir;
	u64 dir_gen;

	if (!sctx->parent_root)
		goto out;

	name = fs_path_alloc();
	if (!name)
		return -ENOMEM;

	ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
	if (ret < 0)
		goto out;

	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
			name->start, fs_path_len(name));

out:
	fs_path_free(name);
	return ret;
}

/*
 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
 * so we need to do some special handling in case we have clashes. This function
 * takes care of this with the help of name_cache_entry::radix_list.
 * In case of error, nce is kfreed.
 */
static int name_cache_insert(struct send_ctx *sctx,
			     struct name_cache_entry *nce)
{
	int ret = 0;
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
			(unsigned long)nce->ino);
	if (!nce_head) {
		nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
		if (!nce_head) {
			kfree(nce);
			return -ENOMEM;
		}
		INIT_LIST_HEAD(nce_head);

		ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
		if (ret < 0) {
			kfree(nce_head);
			kfree(nce);
			return ret;
		}
	}
	list_add_tail(&nce->radix_list, nce_head);
	list_add_tail(&nce->list, &sctx->name_cache_list);
	sctx->name_cache_size++;

	return ret;
}

static void name_cache_delete(struct send_ctx *sctx,
			      struct name_cache_entry *nce)
{
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
			(unsigned long)nce->ino);
	if (!nce_head) {
		btrfs_err(sctx->send_root->fs_info,
	      "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
			nce->ino, sctx->name_cache_size);
	}

	list_del(&nce->radix_list);
	list_del(&nce->list);
	sctx->name_cache_size--;

	/*
	 * We may not get to the final release of nce_head if the lookup fails
	 */
	if (nce_head && list_empty(nce_head)) {
		radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
		kfree(nce_head);
	}
}

static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
						    u64 ino, u64 gen)
{
	struct list_head *nce_head;
	struct name_cache_entry *cur;

	nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
	if (!nce_head)
		return NULL;

	list_for_each_entry(cur, nce_head, radix_list) {
		if (cur->ino == ino && cur->gen == gen)
			return cur;
	}
	return NULL;
}
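/*
 * Editor's sketch of the name cache structure built above (informative
 * only): the radix tree maps the (possibly truncated) inode number to a
 * bucket list, while a second, global list provides LRU ordering:
 *
 *	name_cache (radix tree):
 *	  [ino as index] -> nce_head -> nce -> nce	(via radix_list)
 *	name_cache_list: nce <-> nce <-> ... <-> nce	(LRU, oldest first)
 *
 * name_cache_used() below moves an entry to the tail on every hit and
 * name_cache_clean_unused() trims from the head once the cache grows
 * past SEND_CTX_NAME_CACHE_CLEAN_SIZE.
 */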
/*
 * Removes the entry from the list and adds it back to the end. This marks the
 * entry as recently used so that name_cache_clean_unused does not remove it.
 */
static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
{
	list_del(&nce->list);
	list_add_tail(&nce->list, &sctx->name_cache_list);
}

/*
 * Remove some entries from the beginning of name_cache_list.
 */
static void name_cache_clean_unused(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
		return;

	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
		nce = list_entry(sctx->name_cache_list.next,
				struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}

static void name_cache_free(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	while (!list_empty(&sctx->name_cache_list)) {
		nce = list_entry(sctx->name_cache_list.next,
				struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}

/*
 * Used by get_cur_path for each ref up to the root.
 * Returns 0 if it succeeded.
 * Returns 1 if the inode is not existent or got overwritten. In that case, the
 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
 * Returns <0 in case of error.
 */
static int __get_cur_name_and_parent(struct send_ctx *sctx,
				     u64 ino, u64 gen,
				     u64 *parent_ino,
				     u64 *parent_gen,
				     struct fs_path *dest)
{
	int ret;
	int nce_ret;
	struct name_cache_entry *nce = NULL;

	/*
	 * First check if we already did a call to this function with the same
	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
	 * return the cached result.
	 */
	nce = name_cache_search(sctx, ino, gen);
	if (nce) {
		if (ino < sctx->send_progress && nce->need_later_update) {
			name_cache_delete(sctx, nce);
			kfree(nce);
			nce = NULL;
		} else {
			name_cache_used(sctx, nce);
			*parent_ino = nce->parent_ino;
			*parent_gen = nce->parent_gen;
			ret = fs_path_add(dest, nce->name, nce->name_len);
			if (ret < 0)
				goto out;
			ret = nce->ret;
			goto out;
		}
	}

	/*
	 * If the inode is not existent yet, add the orphan name and return 1.
	 * This should only happen for the parent dir that we determine in
	 * __record_new_ref
	 */
	ret = is_inode_existent(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (!ret) {
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
		goto out_cache;
	}

	/*
	 * Depending on whether the inode was already processed or not, use
	 * send_root or parent_root for ref lookup.
	 */
	if (ino < sctx->send_progress)
		ret = get_first_ref(sctx->send_root, ino,
				    parent_ino, parent_gen, dest);
	else
		ret = get_first_ref(sctx->parent_root, ino,
				    parent_ino, parent_gen, dest);
	if (ret < 0)
		goto out;

	/*
	 * Check if the ref was overwritten by an inode's ref that was processed
	 * earlier. If yes, treat as orphan and return 1.
	 */
	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
			dest->start, dest->end - dest->start);
	if (ret < 0)
		goto out;
	if (ret) {
		fs_path_reset(dest);
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
	}

out_cache:
	/*
	 * Store the result of the lookup in the name cache.
	 */
	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
	if (!nce) {
		ret = -ENOMEM;
		goto out;
	}

	nce->ino = ino;
	nce->gen = gen;
	nce->parent_ino = *parent_ino;
	nce->parent_gen = *parent_gen;
	nce->name_len = fs_path_len(dest);
	nce->ret = ret;
	strcpy(nce->name, dest->start);

	if (ino < sctx->send_progress)
		nce->need_later_update = 0;
	else
		nce->need_later_update = 1;

	nce_ret = name_cache_insert(sctx, nce);
	if (nce_ret < 0)
		ret = nce_ret;
	name_cache_clean_unused(sctx);

out:
	return ret;
}

/*
 * Magic happens here. This function returns the first ref to an inode as it
 * would look like while receiving the stream at this point in time.
 * We walk the path up to the root. For every inode in between, we check if it
 * was already processed/sent. If yes, we continue with the parent as found
 * in send_root. If not, we continue with the parent as found in parent_root.
 * If we encounter an inode that was deleted at this point in time, we use the
 * inodes "orphan" name instead of the real name and stop. Same with new inodes
 * that were not created yet and overwritten inodes/refs.
 *
 * When do we have orphan inodes:
 * 1. When an inode is freshly created and thus no valid refs are available yet
 * 2. When a directory lost all its refs (deleted) but still has dir items
 *    inside which were not processed yet (pending for move/delete). If anyone
 *    tried to get the path to the dir items, it would get a path inside that
 *    orphan directory.
 * 3. When an inode is moved around or gets new links, it may overwrite the ref
 *    of an unprocessed inode. If in that case the first ref would be
 *    overwritten, the overwritten inode gets "orphanized". Later when we
 *    process this overwritten inode, it is restored at a new place by moving
 *    the orphan inode.
 *
 * sctx->send_progress tells this function at which point in time receiving
 * would be.
 */
2220 */ 2221 static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, 2222 struct fs_path *dest) 2223 { 2224 int ret = 0; 2225 struct fs_path *name = NULL; 2226 u64 parent_inode = 0; 2227 u64 parent_gen = 0; 2228 int stop = 0; 2229 2230 name = fs_path_alloc(); 2231 if (!name) { 2232 ret = -ENOMEM; 2233 goto out; 2234 } 2235 2236 dest->reversed = 1; 2237 fs_path_reset(dest); 2238 2239 while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { 2240 struct waiting_dir_move *wdm; 2241 2242 fs_path_reset(name); 2243 2244 if (is_waiting_for_rm(sctx, ino)) { 2245 ret = gen_unique_name(sctx, ino, gen, name); 2246 if (ret < 0) 2247 goto out; 2248 ret = fs_path_add_path(dest, name); 2249 break; 2250 } 2251 2252 wdm = get_waiting_dir_move(sctx, ino); 2253 if (wdm && wdm->orphanized) { 2254 ret = gen_unique_name(sctx, ino, gen, name); 2255 stop = 1; 2256 } else if (wdm) { 2257 ret = get_first_ref(sctx->parent_root, ino, 2258 &parent_inode, &parent_gen, name); 2259 } else { 2260 ret = __get_cur_name_and_parent(sctx, ino, gen, 2261 &parent_inode, 2262 &parent_gen, name); 2263 if (ret) 2264 stop = 1; 2265 } 2266 2267 if (ret < 0) 2268 goto out; 2269 2270 ret = fs_path_add_path(dest, name); 2271 if (ret < 0) 2272 goto out; 2273 2274 ino = parent_inode; 2275 gen = parent_gen; 2276 } 2277 2278 out: 2279 fs_path_free(name); 2280 if (!ret) 2281 fs_path_unreverse(dest); 2282 return ret; 2283 } 2284 2285 /* 2286 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace 2287 */ 2288 static int send_subvol_begin(struct send_ctx *sctx) 2289 { 2290 int ret; 2291 struct btrfs_root *send_root = sctx->send_root; 2292 struct btrfs_root *parent_root = sctx->parent_root; 2293 struct btrfs_path *path; 2294 struct btrfs_key key; 2295 struct btrfs_root_ref *ref; 2296 struct extent_buffer *leaf; 2297 char *name = NULL; 2298 int namelen; 2299 2300 path = btrfs_alloc_path(); 2301 if (!path) 2302 return -ENOMEM; 2303 2304 name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS); 2305 if (!name) { 2306 btrfs_free_path(path); 2307 return -ENOMEM; 2308 } 2309 2310 key.objectid = send_root->objectid; 2311 key.type = BTRFS_ROOT_BACKREF_KEY; 2312 key.offset = 0; 2313 2314 ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root, 2315 &key, path, 1, 0); 2316 if (ret < 0) 2317 goto out; 2318 if (ret) { 2319 ret = -ENOENT; 2320 goto out; 2321 } 2322 2323 leaf = path->nodes[0]; 2324 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2325 if (key.type != BTRFS_ROOT_BACKREF_KEY || 2326 key.objectid != send_root->objectid) { 2327 ret = -ENOENT; 2328 goto out; 2329 } 2330 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); 2331 namelen = btrfs_root_ref_name_len(leaf, ref); 2332 read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); 2333 btrfs_release_path(path); 2334 2335 if (parent_root) { 2336 ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); 2337 if (ret < 0) 2338 goto out; 2339 } else { 2340 ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL); 2341 if (ret < 0) 2342 goto out; 2343 } 2344 2345 TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); 2346 2347 if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid)) 2348 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, 2349 sctx->send_root->root_item.received_uuid); 2350 else 2351 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, 2352 sctx->send_root->root_item.uuid); 2353 2354 TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, 2355 le64_to_cpu(sctx->send_root->root_item.ctransid)); 2356 if (parent_root) { 2357 if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid)) 2358 TLV_PUT_UUID(sctx, 
BTRFS_SEND_A_CLONE_UUID, 2359 parent_root->root_item.received_uuid); 2360 else 2361 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 2362 parent_root->root_item.uuid); 2363 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 2364 le64_to_cpu(sctx->parent_root->root_item.ctransid)); 2365 } 2366 2367 ret = send_cmd(sctx); 2368 2369 tlv_put_failure: 2370 out: 2371 btrfs_free_path(path); 2372 kfree(name); 2373 return ret; 2374 } 2375 2376 static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) 2377 { 2378 int ret = 0; 2379 struct fs_path *p; 2380 2381 verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); 2382 2383 p = fs_path_alloc(); 2384 if (!p) 2385 return -ENOMEM; 2386 2387 ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE); 2388 if (ret < 0) 2389 goto out; 2390 2391 ret = get_cur_path(sctx, ino, gen, p); 2392 if (ret < 0) 2393 goto out; 2394 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2395 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size); 2396 2397 ret = send_cmd(sctx); 2398 2399 tlv_put_failure: 2400 out: 2401 fs_path_free(p); 2402 return ret; 2403 } 2404 2405 static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) 2406 { 2407 int ret = 0; 2408 struct fs_path *p; 2409 2410 verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); 2411 2412 p = fs_path_alloc(); 2413 if (!p) 2414 return -ENOMEM; 2415 2416 ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD); 2417 if (ret < 0) 2418 goto out; 2419 2420 ret = get_cur_path(sctx, ino, gen, p); 2421 if (ret < 0) 2422 goto out; 2423 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2424 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777); 2425 2426 ret = send_cmd(sctx); 2427 2428 tlv_put_failure: 2429 out: 2430 fs_path_free(p); 2431 return ret; 2432 } 2433 2434 static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) 2435 { 2436 int ret = 0; 2437 struct fs_path *p; 2438 2439 verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); 2440 2441 p = fs_path_alloc(); 2442 if (!p) 2443 return -ENOMEM; 2444 2445 ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN); 2446 if (ret < 0) 2447 goto out; 2448 2449 ret = get_cur_path(sctx, ino, gen, p); 2450 if (ret < 0) 2451 goto out; 2452 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2453 TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid); 2454 TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid); 2455 2456 ret = send_cmd(sctx); 2457 2458 tlv_put_failure: 2459 out: 2460 fs_path_free(p); 2461 return ret; 2462 } 2463 2464 static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) 2465 { 2466 int ret = 0; 2467 struct fs_path *p = NULL; 2468 struct btrfs_inode_item *ii; 2469 struct btrfs_path *path = NULL; 2470 struct extent_buffer *eb; 2471 struct btrfs_key key; 2472 int slot; 2473 2474 verbose_printk("btrfs: send_utimes %llu\n", ino); 2475 2476 p = fs_path_alloc(); 2477 if (!p) 2478 return -ENOMEM; 2479 2480 path = alloc_path_for_send(); 2481 if (!path) { 2482 ret = -ENOMEM; 2483 goto out; 2484 } 2485 2486 key.objectid = ino; 2487 key.type = BTRFS_INODE_ITEM_KEY; 2488 key.offset = 0; 2489 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2490 if (ret < 0) 2491 goto out; 2492 2493 eb = path->nodes[0]; 2494 slot = path->slots[0]; 2495 ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 2496 2497 ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES); 2498 if (ret < 0) 2499 goto out; 2500 2501 ret = get_cur_path(sctx, ino, gen, p); 2502 if (ret < 0) 2503 goto out; 2504 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2505 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime); 
2506 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime); 2507 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime); 2508 /* TODO Add otime support when the otime patches get into upstream */ 2509 2510 ret = send_cmd(sctx); 2511 2512 tlv_put_failure: 2513 out: 2514 fs_path_free(p); 2515 btrfs_free_path(path); 2516 return ret; 2517 } 2518 2519 /* 2520 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have 2521 * a valid path yet because we did not process the refs yet. So, the inode 2522 * is created as orphan. 2523 */ 2524 static int send_create_inode(struct send_ctx *sctx, u64 ino) 2525 { 2526 int ret = 0; 2527 struct fs_path *p; 2528 int cmd; 2529 u64 gen; 2530 u64 mode; 2531 u64 rdev; 2532 2533 verbose_printk("btrfs: send_create_inode %llu\n", ino); 2534 2535 p = fs_path_alloc(); 2536 if (!p) 2537 return -ENOMEM; 2538 2539 if (ino != sctx->cur_ino) { 2540 ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, 2541 NULL, NULL, &rdev); 2542 if (ret < 0) 2543 goto out; 2544 } else { 2545 gen = sctx->cur_inode_gen; 2546 mode = sctx->cur_inode_mode; 2547 rdev = sctx->cur_inode_rdev; 2548 } 2549 2550 if (S_ISREG(mode)) { 2551 cmd = BTRFS_SEND_C_MKFILE; 2552 } else if (S_ISDIR(mode)) { 2553 cmd = BTRFS_SEND_C_MKDIR; 2554 } else if (S_ISLNK(mode)) { 2555 cmd = BTRFS_SEND_C_SYMLINK; 2556 } else if (S_ISCHR(mode) || S_ISBLK(mode)) { 2557 cmd = BTRFS_SEND_C_MKNOD; 2558 } else if (S_ISFIFO(mode)) { 2559 cmd = BTRFS_SEND_C_MKFIFO; 2560 } else if (S_ISSOCK(mode)) { 2561 cmd = BTRFS_SEND_C_MKSOCK; 2562 } else { 2563 btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o", 2564 (int)(mode & S_IFMT)); 2565 ret = -ENOTSUPP; 2566 goto out; 2567 } 2568 2569 ret = begin_cmd(sctx, cmd); 2570 if (ret < 0) 2571 goto out; 2572 2573 ret = gen_unique_name(sctx, ino, gen, p); 2574 if (ret < 0) 2575 goto out; 2576 2577 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2578 TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino); 2579 2580 if (S_ISLNK(mode)) { 2581 fs_path_reset(p); 2582 ret = read_symlink(sctx->send_root, ino, p); 2583 if (ret < 0) 2584 goto out; 2585 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); 2586 } else if (S_ISCHR(mode) || S_ISBLK(mode) || 2587 S_ISFIFO(mode) || S_ISSOCK(mode)) { 2588 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev)); 2589 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode); 2590 } 2591 2592 ret = send_cmd(sctx); 2593 if (ret < 0) 2594 goto out; 2595 2596 2597 tlv_put_failure: 2598 out: 2599 fs_path_free(p); 2600 return ret; 2601 } 2602 2603 /* 2604 * We need some special handling for inodes that get processed before the parent 2605 * directory got created. See process_recorded_refs for details. 2606 * This function does the check if we already created the dir out of order. 
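 *
 * A minimal example (hypothetical inode numbers): if the send snapshot
 * contains
 *
 * |-- dir/ (ino 260)
 * |   |-- file (ino 257)
 *
 * then "file" (ino 257) is processed before "dir" (ino 260) and forces the
 * directory to be created out of order. When ino 260 is processed later,
 * did_create_dir() finds the already processed child (257 < send_progress)
 * and returns 1, so no second MKDIR command is sent.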
2607 */ 2608 static int did_create_dir(struct send_ctx *sctx, u64 dir) 2609 { 2610 int ret = 0; 2611 struct btrfs_path *path = NULL; 2612 struct btrfs_key key; 2613 struct btrfs_key found_key; 2614 struct btrfs_key di_key; 2615 struct extent_buffer *eb; 2616 struct btrfs_dir_item *di; 2617 int slot; 2618 2619 path = alloc_path_for_send(); 2620 if (!path) { 2621 ret = -ENOMEM; 2622 goto out; 2623 } 2624 2625 key.objectid = dir; 2626 key.type = BTRFS_DIR_INDEX_KEY; 2627 key.offset = 0; 2628 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2629 if (ret < 0) 2630 goto out; 2631 2632 while (1) { 2633 eb = path->nodes[0]; 2634 slot = path->slots[0]; 2635 if (slot >= btrfs_header_nritems(eb)) { 2636 ret = btrfs_next_leaf(sctx->send_root, path); 2637 if (ret < 0) { 2638 goto out; 2639 } else if (ret > 0) { 2640 ret = 0; 2641 break; 2642 } 2643 continue; 2644 } 2645 2646 btrfs_item_key_to_cpu(eb, &found_key, slot); 2647 if (found_key.objectid != key.objectid || 2648 found_key.type != key.type) { 2649 ret = 0; 2650 goto out; 2651 } 2652 2653 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 2654 btrfs_dir_item_key_to_cpu(eb, di, &di_key); 2655 2656 if (di_key.type != BTRFS_ROOT_ITEM_KEY && 2657 di_key.objectid < sctx->send_progress) { 2658 ret = 1; 2659 goto out; 2660 } 2661 2662 path->slots[0]++; 2663 } 2664 2665 out: 2666 btrfs_free_path(path); 2667 return ret; 2668 } 2669 2670 /* 2671 * Only creates the inode if it is: 2672 * 1. Not a directory 2673 * 2. Or a directory which was not created already due to out of order 2674 * directories. See did_create_dir and process_recorded_refs for details. 2675 */ 2676 static int send_create_inode_if_needed(struct send_ctx *sctx) 2677 { 2678 int ret; 2679 2680 if (S_ISDIR(sctx->cur_inode_mode)) { 2681 ret = did_create_dir(sctx, sctx->cur_ino); 2682 if (ret < 0) 2683 goto out; 2684 if (ret) { 2685 ret = 0; 2686 goto out; 2687 } 2688 } 2689 2690 ret = send_create_inode(sctx, sctx->cur_ino); 2691 if (ret < 0) 2692 goto out; 2693 2694 out: 2695 return ret; 2696 } 2697 2698 struct recorded_ref { 2699 struct list_head list; 2700 char *dir_path; 2701 char *name; 2702 struct fs_path *full_path; 2703 u64 dir; 2704 u64 dir_gen; 2705 int dir_path_len; 2706 int name_len; 2707 }; 2708 2709 /* 2710 * We need to process new refs before deleted refs, but compare_tree gives us 2711 * everything mixed. So we first record all refs and later process them. 2712 * This function is a helper to record one ref. 
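 *
 * Example: a plain "mv /a /b" arrives from compare_tree as a new ref "b"
 * plus a deleted ref "a" for the same inode, in tree order rather than in
 * a useful order. Recording both lists first lets process_recorded_refs()
 * emit the rename/link before any unlink, so the inode never loses its
 * last path in the middle of the stream.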
2713 */ 2714 static int __record_ref(struct list_head *head, u64 dir, 2715 u64 dir_gen, struct fs_path *path) 2716 { 2717 struct recorded_ref *ref; 2718 2719 ref = kmalloc(sizeof(*ref), GFP_NOFS); 2720 if (!ref) 2721 return -ENOMEM; 2722 2723 ref->dir = dir; 2724 ref->dir_gen = dir_gen; 2725 ref->full_path = path; 2726 2727 ref->name = (char *)kbasename(ref->full_path->start); 2728 ref->name_len = ref->full_path->end - ref->name; 2729 ref->dir_path = ref->full_path->start; 2730 if (ref->name == ref->full_path->start) 2731 ref->dir_path_len = 0; 2732 else 2733 ref->dir_path_len = ref->full_path->end - 2734 ref->full_path->start - 1 - ref->name_len; 2735 2736 list_add_tail(&ref->list, head); 2737 return 0; 2738 } 2739 2740 static int dup_ref(struct recorded_ref *ref, struct list_head *list) 2741 { 2742 struct recorded_ref *new; 2743 2744 new = kmalloc(sizeof(*ref), GFP_NOFS); 2745 if (!new) 2746 return -ENOMEM; 2747 2748 new->dir = ref->dir; 2749 new->dir_gen = ref->dir_gen; 2750 new->full_path = NULL; 2751 INIT_LIST_HEAD(&new->list); 2752 list_add_tail(&new->list, list); 2753 return 0; 2754 } 2755 2756 static void __free_recorded_refs(struct list_head *head) 2757 { 2758 struct recorded_ref *cur; 2759 2760 while (!list_empty(head)) { 2761 cur = list_entry(head->next, struct recorded_ref, list); 2762 fs_path_free(cur->full_path); 2763 list_del(&cur->list); 2764 kfree(cur); 2765 } 2766 } 2767 2768 static void free_recorded_refs(struct send_ctx *sctx) 2769 { 2770 __free_recorded_refs(&sctx->new_refs); 2771 __free_recorded_refs(&sctx->deleted_refs); 2772 } 2773 2774 /* 2775 * Renames/moves a file/dir to its orphan name. Used when the first 2776 * ref of an unprocessed inode gets overwritten and for all non empty 2777 * directories. 2778 */ 2779 static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, 2780 struct fs_path *path) 2781 { 2782 int ret; 2783 struct fs_path *orphan; 2784 2785 orphan = fs_path_alloc(); 2786 if (!orphan) 2787 return -ENOMEM; 2788 2789 ret = gen_unique_name(sctx, ino, gen, orphan); 2790 if (ret < 0) 2791 goto out; 2792 2793 ret = send_rename(sctx, path, orphan); 2794 2795 out: 2796 fs_path_free(orphan); 2797 return ret; 2798 } 2799 2800 static struct orphan_dir_info * 2801 add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) 2802 { 2803 struct rb_node **p = &sctx->orphan_dirs.rb_node; 2804 struct rb_node *parent = NULL; 2805 struct orphan_dir_info *entry, *odi; 2806 2807 odi = kmalloc(sizeof(*odi), GFP_NOFS); 2808 if (!odi) 2809 return ERR_PTR(-ENOMEM); 2810 odi->ino = dir_ino; 2811 odi->gen = 0; 2812 2813 while (*p) { 2814 parent = *p; 2815 entry = rb_entry(parent, struct orphan_dir_info, node); 2816 if (dir_ino < entry->ino) { 2817 p = &(*p)->rb_left; 2818 } else if (dir_ino > entry->ino) { 2819 p = &(*p)->rb_right; 2820 } else { 2821 kfree(odi); 2822 return entry; 2823 } 2824 } 2825 2826 rb_link_node(&odi->node, parent, p); 2827 rb_insert_color(&odi->node, &sctx->orphan_dirs); 2828 return odi; 2829 } 2830 2831 static struct orphan_dir_info * 2832 get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) 2833 { 2834 struct rb_node *n = sctx->orphan_dirs.rb_node; 2835 struct orphan_dir_info *entry; 2836 2837 while (n) { 2838 entry = rb_entry(n, struct orphan_dir_info, node); 2839 if (dir_ino < entry->ino) 2840 n = n->rb_left; 2841 else if (dir_ino > entry->ino) 2842 n = n->rb_right; 2843 else 2844 return entry; 2845 } 2846 return NULL; 2847 } 2848 2849 static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) 2850 { 2851 struct orphan_dir_info *odi = 
get_orphan_dir_info(sctx, dir_ino); 2852 2853 return odi != NULL; 2854 } 2855 2856 static void free_orphan_dir_info(struct send_ctx *sctx, 2857 struct orphan_dir_info *odi) 2858 { 2859 if (!odi) 2860 return; 2861 rb_erase(&odi->node, &sctx->orphan_dirs); 2862 kfree(odi); 2863 } 2864 2865 /* 2866 * Returns 1 if a directory can be removed at this point in time. 2867 * We check this by iterating all dir items and checking if the inode behind 2868 * the dir item was already processed. 2869 */ 2870 static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, 2871 u64 send_progress) 2872 { 2873 int ret = 0; 2874 struct btrfs_root *root = sctx->parent_root; 2875 struct btrfs_path *path; 2876 struct btrfs_key key; 2877 struct btrfs_key found_key; 2878 struct btrfs_key loc; 2879 struct btrfs_dir_item *di; 2880 2881 /* 2882 * Don't try to rmdir the top/root subvolume dir. 2883 */ 2884 if (dir == BTRFS_FIRST_FREE_OBJECTID) 2885 return 0; 2886 2887 path = alloc_path_for_send(); 2888 if (!path) 2889 return -ENOMEM; 2890 2891 key.objectid = dir; 2892 key.type = BTRFS_DIR_INDEX_KEY; 2893 key.offset = 0; 2894 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2895 if (ret < 0) 2896 goto out; 2897 2898 while (1) { 2899 struct waiting_dir_move *dm; 2900 2901 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 2902 ret = btrfs_next_leaf(root, path); 2903 if (ret < 0) 2904 goto out; 2905 else if (ret > 0) 2906 break; 2907 continue; 2908 } 2909 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2910 path->slots[0]); 2911 if (found_key.objectid != key.objectid || 2912 found_key.type != key.type) 2913 break; 2914 2915 di = btrfs_item_ptr(path->nodes[0], path->slots[0], 2916 struct btrfs_dir_item); 2917 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); 2918 2919 dm = get_waiting_dir_move(sctx, loc.objectid); 2920 if (dm) { 2921 struct orphan_dir_info *odi; 2922 2923 odi = add_orphan_dir_info(sctx, dir); 2924 if (IS_ERR(odi)) { 2925 ret = PTR_ERR(odi); 2926 goto out; 2927 } 2928 odi->gen = dir_gen; 2929 dm->rmdir_ino = dir; 2930 ret = 0; 2931 goto out; 2932 } 2933 2934 if (loc.objectid > send_progress) { 2935 ret = 0; 2936 goto out; 2937 } 2938 2939 path->slots[0]++; 2940 } 2941 2942 ret = 1; 2943 2944 out: 2945 btrfs_free_path(path); 2946 return ret; 2947 } 2948 2949 static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) 2950 { 2951 struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino); 2952 2953 return entry != NULL; 2954 } 2955 2956 static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized) 2957 { 2958 struct rb_node **p = &sctx->waiting_dir_moves.rb_node; 2959 struct rb_node *parent = NULL; 2960 struct waiting_dir_move *entry, *dm; 2961 2962 dm = kmalloc(sizeof(*dm), GFP_NOFS); 2963 if (!dm) 2964 return -ENOMEM; 2965 dm->ino = ino; 2966 dm->rmdir_ino = 0; 2967 dm->orphanized = orphanized; 2968 2969 while (*p) { 2970 parent = *p; 2971 entry = rb_entry(parent, struct waiting_dir_move, node); 2972 if (ino < entry->ino) { 2973 p = &(*p)->rb_left; 2974 } else if (ino > entry->ino) { 2975 p = &(*p)->rb_right; 2976 } else { 2977 kfree(dm); 2978 return -EEXIST; 2979 } 2980 } 2981 2982 rb_link_node(&dm->node, parent, p); 2983 rb_insert_color(&dm->node, &sctx->waiting_dir_moves); 2984 return 0; 2985 } 2986 2987 static struct waiting_dir_move * 2988 get_waiting_dir_move(struct send_ctx *sctx, u64 ino) 2989 { 2990 struct rb_node *n = sctx->waiting_dir_moves.rb_node; 2991 struct waiting_dir_move *entry; 2992 2993 while (n) { 2994 entry = rb_entry(n, struct 
waiting_dir_move, node); 2995 if (ino < entry->ino) 2996 n = n->rb_left; 2997 else if (ino > entry->ino) 2998 n = n->rb_right; 2999 else 3000 return entry; 3001 } 3002 return NULL; 3003 } 3004 3005 static void free_waiting_dir_move(struct send_ctx *sctx, 3006 struct waiting_dir_move *dm) 3007 { 3008 if (!dm) 3009 return; 3010 rb_erase(&dm->node, &sctx->waiting_dir_moves); 3011 kfree(dm); 3012 } 3013 3014 static int add_pending_dir_move(struct send_ctx *sctx, 3015 u64 ino, 3016 u64 ino_gen, 3017 u64 parent_ino, 3018 struct list_head *new_refs, 3019 struct list_head *deleted_refs, 3020 const bool is_orphan) 3021 { 3022 struct rb_node **p = &sctx->pending_dir_moves.rb_node; 3023 struct rb_node *parent = NULL; 3024 struct pending_dir_move *entry = NULL, *pm; 3025 struct recorded_ref *cur; 3026 int exists = 0; 3027 int ret; 3028 3029 pm = kmalloc(sizeof(*pm), GFP_NOFS); 3030 if (!pm) 3031 return -ENOMEM; 3032 pm->parent_ino = parent_ino; 3033 pm->ino = ino; 3034 pm->gen = ino_gen; 3035 pm->is_orphan = is_orphan; 3036 INIT_LIST_HEAD(&pm->list); 3037 INIT_LIST_HEAD(&pm->update_refs); 3038 RB_CLEAR_NODE(&pm->node); 3039 3040 while (*p) { 3041 parent = *p; 3042 entry = rb_entry(parent, struct pending_dir_move, node); 3043 if (parent_ino < entry->parent_ino) { 3044 p = &(*p)->rb_left; 3045 } else if (parent_ino > entry->parent_ino) { 3046 p = &(*p)->rb_right; 3047 } else { 3048 exists = 1; 3049 break; 3050 } 3051 } 3052 3053 list_for_each_entry(cur, deleted_refs, list) { 3054 ret = dup_ref(cur, &pm->update_refs); 3055 if (ret < 0) 3056 goto out; 3057 } 3058 list_for_each_entry(cur, new_refs, list) { 3059 ret = dup_ref(cur, &pm->update_refs); 3060 if (ret < 0) 3061 goto out; 3062 } 3063 3064 ret = add_waiting_dir_move(sctx, pm->ino, is_orphan); 3065 if (ret) 3066 goto out; 3067 3068 if (exists) { 3069 list_add_tail(&pm->list, &entry->list); 3070 } else { 3071 rb_link_node(&pm->node, parent, p); 3072 rb_insert_color(&pm->node, &sctx->pending_dir_moves); 3073 } 3074 ret = 0; 3075 out: 3076 if (ret) { 3077 __free_recorded_refs(&pm->update_refs); 3078 kfree(pm); 3079 } 3080 return ret; 3081 } 3082 3083 static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, 3084 u64 parent_ino) 3085 { 3086 struct rb_node *n = sctx->pending_dir_moves.rb_node; 3087 struct pending_dir_move *entry; 3088 3089 while (n) { 3090 entry = rb_entry(n, struct pending_dir_move, node); 3091 if (parent_ino < entry->parent_ino) 3092 n = n->rb_left; 3093 else if (parent_ino > entry->parent_ino) 3094 n = n->rb_right; 3095 else 3096 return entry; 3097 } 3098 return NULL; 3099 } 3100 3101 static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) 3102 { 3103 struct fs_path *from_path = NULL; 3104 struct fs_path *to_path = NULL; 3105 struct fs_path *name = NULL; 3106 u64 orig_progress = sctx->send_progress; 3107 struct recorded_ref *cur; 3108 u64 parent_ino, parent_gen; 3109 struct waiting_dir_move *dm = NULL; 3110 u64 rmdir_ino = 0; 3111 int ret; 3112 3113 name = fs_path_alloc(); 3114 from_path = fs_path_alloc(); 3115 if (!name || !from_path) { 3116 ret = -ENOMEM; 3117 goto out; 3118 } 3119 3120 dm = get_waiting_dir_move(sctx, pm->ino); 3121 ASSERT(dm); 3122 rmdir_ino = dm->rmdir_ino; 3123 free_waiting_dir_move(sctx, dm); 3124 3125 if (pm->is_orphan) { 3126 ret = gen_unique_name(sctx, pm->ino, 3127 pm->gen, from_path); 3128 } else { 3129 ret = get_first_ref(sctx->parent_root, pm->ino, 3130 &parent_ino, &parent_gen, name); 3131 if (ret < 0) 3132 goto out; 3133 ret = get_cur_path(sctx, parent_ino, 
parent_gen, 3134 from_path); 3135 if (ret < 0) 3136 goto out; 3137 ret = fs_path_add_path(from_path, name); 3138 } 3139 if (ret < 0) 3140 goto out; 3141 3142 sctx->send_progress = sctx->cur_ino + 1; 3143 fs_path_reset(name); 3144 to_path = name; 3145 name = NULL; 3146 ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); 3147 if (ret < 0) 3148 goto out; 3149 3150 ret = send_rename(sctx, from_path, to_path); 3151 if (ret < 0) 3152 goto out; 3153 3154 if (rmdir_ino) { 3155 struct orphan_dir_info *odi; 3156 3157 odi = get_orphan_dir_info(sctx, rmdir_ino); 3158 if (!odi) { 3159 /* already deleted */ 3160 goto finish; 3161 } 3162 ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); 3163 if (ret < 0) 3164 goto out; 3165 if (!ret) 3166 goto finish; 3167 3168 name = fs_path_alloc(); 3169 if (!name) { 3170 ret = -ENOMEM; 3171 goto out; 3172 } 3173 ret = get_cur_path(sctx, rmdir_ino, odi->gen, name); 3174 if (ret < 0) 3175 goto out; 3176 ret = send_rmdir(sctx, name); 3177 if (ret < 0) 3178 goto out; 3179 free_orphan_dir_info(sctx, odi); 3180 } 3181 3182 finish: 3183 ret = send_utimes(sctx, pm->ino, pm->gen); 3184 if (ret < 0) 3185 goto out; 3186 3187 /* 3188 * After rename/move, need to update the utimes of both new parent(s) 3189 * and old parent(s). 3190 */ 3191 list_for_each_entry(cur, &pm->update_refs, list) { 3192 if (cur->dir == rmdir_ino) 3193 continue; 3194 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3195 if (ret < 0) 3196 goto out; 3197 } 3198 3199 out: 3200 fs_path_free(name); 3201 fs_path_free(from_path); 3202 fs_path_free(to_path); 3203 sctx->send_progress = orig_progress; 3204 3205 return ret; 3206 } 3207 3208 static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m) 3209 { 3210 if (!list_empty(&m->list)) 3211 list_del(&m->list); 3212 if (!RB_EMPTY_NODE(&m->node)) 3213 rb_erase(&m->node, &sctx->pending_dir_moves); 3214 __free_recorded_refs(&m->update_refs); 3215 kfree(m); 3216 } 3217 3218 static void tail_append_pending_moves(struct pending_dir_move *moves, 3219 struct list_head *stack) 3220 { 3221 if (list_empty(&moves->list)) { 3222 list_add_tail(&moves->list, stack); 3223 } else { 3224 LIST_HEAD(list); 3225 list_splice_init(&moves->list, &list); 3226 list_add_tail(&moves->list, stack); 3227 list_splice_tail(&list, stack); 3228 } 3229 } 3230 3231 static int apply_children_dir_moves(struct send_ctx *sctx) 3232 { 3233 struct pending_dir_move *pm; 3234 struct list_head stack; 3235 u64 parent_ino = sctx->cur_ino; 3236 int ret = 0; 3237 3238 pm = get_pending_dir_moves(sctx, parent_ino); 3239 if (!pm) 3240 return 0; 3241 3242 INIT_LIST_HEAD(&stack); 3243 tail_append_pending_moves(pm, &stack); 3244 3245 while (!list_empty(&stack)) { 3246 pm = list_first_entry(&stack, struct pending_dir_move, list); 3247 parent_ino = pm->ino; 3248 ret = apply_dir_move(sctx, pm); 3249 free_pending_move(sctx, pm); 3250 if (ret) 3251 goto out; 3252 pm = get_pending_dir_moves(sctx, parent_ino); 3253 if (pm) 3254 tail_append_pending_moves(pm, &stack); 3255 } 3256 return 0; 3257 3258 out: 3259 while (!list_empty(&stack)) { 3260 pm = list_first_entry(&stack, struct pending_dir_move, list); 3261 free_pending_move(sctx, pm); 3262 } 3263 return ret; 3264 } 3265 3266 /* 3267 * We might need to delay a directory rename even when no ancestor directory 3268 * (in the send root) with a higher inode number than ours (sctx->cur_ino) was 3269 * renamed. 
This happens when we rename a directory to the old name (the name 3270 * in the parent root) of some other unrelated directory that got its rename 3271 * delayed due to some ancestor with higher number that got renamed. 3272 * 3273 * Example: 3274 * 3275 * Parent snapshot: 3276 * . (ino 256) 3277 * |---- a/ (ino 257) 3278 * | |---- file (ino 260) 3279 * | 3280 * |---- b/ (ino 258) 3281 * |---- c/ (ino 259) 3282 * 3283 * Send snapshot: 3284 * . (ino 256) 3285 * |---- a/ (ino 258) 3286 * |---- x/ (ino 259) 3287 * |---- y/ (ino 257) 3288 * |----- file (ino 260) 3289 * 3290 * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257 3291 * from 'a' to 'x/y' happening first, which in turn depends on the rename of 3292 * inode 259 from 'c' to 'x'. So the order of rename commands the send stream 3293 * must issue is: 3294 * 3295 * 1 - rename 259 from 'c' to 'x' 3296 * 2 - rename 257 from 'a' to 'x/y' 3297 * 3 - rename 258 from 'b' to 'a' 3298 * 3299 * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can 3300 * be done right away and < 0 on error. 3301 */ 3302 static int wait_for_dest_dir_move(struct send_ctx *sctx, 3303 struct recorded_ref *parent_ref, 3304 const bool is_orphan) 3305 { 3306 struct btrfs_path *path; 3307 struct btrfs_key key; 3308 struct btrfs_key di_key; 3309 struct btrfs_dir_item *di; 3310 u64 left_gen; 3311 u64 right_gen; 3312 int ret = 0; 3313 3314 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) 3315 return 0; 3316 3317 path = alloc_path_for_send(); 3318 if (!path) 3319 return -ENOMEM; 3320 3321 key.objectid = parent_ref->dir; 3322 key.type = BTRFS_DIR_ITEM_KEY; 3323 key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len); 3324 3325 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); 3326 if (ret < 0) { 3327 goto out; 3328 } else if (ret > 0) { 3329 ret = 0; 3330 goto out; 3331 } 3332 3333 di = btrfs_match_dir_item_name(sctx->parent_root, path, 3334 parent_ref->name, parent_ref->name_len); 3335 if (!di) { 3336 ret = 0; 3337 goto out; 3338 } 3339 /* 3340 * di_key.objectid has the number of the inode that has a dentry in the 3341 * parent directory with the same name that sctx->cur_ino is being 3342 * renamed to. We need to check if that inode is in the send root as 3343 * well and if it is currently marked as an inode with a pending rename, 3344 * if it is, we need to delay the rename of sctx->cur_ino as well, so 3345 * that it happens after that other inode is renamed. 3346 */ 3347 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key); 3348 if (di_key.type != BTRFS_INODE_ITEM_KEY) { 3349 ret = 0; 3350 goto out; 3351 } 3352 3353 ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL, 3354 &left_gen, NULL, NULL, NULL, NULL); 3355 if (ret < 0) 3356 goto out; 3357 ret = get_inode_info(sctx->send_root, di_key.objectid, NULL, 3358 &right_gen, NULL, NULL, NULL, NULL); 3359 if (ret < 0) { 3360 if (ret == -ENOENT) 3361 ret = 0; 3362 goto out; 3363 } 3364 3365 /* Different inode, no need to delay the rename of sctx->cur_ino */ 3366 if (right_gen != left_gen) { 3367 ret = 0; 3368 goto out; 3369 } 3370 3371 if (is_waiting_for_move(sctx, di_key.objectid)) { 3372 ret = add_pending_dir_move(sctx, 3373 sctx->cur_ino, 3374 sctx->cur_inode_gen, 3375 di_key.objectid, 3376 &sctx->new_refs, 3377 &sctx->deleted_refs, 3378 is_orphan); 3379 if (!ret) 3380 ret = 1; 3381 } 3382 out: 3383 btrfs_free_path(path); 3384 return ret; 3385 } 3386 3387 /* 3388 * Check if ino ino1 is an ancestor of inode ino2 in the given root. 
3389  * Return 1 if true, 0 if false and < 0 on error.
3390  */
3391 static int is_ancestor(struct btrfs_root *root,
3392 		       const u64 ino1,
3393 		       const u64 ino1_gen,
3394 		       const u64 ino2,
3395 		       struct fs_path *fs_path)
3396 {
3397 	u64 ino = ino2;
3398
3399 	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
3400 		int ret;
3401 		u64 parent;
3402 		u64 parent_gen;
3403
3404 		fs_path_reset(fs_path);
3405 		ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
3406 		if (ret < 0) {
3407 			if (ret == -ENOENT && ino == ino2)
3408 				ret = 0;
3409 			return ret;
3410 		}
3411 		if (parent == ino1)
3412 			return parent_gen == ino1_gen ? 1 : 0;
3413 		ino = parent;
3414 	}
3415 	return 0;
3416 }
3417
3418 static int wait_for_parent_move(struct send_ctx *sctx,
3419 				struct recorded_ref *parent_ref,
3420 				const bool is_orphan)
3421 {
3422 	int ret = 0;
3423 	u64 ino = parent_ref->dir;
3424 	u64 parent_ino_before, parent_ino_after;
3425 	struct fs_path *path_before = NULL;
3426 	struct fs_path *path_after = NULL;
3427 	int len1, len2;
3428
3429 	path_after = fs_path_alloc();
3430 	path_before = fs_path_alloc();
3431 	if (!path_after || !path_before) {
3432 		ret = -ENOMEM;
3433 		goto out;
3434 	}
3435
3436 	/*
3437 	 * Our current directory inode may not yet be renamed/moved because some
3438 	 * ancestor (immediate or not) has to be renamed/moved first. So find if
3439 	 * such an ancestor exists and make sure our own rename/move happens after
3440 	 * that ancestor is processed to avoid path build infinite loops (done
3441 	 * at get_cur_path()).
3442 	 */
3443 	while (ino > BTRFS_FIRST_FREE_OBJECTID) {
3444 		if (is_waiting_for_move(sctx, ino)) {
3445 			/*
3446 			 * If the current inode is an ancestor of ino in the
3447 			 * parent root, we need to delay the rename of the
3448 			 * current inode, otherwise don't delay the rename
3449 			 * because we can end up with a circular dependency
3450 			 * of renames, resulting in some directories never
3451 			 * getting the respective rename operations issued in
3452 			 * the send stream or getting into infinite path build
3453 			 * loops.
3454 			 */
3455 			ret = is_ancestor(sctx->parent_root,
3456 					  sctx->cur_ino, sctx->cur_inode_gen,
3457 					  ino, path_before);
3458 			break;
3459 		}
3460
3461 		fs_path_reset(path_before);
3462 		fs_path_reset(path_after);
3463
3464 		ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
3465 				    NULL, path_after);
3466 		if (ret < 0)
3467 			goto out;
3468 		ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
3469 				    NULL, path_before);
3470 		if (ret < 0 && ret != -ENOENT) {
3471 			goto out;
3472 		} else if (ret == -ENOENT) {
3473 			ret = 0;
3474 			break;
3475 		}
3476
3477 		len1 = fs_path_len(path_before);
3478 		len2 = fs_path_len(path_after);
3479 		if (ino > sctx->cur_ino &&
3480 		    (parent_ino_before != parent_ino_after || len1 != len2 ||
3481 		     memcmp(path_before->start, path_after->start, len1))) {
3482 			ret = 1;
3483 			break;
3484 		}
3485 		ino = parent_ino_after;
3486 	}
3487
3488 out:
3489 	fs_path_free(path_before);
3490 	fs_path_free(path_after);
3491
3492 	if (ret == 1) {
3493 		ret = add_pending_dir_move(sctx,
3494 					   sctx->cur_ino,
3495 					   sctx->cur_inode_gen,
3496 					   ino,
3497 					   &sctx->new_refs,
3498 					   &sctx->deleted_refs,
3499 					   is_orphan);
3500 		if (!ret)
3501 			ret = 1;
3502 	}
3503
3504 	return ret;
3505 }
3506
3507 /*
3508  * This does all the move/link/unlink/rmdir magic.
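 *
 * Roughly, the function below proceeds as follows:
 *
 * 1. Compute valid_path, the current path of the inode (the orphan name
 *    if the inode is new or its first ref was overwritten).
 * 2. For each new ref: create not yet existing parent dirs out of order,
 *    orphanize or unlink overwritten refs, then rename/link the inode
 *    into place, possibly delaying the rename via pending_dir_moves.
 * 3. If the inode is a deleted dir, rmdir it now if possible, otherwise
 *    orphanize it.
 * 4. If it is not a dir, unlink all deleted refs that were not
 *    overwritten.
 * 5. Revisit all affected parent dirs (check_dirs) for utimes updates and
 *    pending rmdirs.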
3509  */
3510 static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
3511 {
3512 	int ret = 0;
3513 	struct recorded_ref *cur;
3514 	struct recorded_ref *cur2;
3515 	struct list_head check_dirs;
3516 	struct fs_path *valid_path = NULL;
3517 	u64 ow_inode = 0;
3518 	u64 ow_gen;
3519 	int did_overwrite = 0;
3520 	int is_orphan = 0;
3521 	u64 last_dir_ino_rm = 0;
3522 	bool can_rename = true;
3523
3524 	verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3525
3526 	/*
3527 	 * This should never happen as the root dir always has the same ref
3528 	 * which is always '..'
3529 	 */
3530 	BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
3531 	INIT_LIST_HEAD(&check_dirs);
3532
3533 	valid_path = fs_path_alloc();
3534 	if (!valid_path) {
3535 		ret = -ENOMEM;
3536 		goto out;
3537 	}
3538
3539 	/*
3540 	 * First, check if the first ref of the current inode was overwritten
3541 	 * before. If yes, we know that the current inode was already orphanized
3542 	 * and thus use the orphan name. If not, we can use get_cur_path to
3543 	 * get the path of the first ref as it would look like while receiving
3544 	 * at this point in time.
3545 	 * New inodes are always orphan at the beginning, so force to use the
3546 	 * orphan name in this case.
3547 	 * The first ref is stored in valid_path and will be updated if it
3548 	 * gets moved around.
3549 	 */
3550 	if (!sctx->cur_inode_new) {
3551 		ret = did_overwrite_first_ref(sctx, sctx->cur_ino,
3552 				sctx->cur_inode_gen);
3553 		if (ret < 0)
3554 			goto out;
3555 		if (ret)
3556 			did_overwrite = 1;
3557 	}
3558 	if (sctx->cur_inode_new || did_overwrite) {
3559 		ret = gen_unique_name(sctx, sctx->cur_ino,
3560 				sctx->cur_inode_gen, valid_path);
3561 		if (ret < 0)
3562 			goto out;
3563 		is_orphan = 1;
3564 	} else {
3565 		ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen,
3566 				valid_path);
3567 		if (ret < 0)
3568 			goto out;
3569 	}
3570
3571 	list_for_each_entry(cur, &sctx->new_refs, list) {
3572 		/*
3573 		 * We may have refs where the parent directory does not exist
3574 		 * yet. This happens if the parent directory's inum is higher
3575 		 * than the current inum. To handle this case, we create the
3576 		 * parent directory out of order. But we need to check if this
3577 		 * did already happen before due to other refs in the same dir.
3578 		 */
3579 		ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
3580 		if (ret < 0)
3581 			goto out;
3582 		if (ret == inode_state_will_create) {
3583 			ret = 0;
3584 			/*
3585 			 * First check if any of the current inode's refs did
3586 			 * already create the dir.
3587 			 */
3588 			list_for_each_entry(cur2, &sctx->new_refs, list) {
3589 				if (cur == cur2)
3590 					break;
3591 				if (cur2->dir == cur->dir) {
3592 					ret = 1;
3593 					break;
3594 				}
3595 			}
3596
3597 			/*
3598 			 * If that did not happen, check if a previous inode
3599 			 * did already create the dir.
3600 			 */
3601 			if (!ret)
3602 				ret = did_create_dir(sctx, cur->dir);
3603 			if (ret < 0)
3604 				goto out;
3605 			if (!ret) {
3606 				ret = send_create_inode(sctx, cur->dir);
3607 				if (ret < 0)
3608 					goto out;
3609 			}
3610 		}
3611
3612 		/*
3613 		 * Check if this new ref would overwrite the first ref of
3614 		 * another unprocessed inode. If yes, orphanize the
3615 		 * overwritten inode. If we find an overwritten ref that is
3616 		 * not the first ref, simply unlink it.
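 *
 * Example (hypothetical inode numbers): if our new ref "a/x" still points
 * at unprocessed inode 300 in the parent snapshot and "a/x" is inode
 * 300's first ref, inode 300 is renamed to an orphan name (e.g.
 * "o300-8-0") so the name becomes free; if "a/x" is only one of inode
 * 300's additional hardlinks, a plain unlink of it is enough.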
3617 */ 3618 ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3619 cur->name, cur->name_len, 3620 &ow_inode, &ow_gen); 3621 if (ret < 0) 3622 goto out; 3623 if (ret) { 3624 ret = is_first_ref(sctx->parent_root, 3625 ow_inode, cur->dir, cur->name, 3626 cur->name_len); 3627 if (ret < 0) 3628 goto out; 3629 if (ret) { 3630 struct name_cache_entry *nce; 3631 3632 ret = orphanize_inode(sctx, ow_inode, ow_gen, 3633 cur->full_path); 3634 if (ret < 0) 3635 goto out; 3636 /* 3637 * Make sure we clear our orphanized inode's 3638 * name from the name cache. This is because the 3639 * inode ow_inode might be an ancestor of some 3640 * other inode that will be orphanized as well 3641 * later and has an inode number greater than 3642 * sctx->send_progress. We need to prevent 3643 * future name lookups from using the old name 3644 * and get instead the orphan name. 3645 */ 3646 nce = name_cache_search(sctx, ow_inode, ow_gen); 3647 if (nce) { 3648 name_cache_delete(sctx, nce); 3649 kfree(nce); 3650 } 3651 } else { 3652 ret = send_unlink(sctx, cur->full_path); 3653 if (ret < 0) 3654 goto out; 3655 } 3656 } 3657 3658 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) { 3659 ret = wait_for_dest_dir_move(sctx, cur, is_orphan); 3660 if (ret < 0) 3661 goto out; 3662 if (ret == 1) { 3663 can_rename = false; 3664 *pending_move = 1; 3665 } 3666 } 3667 3668 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root && 3669 can_rename) { 3670 ret = wait_for_parent_move(sctx, cur, is_orphan); 3671 if (ret < 0) 3672 goto out; 3673 if (ret == 1) { 3674 can_rename = false; 3675 *pending_move = 1; 3676 } 3677 } 3678 3679 /* 3680 * link/move the ref to the new place. If we have an orphan 3681 * inode, move it and update valid_path. If not, link or move 3682 * it depending on the inode mode. 3683 */ 3684 if (is_orphan && can_rename) { 3685 ret = send_rename(sctx, valid_path, cur->full_path); 3686 if (ret < 0) 3687 goto out; 3688 is_orphan = 0; 3689 ret = fs_path_copy(valid_path, cur->full_path); 3690 if (ret < 0) 3691 goto out; 3692 } else if (can_rename) { 3693 if (S_ISDIR(sctx->cur_inode_mode)) { 3694 /* 3695 * Dirs can't be linked, so move it. For moved 3696 * dirs, we always have one new and one deleted 3697 * ref. The deleted ref is ignored later. 3698 */ 3699 ret = send_rename(sctx, valid_path, 3700 cur->full_path); 3701 if (!ret) 3702 ret = fs_path_copy(valid_path, 3703 cur->full_path); 3704 if (ret < 0) 3705 goto out; 3706 } else { 3707 ret = send_link(sctx, cur->full_path, 3708 valid_path); 3709 if (ret < 0) 3710 goto out; 3711 } 3712 } 3713 ret = dup_ref(cur, &check_dirs); 3714 if (ret < 0) 3715 goto out; 3716 } 3717 3718 if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) { 3719 /* 3720 * Check if we can already rmdir the directory. If not, 3721 * orphanize it. For every dir item inside that gets deleted 3722 * later, we do this check again and rmdir it then if possible. 3723 * See the use of check_dirs for more details. 
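 *
 * Example: a deleted dir "c" whose child is still waiting for its own
 * move cannot be rmdir'ed yet (can_rmdir() sees the pending move); "c" is
 * then renamed to its orphan name here and the real rmdir is issued from
 * apply_dir_move() once the child's rename has finally been sent.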
3724 */ 3725 ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen, 3726 sctx->cur_ino); 3727 if (ret < 0) 3728 goto out; 3729 if (ret) { 3730 ret = send_rmdir(sctx, valid_path); 3731 if (ret < 0) 3732 goto out; 3733 } else if (!is_orphan) { 3734 ret = orphanize_inode(sctx, sctx->cur_ino, 3735 sctx->cur_inode_gen, valid_path); 3736 if (ret < 0) 3737 goto out; 3738 is_orphan = 1; 3739 } 3740 3741 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3742 ret = dup_ref(cur, &check_dirs); 3743 if (ret < 0) 3744 goto out; 3745 } 3746 } else if (S_ISDIR(sctx->cur_inode_mode) && 3747 !list_empty(&sctx->deleted_refs)) { 3748 /* 3749 * We have a moved dir. Add the old parent to check_dirs 3750 */ 3751 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, 3752 list); 3753 ret = dup_ref(cur, &check_dirs); 3754 if (ret < 0) 3755 goto out; 3756 } else if (!S_ISDIR(sctx->cur_inode_mode)) { 3757 /* 3758 * We have a non dir inode. Go through all deleted refs and 3759 * unlink them if they were not already overwritten by other 3760 * inodes. 3761 */ 3762 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3763 ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3764 sctx->cur_ino, sctx->cur_inode_gen, 3765 cur->name, cur->name_len); 3766 if (ret < 0) 3767 goto out; 3768 if (!ret) { 3769 ret = send_unlink(sctx, cur->full_path); 3770 if (ret < 0) 3771 goto out; 3772 } 3773 ret = dup_ref(cur, &check_dirs); 3774 if (ret < 0) 3775 goto out; 3776 } 3777 /* 3778 * If the inode is still orphan, unlink the orphan. This may 3779 * happen when a previous inode did overwrite the first ref 3780 * of this inode and no new refs were added for the current 3781 * inode. Unlinking does not mean that the inode is deleted in 3782 * all cases. There may still be links to this inode in other 3783 * places. 3784 */ 3785 if (is_orphan) { 3786 ret = send_unlink(sctx, valid_path); 3787 if (ret < 0) 3788 goto out; 3789 } 3790 } 3791 3792 /* 3793 * We did collect all parent dirs where cur_inode was once located. We 3794 * now go through all these dirs and check if they are pending for 3795 * deletion and if it's finally possible to perform the rmdir now. 3796 * We also update the inode stats of the parent dirs here. 3797 */ 3798 list_for_each_entry(cur, &check_dirs, list) { 3799 /* 3800 * In case we had refs into dirs that were not processed yet, 3801 * we don't need to do the utime and rmdir logic for these dirs. 3802 * The dir will be processed later. 
3803 */ 3804 if (cur->dir > sctx->cur_ino) 3805 continue; 3806 3807 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); 3808 if (ret < 0) 3809 goto out; 3810 3811 if (ret == inode_state_did_create || 3812 ret == inode_state_no_change) { 3813 /* TODO delayed utimes */ 3814 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3815 if (ret < 0) 3816 goto out; 3817 } else if (ret == inode_state_did_delete && 3818 cur->dir != last_dir_ino_rm) { 3819 ret = can_rmdir(sctx, cur->dir, cur->dir_gen, 3820 sctx->cur_ino); 3821 if (ret < 0) 3822 goto out; 3823 if (ret) { 3824 ret = get_cur_path(sctx, cur->dir, 3825 cur->dir_gen, valid_path); 3826 if (ret < 0) 3827 goto out; 3828 ret = send_rmdir(sctx, valid_path); 3829 if (ret < 0) 3830 goto out; 3831 last_dir_ino_rm = cur->dir; 3832 } 3833 } 3834 } 3835 3836 ret = 0; 3837 3838 out: 3839 __free_recorded_refs(&check_dirs); 3840 free_recorded_refs(sctx); 3841 fs_path_free(valid_path); 3842 return ret; 3843 } 3844 3845 static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, 3846 struct fs_path *name, void *ctx, struct list_head *refs) 3847 { 3848 int ret = 0; 3849 struct send_ctx *sctx = ctx; 3850 struct fs_path *p; 3851 u64 gen; 3852 3853 p = fs_path_alloc(); 3854 if (!p) 3855 return -ENOMEM; 3856 3857 ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL, 3858 NULL, NULL); 3859 if (ret < 0) 3860 goto out; 3861 3862 ret = get_cur_path(sctx, dir, gen, p); 3863 if (ret < 0) 3864 goto out; 3865 ret = fs_path_add_path(p, name); 3866 if (ret < 0) 3867 goto out; 3868 3869 ret = __record_ref(refs, dir, gen, p); 3870 3871 out: 3872 if (ret) 3873 fs_path_free(p); 3874 return ret; 3875 } 3876 3877 static int __record_new_ref(int num, u64 dir, int index, 3878 struct fs_path *name, 3879 void *ctx) 3880 { 3881 struct send_ctx *sctx = ctx; 3882 return record_ref(sctx->send_root, num, dir, index, name, 3883 ctx, &sctx->new_refs); 3884 } 3885 3886 3887 static int __record_deleted_ref(int num, u64 dir, int index, 3888 struct fs_path *name, 3889 void *ctx) 3890 { 3891 struct send_ctx *sctx = ctx; 3892 return record_ref(sctx->parent_root, num, dir, index, name, 3893 ctx, &sctx->deleted_refs); 3894 } 3895 3896 static int record_new_ref(struct send_ctx *sctx) 3897 { 3898 int ret; 3899 3900 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 3901 sctx->cmp_key, 0, __record_new_ref, sctx); 3902 if (ret < 0) 3903 goto out; 3904 ret = 0; 3905 3906 out: 3907 return ret; 3908 } 3909 3910 static int record_deleted_ref(struct send_ctx *sctx) 3911 { 3912 int ret; 3913 3914 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 3915 sctx->cmp_key, 0, __record_deleted_ref, sctx); 3916 if (ret < 0) 3917 goto out; 3918 ret = 0; 3919 3920 out: 3921 return ret; 3922 } 3923 3924 struct find_ref_ctx { 3925 u64 dir; 3926 u64 dir_gen; 3927 struct btrfs_root *root; 3928 struct fs_path *name; 3929 int found_idx; 3930 }; 3931 3932 static int __find_iref(int num, u64 dir, int index, 3933 struct fs_path *name, 3934 void *ctx_) 3935 { 3936 struct find_ref_ctx *ctx = ctx_; 3937 u64 dir_gen; 3938 int ret; 3939 3940 if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && 3941 strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { 3942 /* 3943 * To avoid doing extra lookups we'll only do this if everything 3944 * else matches. 
3945 */ 3946 ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL, 3947 NULL, NULL, NULL); 3948 if (ret) 3949 return ret; 3950 if (dir_gen != ctx->dir_gen) 3951 return 0; 3952 ctx->found_idx = num; 3953 return 1; 3954 } 3955 return 0; 3956 } 3957 3958 static int find_iref(struct btrfs_root *root, 3959 struct btrfs_path *path, 3960 struct btrfs_key *key, 3961 u64 dir, u64 dir_gen, struct fs_path *name) 3962 { 3963 int ret; 3964 struct find_ref_ctx ctx; 3965 3966 ctx.dir = dir; 3967 ctx.name = name; 3968 ctx.dir_gen = dir_gen; 3969 ctx.found_idx = -1; 3970 ctx.root = root; 3971 3972 ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); 3973 if (ret < 0) 3974 return ret; 3975 3976 if (ctx.found_idx == -1) 3977 return -ENOENT; 3978 3979 return ctx.found_idx; 3980 } 3981 3982 static int __record_changed_new_ref(int num, u64 dir, int index, 3983 struct fs_path *name, 3984 void *ctx) 3985 { 3986 u64 dir_gen; 3987 int ret; 3988 struct send_ctx *sctx = ctx; 3989 3990 ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL, 3991 NULL, NULL, NULL); 3992 if (ret) 3993 return ret; 3994 3995 ret = find_iref(sctx->parent_root, sctx->right_path, 3996 sctx->cmp_key, dir, dir_gen, name); 3997 if (ret == -ENOENT) 3998 ret = __record_new_ref(num, dir, index, name, sctx); 3999 else if (ret > 0) 4000 ret = 0; 4001 4002 return ret; 4003 } 4004 4005 static int __record_changed_deleted_ref(int num, u64 dir, int index, 4006 struct fs_path *name, 4007 void *ctx) 4008 { 4009 u64 dir_gen; 4010 int ret; 4011 struct send_ctx *sctx = ctx; 4012 4013 ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL, 4014 NULL, NULL, NULL); 4015 if (ret) 4016 return ret; 4017 4018 ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, 4019 dir, dir_gen, name); 4020 if (ret == -ENOENT) 4021 ret = __record_deleted_ref(num, dir, index, name, sctx); 4022 else if (ret > 0) 4023 ret = 0; 4024 4025 return ret; 4026 } 4027 4028 static int record_changed_ref(struct send_ctx *sctx) 4029 { 4030 int ret = 0; 4031 4032 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 4033 sctx->cmp_key, 0, __record_changed_new_ref, sctx); 4034 if (ret < 0) 4035 goto out; 4036 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 4037 sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); 4038 if (ret < 0) 4039 goto out; 4040 ret = 0; 4041 4042 out: 4043 return ret; 4044 } 4045 4046 /* 4047 * Record and process all refs at once. Needed when an inode changes the 4048 * generation number, which means that it was deleted and recreated. 
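 *
 * Example: if "/a" (ino 257, gen 5) is deleted and a new "/a" is created
 * that happens to reuse inode number 257 with gen 7, compare_tree reports
 * a changed inode, but it really is a delete plus a create, so all refs
 * of the old or new inode are recorded and processed in one go.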
4049 */ 4050 static int process_all_refs(struct send_ctx *sctx, 4051 enum btrfs_compare_tree_result cmd) 4052 { 4053 int ret; 4054 struct btrfs_root *root; 4055 struct btrfs_path *path; 4056 struct btrfs_key key; 4057 struct btrfs_key found_key; 4058 struct extent_buffer *eb; 4059 int slot; 4060 iterate_inode_ref_t cb; 4061 int pending_move = 0; 4062 4063 path = alloc_path_for_send(); 4064 if (!path) 4065 return -ENOMEM; 4066 4067 if (cmd == BTRFS_COMPARE_TREE_NEW) { 4068 root = sctx->send_root; 4069 cb = __record_new_ref; 4070 } else if (cmd == BTRFS_COMPARE_TREE_DELETED) { 4071 root = sctx->parent_root; 4072 cb = __record_deleted_ref; 4073 } else { 4074 btrfs_err(sctx->send_root->fs_info, 4075 "Wrong command %d in process_all_refs", cmd); 4076 ret = -EINVAL; 4077 goto out; 4078 } 4079 4080 key.objectid = sctx->cmp_key->objectid; 4081 key.type = BTRFS_INODE_REF_KEY; 4082 key.offset = 0; 4083 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4084 if (ret < 0) 4085 goto out; 4086 4087 while (1) { 4088 eb = path->nodes[0]; 4089 slot = path->slots[0]; 4090 if (slot >= btrfs_header_nritems(eb)) { 4091 ret = btrfs_next_leaf(root, path); 4092 if (ret < 0) 4093 goto out; 4094 else if (ret > 0) 4095 break; 4096 continue; 4097 } 4098 4099 btrfs_item_key_to_cpu(eb, &found_key, slot); 4100 4101 if (found_key.objectid != key.objectid || 4102 (found_key.type != BTRFS_INODE_REF_KEY && 4103 found_key.type != BTRFS_INODE_EXTREF_KEY)) 4104 break; 4105 4106 ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); 4107 if (ret < 0) 4108 goto out; 4109 4110 path->slots[0]++; 4111 } 4112 btrfs_release_path(path); 4113 4114 ret = process_recorded_refs(sctx, &pending_move); 4115 /* Only applicable to an incremental send. */ 4116 ASSERT(pending_move == 0); 4117 4118 out: 4119 btrfs_free_path(path); 4120 return ret; 4121 } 4122 4123 static int send_set_xattr(struct send_ctx *sctx, 4124 struct fs_path *path, 4125 const char *name, int name_len, 4126 const char *data, int data_len) 4127 { 4128 int ret = 0; 4129 4130 ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR); 4131 if (ret < 0) 4132 goto out; 4133 4134 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 4135 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); 4136 TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len); 4137 4138 ret = send_cmd(sctx); 4139 4140 tlv_put_failure: 4141 out: 4142 return ret; 4143 } 4144 4145 static int send_remove_xattr(struct send_ctx *sctx, 4146 struct fs_path *path, 4147 const char *name, int name_len) 4148 { 4149 int ret = 0; 4150 4151 ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR); 4152 if (ret < 0) 4153 goto out; 4154 4155 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 4156 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); 4157 4158 ret = send_cmd(sctx); 4159 4160 tlv_put_failure: 4161 out: 4162 return ret; 4163 } 4164 4165 static int __process_new_xattr(int num, struct btrfs_key *di_key, 4166 const char *name, int name_len, 4167 const char *data, int data_len, 4168 u8 type, void *ctx) 4169 { 4170 int ret; 4171 struct send_ctx *sctx = ctx; 4172 struct fs_path *p; 4173 posix_acl_xattr_header dummy_acl; 4174 4175 p = fs_path_alloc(); 4176 if (!p) 4177 return -ENOMEM; 4178 4179 /* 4180 * This hack is needed because empty acl's are stored as zero byte 4181 * data in xattrs. Problem with that is, that receiving these zero byte 4182 * acl's will fail later. To fix this, we send a dummy acl list that 4183 * only contains the version number and no entries. 
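 *
 * Concretely, with POSIX_ACL_XATTR_VERSION == 2 the dummy payload is just
 * the 4 byte little endian header 02 00 00 00 and no entries, which a
 * receiver accepts as an empty ACL.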
4184 */ 4185 if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) || 4186 !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) { 4187 if (data_len == 0) { 4188 dummy_acl.a_version = 4189 cpu_to_le32(POSIX_ACL_XATTR_VERSION); 4190 data = (char *)&dummy_acl; 4191 data_len = sizeof(dummy_acl); 4192 } 4193 } 4194 4195 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4196 if (ret < 0) 4197 goto out; 4198 4199 ret = send_set_xattr(sctx, p, name, name_len, data, data_len); 4200 4201 out: 4202 fs_path_free(p); 4203 return ret; 4204 } 4205 4206 static int __process_deleted_xattr(int num, struct btrfs_key *di_key, 4207 const char *name, int name_len, 4208 const char *data, int data_len, 4209 u8 type, void *ctx) 4210 { 4211 int ret; 4212 struct send_ctx *sctx = ctx; 4213 struct fs_path *p; 4214 4215 p = fs_path_alloc(); 4216 if (!p) 4217 return -ENOMEM; 4218 4219 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4220 if (ret < 0) 4221 goto out; 4222 4223 ret = send_remove_xattr(sctx, p, name, name_len); 4224 4225 out: 4226 fs_path_free(p); 4227 return ret; 4228 } 4229 4230 static int process_new_xattr(struct send_ctx *sctx) 4231 { 4232 int ret = 0; 4233 4234 ret = iterate_dir_item(sctx->send_root, sctx->left_path, 4235 sctx->cmp_key, __process_new_xattr, sctx); 4236 4237 return ret; 4238 } 4239 4240 static int process_deleted_xattr(struct send_ctx *sctx) 4241 { 4242 int ret; 4243 4244 ret = iterate_dir_item(sctx->parent_root, sctx->right_path, 4245 sctx->cmp_key, __process_deleted_xattr, sctx); 4246 4247 return ret; 4248 } 4249 4250 struct find_xattr_ctx { 4251 const char *name; 4252 int name_len; 4253 int found_idx; 4254 char *found_data; 4255 int found_data_len; 4256 }; 4257 4258 static int __find_xattr(int num, struct btrfs_key *di_key, 4259 const char *name, int name_len, 4260 const char *data, int data_len, 4261 u8 type, void *vctx) 4262 { 4263 struct find_xattr_ctx *ctx = vctx; 4264 4265 if (name_len == ctx->name_len && 4266 strncmp(name, ctx->name, name_len) == 0) { 4267 ctx->found_idx = num; 4268 ctx->found_data_len = data_len; 4269 ctx->found_data = kmemdup(data, data_len, GFP_NOFS); 4270 if (!ctx->found_data) 4271 return -ENOMEM; 4272 return 1; 4273 } 4274 return 0; 4275 } 4276 4277 static int find_xattr(struct btrfs_root *root, 4278 struct btrfs_path *path, 4279 struct btrfs_key *key, 4280 const char *name, int name_len, 4281 char **data, int *data_len) 4282 { 4283 int ret; 4284 struct find_xattr_ctx ctx; 4285 4286 ctx.name = name; 4287 ctx.name_len = name_len; 4288 ctx.found_idx = -1; 4289 ctx.found_data = NULL; 4290 ctx.found_data_len = 0; 4291 4292 ret = iterate_dir_item(root, path, key, __find_xattr, &ctx); 4293 if (ret < 0) 4294 return ret; 4295 4296 if (ctx.found_idx == -1) 4297 return -ENOENT; 4298 if (data) { 4299 *data = ctx.found_data; 4300 *data_len = ctx.found_data_len; 4301 } else { 4302 kfree(ctx.found_data); 4303 } 4304 return ctx.found_idx; 4305 } 4306 4307 4308 static int __process_changed_new_xattr(int num, struct btrfs_key *di_key, 4309 const char *name, int name_len, 4310 const char *data, int data_len, 4311 u8 type, void *ctx) 4312 { 4313 int ret; 4314 struct send_ctx *sctx = ctx; 4315 char *found_data = NULL; 4316 int found_data_len = 0; 4317 4318 ret = find_xattr(sctx->parent_root, sctx->right_path, 4319 sctx->cmp_key, name, name_len, &found_data, 4320 &found_data_len); 4321 if (ret == -ENOENT) { 4322 ret = __process_new_xattr(num, di_key, name, name_len, data, 4323 data_len, type, ctx); 4324 } else if (ret >= 0) { 4325 if 
(data_len != found_data_len ||
4326 		    memcmp(data, found_data, data_len)) {
4327 			ret = __process_new_xattr(num, di_key, name, name_len,
4328 					data, data_len, type, ctx);
4329 		} else {
4330 			ret = 0;
4331 		}
4332 	}
4333
4334 	kfree(found_data);
4335 	return ret;
4336 }
4337
4338 static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
4339 					   const char *name, int name_len,
4340 					   const char *data, int data_len,
4341 					   u8 type, void *ctx)
4342 {
4343 	int ret;
4344 	struct send_ctx *sctx = ctx;
4345
4346 	ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
4347 			 name, name_len, NULL, NULL);
4348 	if (ret == -ENOENT)
4349 		ret = __process_deleted_xattr(num, di_key, name, name_len, data,
4350 				data_len, type, ctx);
4351 	else if (ret >= 0)
4352 		ret = 0;
4353
4354 	return ret;
4355 }
4356
4357 static int process_changed_xattr(struct send_ctx *sctx)
4358 {
4359 	int ret = 0;
4360
4361 	ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4362 			       sctx->cmp_key, __process_changed_new_xattr, sctx);
4363 	if (ret < 0)
4364 		goto out;
4365 	ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4366 			       sctx->cmp_key, __process_changed_deleted_xattr, sctx);
4367
4368 out:
4369 	return ret;
4370 }
4371
4372 static int process_all_new_xattrs(struct send_ctx *sctx)
4373 {
4374 	int ret;
4375 	struct btrfs_root *root;
4376 	struct btrfs_path *path;
4377 	struct btrfs_key key;
4378 	struct btrfs_key found_key;
4379 	struct extent_buffer *eb;
4380 	int slot;
4381
4382 	path = alloc_path_for_send();
4383 	if (!path)
4384 		return -ENOMEM;
4385
4386 	root = sctx->send_root;
4387
4388 	key.objectid = sctx->cmp_key->objectid;
4389 	key.type = BTRFS_XATTR_ITEM_KEY;
4390 	key.offset = 0;
4391 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4392 	if (ret < 0)
4393 		goto out;
4394
4395 	while (1) {
4396 		eb = path->nodes[0];
4397 		slot = path->slots[0];
4398 		if (slot >= btrfs_header_nritems(eb)) {
4399 			ret = btrfs_next_leaf(root, path);
4400 			if (ret < 0) {
4401 				goto out;
4402 			} else if (ret > 0) {
4403 				ret = 0;
4404 				break;
4405 			}
4406 			continue;
4407 		}
4408
4409 		btrfs_item_key_to_cpu(eb, &found_key, slot);
4410 		if (found_key.objectid != key.objectid ||
4411 		    found_key.type != key.type) {
4412 			ret = 0;
4413 			goto out;
4414 		}
4415
4416 		ret = iterate_dir_item(root, path, &found_key,
4417 				       __process_new_xattr, sctx);
4418 		if (ret < 0)
4419 			goto out;
4420
4421 		path->slots[0]++;
4422 	}
4423
4424 out:
4425 	btrfs_free_path(path);
4426 	return ret;
4427 }
4428
4429 static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
4430 {
4431 	struct btrfs_root *root = sctx->send_root;
4432 	struct btrfs_fs_info *fs_info = root->fs_info;
4433 	struct inode *inode;
4434 	struct page *page;
4435 	char *addr;
4436 	struct btrfs_key key;
4437 	pgoff_t index = offset >> PAGE_CACHE_SHIFT;
4438 	pgoff_t last_index;
4439 	unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
4440 	ssize_t ret = 0;
4441
4442 	key.objectid = sctx->cur_ino;
4443 	key.type = BTRFS_INODE_ITEM_KEY;
4444 	key.offset = 0;
4445
4446 	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
4447 	if (IS_ERR(inode))
4448 		return PTR_ERR(inode);
4449
4450 	if (offset + len > i_size_read(inode)) {
4451 		if (offset > i_size_read(inode))
4452 			len = 0;
4453 		else
4454 			len = i_size_read(inode) - offset;
4455 	}
4456 	if (len == 0)
4457 		goto out;
4458
4459 	last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
4460
4461 	/* initial readahead */
4462 	memset(&sctx->ra, 0, sizeof(struct file_ra_state));
4463 	file_ra_state_init(&sctx->ra, inode->i_mapping);
4464 	btrfs_force_ra(inode->i_mapping, &sctx->ra,
NULL, index, 4465 last_index - index + 1); 4466 4467 while (index <= last_index) { 4468 unsigned cur_len = min_t(unsigned, len, 4469 PAGE_CACHE_SIZE - pg_offset); 4470 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 4471 if (!page) { 4472 ret = -ENOMEM; 4473 break; 4474 } 4475 4476 if (!PageUptodate(page)) { 4477 btrfs_readpage(NULL, page); 4478 lock_page(page); 4479 if (!PageUptodate(page)) { 4480 unlock_page(page); 4481 page_cache_release(page); 4482 ret = -EIO; 4483 break; 4484 } 4485 } 4486 4487 addr = kmap(page); 4488 memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len); 4489 kunmap(page); 4490 unlock_page(page); 4491 page_cache_release(page); 4492 index++; 4493 pg_offset = 0; 4494 len -= cur_len; 4495 ret += cur_len; 4496 } 4497 out: 4498 iput(inode); 4499 return ret; 4500 } 4501 4502 /* 4503 * Read some bytes from the current inode/file and send a write command to 4504 * user space. 4505 */ 4506 static int send_write(struct send_ctx *sctx, u64 offset, u32 len) 4507 { 4508 int ret = 0; 4509 struct fs_path *p; 4510 ssize_t num_read = 0; 4511 4512 p = fs_path_alloc(); 4513 if (!p) 4514 return -ENOMEM; 4515 4516 verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); 4517 4518 num_read = fill_read_buf(sctx, offset, len); 4519 if (num_read <= 0) { 4520 if (num_read < 0) 4521 ret = num_read; 4522 goto out; 4523 } 4524 4525 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4526 if (ret < 0) 4527 goto out; 4528 4529 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4530 if (ret < 0) 4531 goto out; 4532 4533 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4534 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4535 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read); 4536 4537 ret = send_cmd(sctx); 4538 4539 tlv_put_failure: 4540 out: 4541 fs_path_free(p); 4542 if (ret < 0) 4543 return ret; 4544 return num_read; 4545 } 4546 4547 /* 4548 * Send a clone command to user space. 4549 */ 4550 static int send_clone(struct send_ctx *sctx, 4551 u64 offset, u32 len, 4552 struct clone_root *clone_root) 4553 { 4554 int ret = 0; 4555 struct fs_path *p; 4556 u64 gen; 4557 4558 verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " 4559 "clone_inode=%llu, clone_offset=%llu\n", offset, len, 4560 clone_root->root->objectid, clone_root->ino, 4561 clone_root->offset); 4562 4563 p = fs_path_alloc(); 4564 if (!p) 4565 return -ENOMEM; 4566 4567 ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE); 4568 if (ret < 0) 4569 goto out; 4570 4571 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4572 if (ret < 0) 4573 goto out; 4574 4575 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4576 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); 4577 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4578 4579 if (clone_root->root == sctx->send_root) { 4580 ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, 4581 &gen, NULL, NULL, NULL, NULL); 4582 if (ret < 0) 4583 goto out; 4584 ret = get_cur_path(sctx, clone_root->ino, gen, p); 4585 } else { 4586 ret = get_inode_path(clone_root->root, clone_root->ino, p); 4587 } 4588 if (ret < 0) 4589 goto out; 4590 4591 /* 4592 * If the parent we're using has a received_uuid set then use that as 4593 * our clone source as that is what we will look for when doing a 4594 * receive. 4595 * 4596 * This covers the case that we create a snapshot off of a received 4597 * subvolume and then use that as the parent and try to receive on a 4598 * different host. 
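* E.g. subvolume S is sent from host A to host B, where the received copy records S's uuid as its received_uuid; a read-only snapshot taken from that copy keeps the received_uuid, so a stream naming that snapshot as clone source can only be matched on another host by the received_uuid, not by the snapshot's own, locally generated uuid.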
4599 */ 4600 if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid)) 4601 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 4602 clone_root->root->root_item.received_uuid); 4603 else 4604 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 4605 clone_root->root->root_item.uuid); 4606 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 4607 le64_to_cpu(clone_root->root->root_item.ctransid)); 4608 TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); 4609 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, 4610 clone_root->offset); 4611 4612 ret = send_cmd(sctx); 4613 4614 tlv_put_failure: 4615 out: 4616 fs_path_free(p); 4617 return ret; 4618 } 4619 4620 /* 4621 * Send an update extent command to user space. 4622 */ 4623 static int send_update_extent(struct send_ctx *sctx, 4624 u64 offset, u32 len) 4625 { 4626 int ret = 0; 4627 struct fs_path *p; 4628 4629 p = fs_path_alloc(); 4630 if (!p) 4631 return -ENOMEM; 4632 4633 ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT); 4634 if (ret < 0) 4635 goto out; 4636 4637 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4638 if (ret < 0) 4639 goto out; 4640 4641 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4642 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4643 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); 4644 4645 ret = send_cmd(sctx); 4646 4647 tlv_put_failure: 4648 out: 4649 fs_path_free(p); 4650 return ret; 4651 } 4652 4653 static int send_hole(struct send_ctx *sctx, u64 end) 4654 { 4655 struct fs_path *p = NULL; 4656 u64 offset = sctx->cur_inode_last_extent; 4657 u64 len; 4658 int ret = 0; 4659 4660 p = fs_path_alloc(); 4661 if (!p) 4662 return -ENOMEM; 4663 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4664 if (ret < 0) 4665 goto tlv_put_failure; 4666 memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); 4667 while (offset < end) { 4668 len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); 4669 4670 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4671 if (ret < 0) 4672 break; 4673 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4674 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4675 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); 4676 ret = send_cmd(sctx); 4677 if (ret < 0) 4678 break; 4679 offset += len; 4680 } 4681 tlv_put_failure: 4682 fs_path_free(p); 4683 return ret; 4684 } 4685 4686 static int send_extent_data(struct send_ctx *sctx, 4687 const u64 offset, 4688 const u64 len) 4689 { 4690 u64 sent = 0; 4691 4692 if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) 4693 return send_update_extent(sctx, offset, len); 4694 4695 while (sent < len) { 4696 u64 size = len - sent; 4697 int ret; 4698 4699 if (size > BTRFS_SEND_READ_SIZE) 4700 size = BTRFS_SEND_READ_SIZE; 4701 ret = send_write(sctx, offset + sent, size); 4702 if (ret < 0) 4703 return ret; 4704 if (!ret) 4705 break; 4706 sent += ret; 4707 } 4708 return 0; 4709 } 4710 4711 static int clone_range(struct send_ctx *sctx, 4712 struct clone_root *clone_root, 4713 const u64 disk_byte, 4714 u64 data_offset, 4715 u64 offset, 4716 u64 len) 4717 { 4718 struct btrfs_path *path; 4719 struct btrfs_key key; 4720 int ret; 4721 4722 path = alloc_path_for_send(); 4723 if (!path) 4724 return -ENOMEM; 4725 4726 /* 4727 * We can't send a clone operation for the entire range if we find 4728 * extent items in the respective range in the source file that 4729 * refer to different extents or if we find holes. 4730 * So check for that and do a mix of clone and regular write/copy 4731 * operations if needed. 
4732 * 4733 * Example: 4734 * 4735 * mkfs.btrfs -f /dev/sda 4736 * mount /dev/sda /mnt 4737 * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo 4738 * cp --reflink=always /mnt/foo /mnt/bar 4739 * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo 4740 * btrfs subvolume snapshot -r /mnt /mnt/snap 4741 * 4742 * If, while sending the snapshot and processing file bar (which 4743 * has a higher inode number than foo), we blindly sent a clone operation 4744 * for the [0, 100K[ range from foo to bar, the receiver would end up with 4745 * a file bar whose content matches file foo's - in other words, not 4746 * the content of bar in the original filesystem. 4747 */ 4748 key.objectid = clone_root->ino; 4749 key.type = BTRFS_EXTENT_DATA_KEY; 4750 key.offset = clone_root->offset; 4751 ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0); 4752 if (ret < 0) 4753 goto out; 4754 if (ret > 0 && path->slots[0] > 0) { 4755 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1); 4756 if (key.objectid == clone_root->ino && 4757 key.type == BTRFS_EXTENT_DATA_KEY) 4758 path->slots[0]--; 4759 } 4760 4761 while (true) { 4762 struct extent_buffer *leaf = path->nodes[0]; 4763 int slot = path->slots[0]; 4764 struct btrfs_file_extent_item *ei; 4765 u8 type; 4766 u64 ext_len; 4767 u64 clone_len; 4768 4769 if (slot >= btrfs_header_nritems(leaf)) { 4770 ret = btrfs_next_leaf(clone_root->root, path); 4771 if (ret < 0) 4772 goto out; 4773 else if (ret > 0) 4774 break; 4775 continue; 4776 } 4777 4778 btrfs_item_key_to_cpu(leaf, &key, slot); 4779 4780 /* 4781 * We might have an implicit trailing hole (NO_HOLES feature 4782 * enabled). We deal with it after leaving this loop. 4783 */ 4784 if (key.objectid != clone_root->ino || 4785 key.type != BTRFS_EXTENT_DATA_KEY) 4786 break; 4787 4788 ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 4789 type = btrfs_file_extent_type(leaf, ei); 4790 if (type == BTRFS_FILE_EXTENT_INLINE) { 4791 ext_len = btrfs_file_extent_inline_len(leaf, slot, ei); 4792 ext_len = PAGE_CACHE_ALIGN(ext_len); 4793 } else { 4794 ext_len = btrfs_file_extent_num_bytes(leaf, ei); 4795 } 4796 4797 if (key.offset + ext_len <= clone_root->offset) 4798 goto next; 4799 4800 if (key.offset > clone_root->offset) { 4801 /* Implicit hole, NO_HOLES feature enabled.
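There is nothing to clone from in the gap [clone_root->offset, key.offset[, so send that part of the range as plain writes of the file's own data before resuming with clone operations.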
*/ 4802 u64 hole_len = key.offset - clone_root->offset; 4803 4804 if (hole_len > len) 4805 hole_len = len; 4806 ret = send_extent_data(sctx, offset, hole_len); 4807 if (ret < 0) 4808 goto out; 4809 4810 len -= hole_len; 4811 if (len == 0) 4812 break; 4813 offset += hole_len; 4814 clone_root->offset += hole_len; 4815 data_offset += hole_len; 4816 } 4817 4818 if (key.offset >= clone_root->offset + len) 4819 break; 4820 4821 clone_len = min_t(u64, ext_len, len); 4822 4823 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte && 4824 btrfs_file_extent_offset(leaf, ei) == data_offset) 4825 ret = send_clone(sctx, offset, clone_len, clone_root); 4826 else 4827 ret = send_extent_data(sctx, offset, clone_len); 4828 4829 if (ret < 0) 4830 goto out; 4831 4832 len -= clone_len; 4833 if (len == 0) 4834 break; 4835 offset += clone_len; 4836 clone_root->offset += clone_len; 4837 data_offset += clone_len; 4838 next: 4839 path->slots[0]++; 4840 } 4841 4842 if (len > 0) 4843 ret = send_extent_data(sctx, offset, len); 4844 else 4845 ret = 0; 4846 out: 4847 btrfs_free_path(path); 4848 return ret; 4849 } 4850 4851 static int send_write_or_clone(struct send_ctx *sctx, 4852 struct btrfs_path *path, 4853 struct btrfs_key *key, 4854 struct clone_root *clone_root) 4855 { 4856 int ret = 0; 4857 struct btrfs_file_extent_item *ei; 4858 u64 offset = key->offset; 4859 u64 len; 4860 u8 type; 4861 u64 bs = sctx->send_root->fs_info->sb->s_blocksize; 4862 4863 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 4864 struct btrfs_file_extent_item); 4865 type = btrfs_file_extent_type(path->nodes[0], ei); 4866 if (type == BTRFS_FILE_EXTENT_INLINE) { 4867 len = btrfs_file_extent_inline_len(path->nodes[0], 4868 path->slots[0], ei); 4869 /* 4870 * it is possible the inline item won't cover the whole page, 4871 * but there may be items after this page. 
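E.g. a 100 byte inline extent is rounded up to a full page here, and the inode size check below clamps the result again.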
Make 4872 * sure to send the whole thing 4873 */ 4874 len = PAGE_CACHE_ALIGN(len); 4875 } else { 4876 len = btrfs_file_extent_num_bytes(path->nodes[0], ei); 4877 } 4878 4879 if (offset + len > sctx->cur_inode_size) 4880 len = sctx->cur_inode_size - offset; 4881 if (len == 0) { 4882 ret = 0; 4883 goto out; 4884 } 4885 4886 if (clone_root && IS_ALIGNED(offset + len, bs)) { 4887 u64 disk_byte; 4888 u64 data_offset; 4889 4890 disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); 4891 data_offset = btrfs_file_extent_offset(path->nodes[0], ei); 4892 ret = clone_range(sctx, clone_root, disk_byte, data_offset, 4893 offset, len); 4894 } else { 4895 ret = send_extent_data(sctx, offset, len); 4896 } 4897 out: 4898 return ret; 4899 } 4900 4901 static int is_extent_unchanged(struct send_ctx *sctx, 4902 struct btrfs_path *left_path, 4903 struct btrfs_key *ekey) 4904 { 4905 int ret = 0; 4906 struct btrfs_key key; 4907 struct btrfs_path *path = NULL; 4908 struct extent_buffer *eb; 4909 int slot; 4910 struct btrfs_key found_key; 4911 struct btrfs_file_extent_item *ei; 4912 u64 left_disknr; 4913 u64 right_disknr; 4914 u64 left_offset; 4915 u64 right_offset; 4916 u64 left_offset_fixed; 4917 u64 left_len; 4918 u64 right_len; 4919 u64 left_gen; 4920 u64 right_gen; 4921 u8 left_type; 4922 u8 right_type; 4923 4924 path = alloc_path_for_send(); 4925 if (!path) 4926 return -ENOMEM; 4927 4928 eb = left_path->nodes[0]; 4929 slot = left_path->slots[0]; 4930 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 4931 left_type = btrfs_file_extent_type(eb, ei); 4932 4933 if (left_type != BTRFS_FILE_EXTENT_REG) { 4934 ret = 0; 4935 goto out; 4936 } 4937 left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 4938 left_len = btrfs_file_extent_num_bytes(eb, ei); 4939 left_offset = btrfs_file_extent_offset(eb, ei); 4940 left_gen = btrfs_file_extent_generation(eb, ei); 4941 4942 /* 4943 * Following comments will refer to these graphics. L is the left 4944 * extents which we are checking at the moment. 1-8 are the right 4945 * extents that we iterate. 4946 * 4947 * |-----L-----| 4948 * |-1-|-2a-|-3-|-4-|-5-|-6-| 4949 * 4950 * |-----L-----| 4951 * |--1--|-2b-|...(same as above) 4952 * 4953 * Alternative situation. Happens on files where extents got split. 4954 * |-----L-----| 4955 * |-----------7-----------|-6-| 4956 * 4957 * Alternative situation. Happens on files which got larger. 4958 * |-----L-----| 4959 * |-8-| 4960 * Nothing follows after 8. 4961 */ 4962 4963 key.objectid = ekey->objectid; 4964 key.type = BTRFS_EXTENT_DATA_KEY; 4965 key.offset = ekey->offset; 4966 ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0); 4967 if (ret < 0) 4968 goto out; 4969 if (ret) { 4970 ret = 0; 4971 goto out; 4972 } 4973 4974 /* 4975 * Handle special case where the right side has no extents at all. 4976 */ 4977 eb = path->nodes[0]; 4978 slot = path->slots[0]; 4979 btrfs_item_key_to_cpu(eb, &found_key, slot); 4980 if (found_key.objectid != key.objectid || 4981 found_key.type != key.type) { 4982 /* If we're a hole then just pretend nothing changed */ 4983 ret = (left_disknr) ? 0 : 1; 4984 goto out; 4985 } 4986 4987 /* 4988 * We're now on 2a, 2b or 7. 
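* (Or possibly on extent 8, a case the first iteration of the loop below detects and handles.)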
4989 */ 4990 key = found_key; 4991 while (key.offset < ekey->offset + left_len) { 4992 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 4993 right_type = btrfs_file_extent_type(eb, ei); 4994 if (right_type != BTRFS_FILE_EXTENT_REG) { 4995 ret = 0; 4996 goto out; 4997 } 4998 4999 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 5000 right_len = btrfs_file_extent_num_bytes(eb, ei); 5001 right_offset = btrfs_file_extent_offset(eb, ei); 5002 right_gen = btrfs_file_extent_generation(eb, ei); 5003 5004 /* 5005 * Are we at extent 8? If yes, we know the extent is changed. 5006 * This may only happen on the first iteration. 5007 */ 5008 if (found_key.offset + right_len <= ekey->offset) { 5009 /* If we're a hole just pretend nothing changed */ 5010 ret = (left_disknr) ? 0 : 1; 5011 goto out; 5012 } 5013 5014 left_offset_fixed = left_offset; 5015 if (key.offset < ekey->offset) { 5016 /* Fix the right offset for 2a and 7. */ 5017 right_offset += ekey->offset - key.offset; 5018 } else { 5019 /* Fix the left offset for all behind 2a and 2b */ 5020 left_offset_fixed += key.offset - ekey->offset; 5021 } 5022 5023 /* 5024 * Check if we have the same extent. 5025 */ 5026 if (left_disknr != right_disknr || 5027 left_offset_fixed != right_offset || 5028 left_gen != right_gen) { 5029 ret = 0; 5030 goto out; 5031 } 5032 5033 /* 5034 * Go to the next extent. 5035 */ 5036 ret = btrfs_next_item(sctx->parent_root, path); 5037 if (ret < 0) 5038 goto out; 5039 if (!ret) { 5040 eb = path->nodes[0]; 5041 slot = path->slots[0]; 5042 btrfs_item_key_to_cpu(eb, &found_key, slot); 5043 } 5044 if (ret || found_key.objectid != key.objectid || 5045 found_key.type != key.type) { 5046 key.offset += right_len; 5047 break; 5048 } 5049 if (found_key.offset != key.offset + right_len) { 5050 ret = 0; 5051 goto out; 5052 } 5053 key = found_key; 5054 } 5055 5056 /* 5057 * We're now behind the left extent (treat as unchanged) or at the end 5058 * of the right side (treat as changed). 
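* E.g. after walking extents 1-5 in the first graphic, key.offset has advanced past the end of L and we report the extent as unchanged; if the right extents ended before covering all of L, key.offset is still below ekey->offset + left_len and we report a change.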
5059 */ 5060 if (key.offset >= ekey->offset + left_len) 5061 ret = 1; 5062 else 5063 ret = 0; 5064 5065 5066 out: 5067 btrfs_free_path(path); 5068 return ret; 5069 } 5070 5071 static int get_last_extent(struct send_ctx *sctx, u64 offset) 5072 { 5073 struct btrfs_path *path; 5074 struct btrfs_root *root = sctx->send_root; 5075 struct btrfs_file_extent_item *fi; 5076 struct btrfs_key key; 5077 u64 extent_end; 5078 u8 type; 5079 int ret; 5080 5081 path = alloc_path_for_send(); 5082 if (!path) 5083 return -ENOMEM; 5084 5085 sctx->cur_inode_last_extent = 0; 5086 5087 key.objectid = sctx->cur_ino; 5088 key.type = BTRFS_EXTENT_DATA_KEY; 5089 key.offset = offset; 5090 ret = btrfs_search_slot_for_read(root, &key, path, 0, 1); 5091 if (ret < 0) 5092 goto out; 5093 ret = 0; 5094 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 5095 if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY) 5096 goto out; 5097 5098 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 5099 struct btrfs_file_extent_item); 5100 type = btrfs_file_extent_type(path->nodes[0], fi); 5101 if (type == BTRFS_FILE_EXTENT_INLINE) { 5102 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 5103 path->slots[0], fi); 5104 extent_end = ALIGN(key.offset + size, 5105 sctx->send_root->sectorsize); 5106 } else { 5107 extent_end = key.offset + 5108 btrfs_file_extent_num_bytes(path->nodes[0], fi); 5109 } 5110 sctx->cur_inode_last_extent = extent_end; 5111 out: 5112 btrfs_free_path(path); 5113 return ret; 5114 } 5115 5116 static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, 5117 struct btrfs_key *key) 5118 { 5119 struct btrfs_file_extent_item *fi; 5120 u64 extent_end; 5121 u8 type; 5122 int ret = 0; 5123 5124 if (sctx->cur_ino != key->objectid || !need_send_hole(sctx)) 5125 return 0; 5126 5127 if (sctx->cur_inode_last_extent == (u64)-1) { 5128 ret = get_last_extent(sctx, key->offset - 1); 5129 if (ret) 5130 return ret; 5131 } 5132 5133 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 5134 struct btrfs_file_extent_item); 5135 type = btrfs_file_extent_type(path->nodes[0], fi); 5136 if (type == BTRFS_FILE_EXTENT_INLINE) { 5137 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 5138 path->slots[0], fi); 5139 extent_end = ALIGN(key->offset + size, 5140 sctx->send_root->sectorsize); 5141 } else { 5142 extent_end = key->offset + 5143 btrfs_file_extent_num_bytes(path->nodes[0], fi); 5144 } 5145 5146 if (path->slots[0] == 0 && 5147 sctx->cur_inode_last_extent < key->offset) { 5148 /* 5149 * We might have skipped entire leafs that contained only 5150 * file extent items for our current inode. These leafs have 5151 * a generation number smaller (older) than the one in the 5152 * current leaf and the leaf our last extent came from, and 5153 * are located between these 2 leafs. 
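* Those skipped leafs hold the extents that really precede key->offset, so re-lookup the end of the last extent below instead of trusting the stale cur_inode_last_extent; otherwise we would send a bogus hole over data that did not change.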
5154 */ 5155 ret = get_last_extent(sctx, key->offset - 1); 5156 if (ret) 5157 return ret; 5158 } 5159 5160 if (sctx->cur_inode_last_extent < key->offset) 5161 ret = send_hole(sctx, key->offset); 5162 sctx->cur_inode_last_extent = extent_end; 5163 return ret; 5164 } 5165 5166 static int process_extent(struct send_ctx *sctx, 5167 struct btrfs_path *path, 5168 struct btrfs_key *key) 5169 { 5170 struct clone_root *found_clone = NULL; 5171 int ret = 0; 5172 5173 if (S_ISLNK(sctx->cur_inode_mode)) 5174 return 0; 5175 5176 if (sctx->parent_root && !sctx->cur_inode_new) { 5177 ret = is_extent_unchanged(sctx, path, key); 5178 if (ret < 0) 5179 goto out; 5180 if (ret) { 5181 ret = 0; 5182 goto out_hole; 5183 } 5184 } else { 5185 struct btrfs_file_extent_item *ei; 5186 u8 type; 5187 5188 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 5189 struct btrfs_file_extent_item); 5190 type = btrfs_file_extent_type(path->nodes[0], ei); 5191 if (type == BTRFS_FILE_EXTENT_PREALLOC || 5192 type == BTRFS_FILE_EXTENT_REG) { 5193 /* 5194 * The send spec does not have a prealloc command yet, 5195 * so just leave a hole for prealloc'ed extents until 5196 * we have enough commands queued up to justify rev'ing 5197 * the send spec. 5198 */ 5199 if (type == BTRFS_FILE_EXTENT_PREALLOC) { 5200 ret = 0; 5201 goto out; 5202 } 5203 5204 /* Have a hole, just skip it. */ 5205 if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) { 5206 ret = 0; 5207 goto out; 5208 } 5209 } 5210 } 5211 5212 ret = find_extent_clone(sctx, path, key->objectid, key->offset, 5213 sctx->cur_inode_size, &found_clone); 5214 if (ret != -ENOENT && ret < 0) 5215 goto out; 5216 5217 ret = send_write_or_clone(sctx, path, key, found_clone); 5218 if (ret) 5219 goto out; 5220 out_hole: 5221 ret = maybe_send_hole(sctx, path, key); 5222 out: 5223 return ret; 5224 } 5225 5226 static int process_all_extents(struct send_ctx *sctx) 5227 { 5228 int ret; 5229 struct btrfs_root *root; 5230 struct btrfs_path *path; 5231 struct btrfs_key key; 5232 struct btrfs_key found_key; 5233 struct extent_buffer *eb; 5234 int slot; 5235 5236 root = sctx->send_root; 5237 path = alloc_path_for_send(); 5238 if (!path) 5239 return -ENOMEM; 5240 5241 key.objectid = sctx->cmp_key->objectid; 5242 key.type = BTRFS_EXTENT_DATA_KEY; 5243 key.offset = 0; 5244 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5245 if (ret < 0) 5246 goto out; 5247 5248 while (1) { 5249 eb = path->nodes[0]; 5250 slot = path->slots[0]; 5251 5252 if (slot >= btrfs_header_nritems(eb)) { 5253 ret = btrfs_next_leaf(root, path); 5254 if (ret < 0) { 5255 goto out; 5256 } else if (ret > 0) { 5257 ret = 0; 5258 break; 5259 } 5260 continue; 5261 } 5262 5263 btrfs_item_key_to_cpu(eb, &found_key, slot); 5264 5265 if (found_key.objectid != key.objectid || 5266 found_key.type != key.type) { 5267 ret = 0; 5268 goto out; 5269 } 5270 5271 ret = process_extent(sctx, path, &found_key); 5272 if (ret < 0) 5273 goto out; 5274 5275 path->slots[0]++; 5276 } 5277 5278 out: 5279 btrfs_free_path(path); 5280 return ret; 5281 } 5282 5283 static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end, 5284 int *pending_move, 5285 int *refs_processed) 5286 { 5287 int ret = 0; 5288 5289 if (sctx->cur_ino == 0) 5290 goto out; 5291 if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid && 5292 sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY) 5293 goto out; 5294 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) 5295 goto out; 5296 5297 ret = process_recorded_refs(sctx, pending_move); 5298 if (ret < 0) 
5299 goto out; 5300 5301 *refs_processed = 1; 5302 out: 5303 return ret; 5304 } 5305 5306 static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) 5307 { 5308 int ret = 0; 5309 u64 left_mode; 5310 u64 left_uid; 5311 u64 left_gid; 5312 u64 right_mode; 5313 u64 right_uid; 5314 u64 right_gid; 5315 int need_chmod = 0; 5316 int need_chown = 0; 5317 int pending_move = 0; 5318 int refs_processed = 0; 5319 5320 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, 5321 &refs_processed); 5322 if (ret < 0) 5323 goto out; 5324 5325 /* 5326 * We have processed the refs and thus need to advance send_progress. 5327 * Now, calls to get_cur_xxx will take the updated refs of the current 5328 * inode into account. 5329 * 5330 * On the other hand, if our current inode is a directory and couldn't 5331 * be moved/renamed because its parent was renamed/moved too and it has 5332 * a higher inode number, we can only move/rename our current inode 5333 * after we moved/renamed its parent. Therefore in this case operate on 5334 * the old path (pre move/rename) of our current inode, and the 5335 * move/rename will be performed later. 5336 */ 5337 if (refs_processed && !pending_move) 5338 sctx->send_progress = sctx->cur_ino + 1; 5339 5340 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted) 5341 goto out; 5342 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino) 5343 goto out; 5344 5345 ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, 5346 &left_mode, &left_uid, &left_gid, NULL); 5347 if (ret < 0) 5348 goto out; 5349 5350 if (!sctx->parent_root || sctx->cur_inode_new) { 5351 need_chown = 1; 5352 if (!S_ISLNK(sctx->cur_inode_mode)) 5353 need_chmod = 1; 5354 } else { 5355 ret = get_inode_info(sctx->parent_root, sctx->cur_ino, 5356 NULL, NULL, &right_mode, &right_uid, 5357 &right_gid, NULL); 5358 if (ret < 0) 5359 goto out; 5360 5361 if (left_uid != right_uid || left_gid != right_gid) 5362 need_chown = 1; 5363 if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode) 5364 need_chmod = 1; 5365 } 5366 5367 if (S_ISREG(sctx->cur_inode_mode)) { 5368 if (need_send_hole(sctx)) { 5369 if (sctx->cur_inode_last_extent == (u64)-1 || 5370 sctx->cur_inode_last_extent < 5371 sctx->cur_inode_size) { 5372 ret = get_last_extent(sctx, (u64)-1); 5373 if (ret) 5374 goto out; 5375 } 5376 if (sctx->cur_inode_last_extent < 5377 sctx->cur_inode_size) { 5378 ret = send_hole(sctx, sctx->cur_inode_size); 5379 if (ret) 5380 goto out; 5381 } 5382 } 5383 ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5384 sctx->cur_inode_size); 5385 if (ret < 0) 5386 goto out; 5387 } 5388 5389 if (need_chown) { 5390 ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5391 left_uid, left_gid); 5392 if (ret < 0) 5393 goto out; 5394 } 5395 if (need_chmod) { 5396 ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5397 left_mode); 5398 if (ret < 0) 5399 goto out; 5400 } 5401 5402 /* 5403 * If other directory inodes depended on our current directory 5404 * inode's move/rename, now do their move/rename operations. 5405 */ 5406 if (!is_waiting_for_move(sctx, sctx->cur_ino)) { 5407 ret = apply_children_dir_moves(sctx); 5408 if (ret) 5409 goto out; 5410 /* 5411 * Need to send that every time, no matter if it actually 5412 * changed between the two trees as we have done changes to 5413 * the inode before. If our inode is a directory and it's 5414 * waiting to be moved/renamed, we will send its utimes when 5415 * it's moved/renamed, therefore we don't need to do it here. 
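* Sending the utimes last also matters because the receiver's own write, chmod and chown operations touch the inode's timestamps again.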
5416 */ 5417 sctx->send_progress = sctx->cur_ino + 1; 5418 ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); 5419 if (ret < 0) 5420 goto out; 5421 } 5422 5423 out: 5424 return ret; 5425 } 5426 5427 static int changed_inode(struct send_ctx *sctx, 5428 enum btrfs_compare_tree_result result) 5429 { 5430 int ret = 0; 5431 struct btrfs_key *key = sctx->cmp_key; 5432 struct btrfs_inode_item *left_ii = NULL; 5433 struct btrfs_inode_item *right_ii = NULL; 5434 u64 left_gen = 0; 5435 u64 right_gen = 0; 5436 5437 sctx->cur_ino = key->objectid; 5438 sctx->cur_inode_new_gen = 0; 5439 sctx->cur_inode_last_extent = (u64)-1; 5440 5441 /* 5442 * Set send_progress to current inode. This will tell all get_cur_xxx 5443 * functions that the current inode's refs are not updated yet. Later, 5444 * when process_recorded_refs is finished, it is set to cur_ino + 1. 5445 */ 5446 sctx->send_progress = sctx->cur_ino; 5447 5448 if (result == BTRFS_COMPARE_TREE_NEW || 5449 result == BTRFS_COMPARE_TREE_CHANGED) { 5450 left_ii = btrfs_item_ptr(sctx->left_path->nodes[0], 5451 sctx->left_path->slots[0], 5452 struct btrfs_inode_item); 5453 left_gen = btrfs_inode_generation(sctx->left_path->nodes[0], 5454 left_ii); 5455 } else { 5456 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], 5457 sctx->right_path->slots[0], 5458 struct btrfs_inode_item); 5459 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], 5460 right_ii); 5461 } 5462 if (result == BTRFS_COMPARE_TREE_CHANGED) { 5463 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], 5464 sctx->right_path->slots[0], 5465 struct btrfs_inode_item); 5466 5467 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], 5468 right_ii); 5469 5470 /* 5471 * The cur_ino = root dir case is special here. We can't treat 5472 * the inode as deleted+reused because it would generate a 5473 * stream that tries to delete/mkdir the root dir. 5474 */ 5475 if (left_gen != right_gen && 5476 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) 5477 sctx->cur_inode_new_gen = 1; 5478 } 5479 5480 if (result == BTRFS_COMPARE_TREE_NEW) { 5481 sctx->cur_inode_gen = left_gen; 5482 sctx->cur_inode_new = 1; 5483 sctx->cur_inode_deleted = 0; 5484 sctx->cur_inode_size = btrfs_inode_size( 5485 sctx->left_path->nodes[0], left_ii); 5486 sctx->cur_inode_mode = btrfs_inode_mode( 5487 sctx->left_path->nodes[0], left_ii); 5488 sctx->cur_inode_rdev = btrfs_inode_rdev( 5489 sctx->left_path->nodes[0], left_ii); 5490 if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) 5491 ret = send_create_inode_if_needed(sctx); 5492 } else if (result == BTRFS_COMPARE_TREE_DELETED) { 5493 sctx->cur_inode_gen = right_gen; 5494 sctx->cur_inode_new = 0; 5495 sctx->cur_inode_deleted = 1; 5496 sctx->cur_inode_size = btrfs_inode_size( 5497 sctx->right_path->nodes[0], right_ii); 5498 sctx->cur_inode_mode = btrfs_inode_mode( 5499 sctx->right_path->nodes[0], right_ii); 5500 } else if (result == BTRFS_COMPARE_TREE_CHANGED) { 5501 /* 5502 * We need to do some special handling in case the inode was 5503 * reported as changed with a changed generation number. This 5504 * means that the original inode was deleted and new inode 5505 * reused the same inum. So we have to treat the old inode as 5506 * deleted and the new one as new. 5507 */ 5508 if (sctx->cur_inode_new_gen) { 5509 /* 5510 * First, process the inode as if it was deleted. 
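* That is, record all refs of the old inode (taken from the right tree) as deleted, so every old path gets unlinked before the reused inode number is recreated as a new inode.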
5511 */ 5512 sctx->cur_inode_gen = right_gen; 5513 sctx->cur_inode_new = 0; 5514 sctx->cur_inode_deleted = 1; 5515 sctx->cur_inode_size = btrfs_inode_size( 5516 sctx->right_path->nodes[0], right_ii); 5517 sctx->cur_inode_mode = btrfs_inode_mode( 5518 sctx->right_path->nodes[0], right_ii); 5519 ret = process_all_refs(sctx, 5520 BTRFS_COMPARE_TREE_DELETED); 5521 if (ret < 0) 5522 goto out; 5523 5524 /* 5525 * Now process the inode as if it was new. 5526 */ 5527 sctx->cur_inode_gen = left_gen; 5528 sctx->cur_inode_new = 1; 5529 sctx->cur_inode_deleted = 0; 5530 sctx->cur_inode_size = btrfs_inode_size( 5531 sctx->left_path->nodes[0], left_ii); 5532 sctx->cur_inode_mode = btrfs_inode_mode( 5533 sctx->left_path->nodes[0], left_ii); 5534 sctx->cur_inode_rdev = btrfs_inode_rdev( 5535 sctx->left_path->nodes[0], left_ii); 5536 ret = send_create_inode_if_needed(sctx); 5537 if (ret < 0) 5538 goto out; 5539 5540 ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW); 5541 if (ret < 0) 5542 goto out; 5543 /* 5544 * Advance send_progress now as we did not get into 5545 * process_recorded_refs_if_needed in the new_gen case. 5546 */ 5547 sctx->send_progress = sctx->cur_ino + 1; 5548 5549 /* 5550 * Now process all extents and xattrs of the inode as if 5551 * they were all new. 5552 */ 5553 ret = process_all_extents(sctx); 5554 if (ret < 0) 5555 goto out; 5556 ret = process_all_new_xattrs(sctx); 5557 if (ret < 0) 5558 goto out; 5559 } else { 5560 sctx->cur_inode_gen = left_gen; 5561 sctx->cur_inode_new = 0; 5562 sctx->cur_inode_new_gen = 0; 5563 sctx->cur_inode_deleted = 0; 5564 sctx->cur_inode_size = btrfs_inode_size( 5565 sctx->left_path->nodes[0], left_ii); 5566 sctx->cur_inode_mode = btrfs_inode_mode( 5567 sctx->left_path->nodes[0], left_ii); 5568 } 5569 } 5570 5571 out: 5572 return ret; 5573 } 5574 5575 /* 5576 * We have to process new refs before deleted refs, but compare_trees gives us 5577 * the new and deleted refs mixed. To fix this, we record the new/deleted refs 5578 * first and later process them in process_recorded_refs. 5579 * For the cur_inode_new_gen case, we skip recording completely because 5580 * changed_inode has already initiated processing of refs. The reason for this is 5581 * that in this case, compare_trees actually compares the refs of 2 different 5582 * inodes. To fix this, process_all_refs is used in changed_inode to handle all 5583 * refs of the right tree as deleted and all refs of the left tree as new. 5584 */ 5585 static int changed_ref(struct send_ctx *sctx, 5586 enum btrfs_compare_tree_result result) 5587 { 5588 int ret = 0; 5589 5590 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5591 5592 if (!sctx->cur_inode_new_gen && 5593 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { 5594 if (result == BTRFS_COMPARE_TREE_NEW) 5595 ret = record_new_ref(sctx); 5596 else if (result == BTRFS_COMPARE_TREE_DELETED) 5597 ret = record_deleted_ref(sctx); 5598 else if (result == BTRFS_COMPARE_TREE_CHANGED) 5599 ret = record_changed_ref(sctx); 5600 } 5601 5602 return ret; 5603 } 5604 5605 /* 5606 * Process new/deleted/changed xattrs. We skip processing in the 5607 * cur_inode_new_gen case because changed_inode has already initiated processing 5608 * of xattrs.
The reason is the same as in changed_ref. 5609 */ 5610 static int changed_xattr(struct send_ctx *sctx, 5611 enum btrfs_compare_tree_result result) 5612 { 5613 int ret = 0; 5614 5615 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5616 5617 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5618 if (result == BTRFS_COMPARE_TREE_NEW) 5619 ret = process_new_xattr(sctx); 5620 else if (result == BTRFS_COMPARE_TREE_DELETED) 5621 ret = process_deleted_xattr(sctx); 5622 else if (result == BTRFS_COMPARE_TREE_CHANGED) 5623 ret = process_changed_xattr(sctx); 5624 } 5625 5626 return ret; 5627 } 5628 5629 /* 5630 * Process new/deleted/changed extents. We skip processing in the 5631 * cur_inode_new_gen case because changed_inode has already initiated processing 5632 * of extents. The reason is the same as in changed_ref. 5633 */ 5634 static int changed_extent(struct send_ctx *sctx, 5635 enum btrfs_compare_tree_result result) 5636 { 5637 int ret = 0; 5638 5639 BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); 5640 5641 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5642 if (result != BTRFS_COMPARE_TREE_DELETED) 5643 ret = process_extent(sctx, sctx->left_path, 5644 sctx->cmp_key); 5645 } 5646 5647 return ret; 5648 } 5649 5650 static int dir_changed(struct send_ctx *sctx, u64 dir) 5651 { 5652 u64 orig_gen, new_gen; 5653 int ret; 5654 5655 ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL, 5656 NULL, NULL); 5657 if (ret) 5658 return ret; 5659 5660 ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL, 5661 NULL, NULL, NULL); 5662 if (ret) 5663 return ret; 5664 5665 return (orig_gen != new_gen) ? 1 : 0; 5666 } 5667 5668 static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path, 5669 struct btrfs_key *key) 5670 { 5671 struct btrfs_inode_extref *extref; 5672 struct extent_buffer *leaf; 5673 u64 dirid = 0, last_dirid = 0; 5674 unsigned long ptr; 5675 u32 item_size; 5676 u32 cur_offset = 0; 5677 int ref_name_len; 5678 int ret = 0; 5679 5680 /* Easy case, just check this one dirid */ 5681 if (key->type == BTRFS_INODE_REF_KEY) { 5682 dirid = key->offset; 5683 5684 ret = dir_changed(sctx, dirid); 5685 goto out; 5686 } 5687 5688 leaf = path->nodes[0]; 5689 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 5690 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 5691 while (cur_offset < item_size) { 5692 extref = (struct btrfs_inode_extref *)(ptr + 5693 cur_offset); 5694 dirid = btrfs_inode_extref_parent(leaf, extref); 5695 ref_name_len = btrfs_inode_extref_name_len(leaf, extref); 5696 cur_offset += ref_name_len + sizeof(*extref); 5697 if (dirid == last_dirid) 5698 continue; 5699 ret = dir_changed(sctx, dirid); 5700 if (ret) 5701 break; 5702 last_dirid = dirid; 5703 } 5704 out: 5705 return ret; 5706 } 5707 5708 /* 5709 * Updates compare-related fields in sctx and simply forwards to the actual 5710 * changed_xxx functions.
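* Results of type BTRFS_COMPARE_TREE_SAME still get a closer look first: identical ref items can hide a parent directory whose generation changed, and identical file extent items must still feed the hole tracking in maybe_send_hole.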
5711 */ 5712 static int changed_cb(struct btrfs_root *left_root, 5713 struct btrfs_root *right_root, 5714 struct btrfs_path *left_path, 5715 struct btrfs_path *right_path, 5716 struct btrfs_key *key, 5717 enum btrfs_compare_tree_result result, 5718 void *ctx) 5719 { 5720 int ret = 0; 5721 struct send_ctx *sctx = ctx; 5722 5723 if (result == BTRFS_COMPARE_TREE_SAME) { 5724 if (key->type == BTRFS_INODE_REF_KEY || 5725 key->type == BTRFS_INODE_EXTREF_KEY) { 5726 ret = compare_refs(sctx, left_path, key); 5727 if (!ret) 5728 return 0; 5729 if (ret < 0) 5730 return ret; 5731 } else if (key->type == BTRFS_EXTENT_DATA_KEY) { 5732 return maybe_send_hole(sctx, left_path, key); 5733 } else { 5734 return 0; 5735 } 5736 result = BTRFS_COMPARE_TREE_CHANGED; 5737 ret = 0; 5738 } 5739 5740 sctx->left_path = left_path; 5741 sctx->right_path = right_path; 5742 sctx->cmp_key = key; 5743 5744 ret = finish_inode_if_needed(sctx, 0); 5745 if (ret < 0) 5746 goto out; 5747 5748 /* Ignore non-FS objects */ 5749 if (key->objectid == BTRFS_FREE_INO_OBJECTID || 5750 key->objectid == BTRFS_FREE_SPACE_OBJECTID) 5751 goto out; 5752 5753 if (key->type == BTRFS_INODE_ITEM_KEY) 5754 ret = changed_inode(sctx, result); 5755 else if (key->type == BTRFS_INODE_REF_KEY || 5756 key->type == BTRFS_INODE_EXTREF_KEY) 5757 ret = changed_ref(sctx, result); 5758 else if (key->type == BTRFS_XATTR_ITEM_KEY) 5759 ret = changed_xattr(sctx, result); 5760 else if (key->type == BTRFS_EXTENT_DATA_KEY) 5761 ret = changed_extent(sctx, result); 5762 5763 out: 5764 return ret; 5765 } 5766 5767 static int full_send_tree(struct send_ctx *sctx) 5768 { 5769 int ret; 5770 struct btrfs_root *send_root = sctx->send_root; 5771 struct btrfs_key key; 5772 struct btrfs_key found_key; 5773 struct btrfs_path *path; 5774 struct extent_buffer *eb; 5775 int slot; 5776 5777 path = alloc_path_for_send(); 5778 if (!path) 5779 return -ENOMEM; 5780 5781 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 5782 key.type = BTRFS_INODE_ITEM_KEY; 5783 key.offset = 0; 5784 5785 ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); 5786 if (ret < 0) 5787 goto out; 5788 if (ret) 5789 goto out_finish; 5790 5791 while (1) { 5792 eb = path->nodes[0]; 5793 slot = path->slots[0]; 5794 btrfs_item_key_to_cpu(eb, &found_key, slot); 5795 5796 ret = changed_cb(send_root, NULL, path, NULL, 5797 &found_key, BTRFS_COMPARE_TREE_NEW, sctx); 5798 if (ret < 0) 5799 goto out; 5800 5801 key.objectid = found_key.objectid; 5802 key.type = found_key.type; 5803 key.offset = found_key.offset + 1; 5804 5805 ret = btrfs_next_item(send_root, path); 5806 if (ret < 0) 5807 goto out; 5808 if (ret) { 5809 ret = 0; 5810 break; 5811 } 5812 } 5813 5814 out_finish: 5815 ret = finish_inode_if_needed(sctx, 1); 5816 5817 out: 5818 btrfs_free_path(path); 5819 return ret; 5820 } 5821 5822 static int send_subvol(struct send_ctx *sctx) 5823 { 5824 int ret; 5825 5826 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) { 5827 ret = send_header(sctx); 5828 if (ret < 0) 5829 goto out; 5830 } 5831 5832 ret = send_subvol_begin(sctx); 5833 if (ret < 0) 5834 goto out; 5835 5836 if (sctx->parent_root) { 5837 ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, 5838 changed_cb, sctx); 5839 if (ret < 0) 5840 goto out; 5841 ret = finish_inode_if_needed(sctx, 1); 5842 if (ret < 0) 5843 goto out; 5844 } else { 5845 ret = full_send_tree(sctx); 5846 if (ret < 0) 5847 goto out; 5848 } 5849 5850 out: 5851 free_recorded_refs(sctx); 5852 return ret; 5853 } 5854 5855 /* 5856 * If orphan cleanup did remove any orphans from a 
root, it means the tree 5857 * was modified and therefore the commit root is not the same as the current 5858 * root anymore. This is a problem, because send uses the commit root and 5859 * therefore can see inode items that don't exist in the current root anymore, 5860 * and for example make calls to btrfs_iget, which will do tree lookups based 5861 * on the current root and not on the commit root. Those lookups will fail, 5862 * returning a -ESTALE error, and making send fail with that error. So make 5863 * sure a send does not see any orphans we have just removed, and that it will 5864 * see the same inodes regardless of whether a transaction commit happened 5865 * before it started (meaning that the commit root will be the same as the 5866 * current root) or not. 5867 */ 5868 static int ensure_commit_roots_uptodate(struct send_ctx *sctx) 5869 { 5870 int i; 5871 struct btrfs_trans_handle *trans = NULL; 5872 5873 again: 5874 if (sctx->parent_root && 5875 sctx->parent_root->node != sctx->parent_root->commit_root) 5876 goto commit_trans; 5877 5878 for (i = 0; i < sctx->clone_roots_cnt; i++) 5879 if (sctx->clone_roots[i].root->node != 5880 sctx->clone_roots[i].root->commit_root) 5881 goto commit_trans; 5882 5883 if (trans) 5884 return btrfs_end_transaction(trans, sctx->send_root); 5885 5886 return 0; 5887 5888 commit_trans: 5889 /* Use any root, all fs roots will get their commit roots updated. */ 5890 if (!trans) { 5891 trans = btrfs_join_transaction(sctx->send_root); 5892 if (IS_ERR(trans)) 5893 return PTR_ERR(trans); 5894 goto again; 5895 } 5896 5897 return btrfs_commit_transaction(trans, sctx->send_root); 5898 } 5899 5900 static void btrfs_root_dec_send_in_progress(struct btrfs_root *root) 5901 { 5902 spin_lock(&root->root_item_lock); 5903 root->send_in_progress--; 5904 /* 5905 * Not much left to do, we don't know why it's unbalanced and 5906 * can't blindly reset it to 0. 5907 */ 5908 if (root->send_in_progress < 0) 5909 btrfs_err(root->fs_info, 5910 "send_in_progress unbalanced %d root %llu", 5911 root->send_in_progress, root->root_key.objectid); 5912 spin_unlock(&root->root_item_lock); 5913 } 5914 5915 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) 5916 { 5917 int ret = 0; 5918 struct btrfs_root *send_root; 5919 struct btrfs_root *clone_root; 5920 struct btrfs_fs_info *fs_info; 5921 struct btrfs_ioctl_send_args *arg = NULL; 5922 struct btrfs_key key; 5923 struct send_ctx *sctx = NULL; 5924 u32 i; 5925 u64 *clone_sources_tmp = NULL; 5926 int clone_sources_to_rollback = 0; 5927 int sort_clone_roots = 0; 5928 int index; 5929 5930 if (!capable(CAP_SYS_ADMIN)) 5931 return -EPERM; 5932 5933 send_root = BTRFS_I(file_inode(mnt_file))->root; 5934 fs_info = send_root->fs_info; 5935 5936 /* 5937 * The subvolume must remain read-only during send, protect against 5938 * making it RW. This also protects against deletion. 5939 */ 5940 spin_lock(&send_root->root_item_lock); 5941 send_root->send_in_progress++; 5942 spin_unlock(&send_root->root_item_lock); 5943 5944 /* 5945 * This is done when we look up the root; it should already be complete 5946 * by the time we get here. 5947 */ 5948 WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); 5949 5950 /* 5951 * Userspace tools do the checks and warn the user if it's 5952 * not RO.
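* The check below is still mandatory, since nothing prevents the ioctl from being issued on a writable subvolume without going through those tools.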
5953 */ 5954 if (!btrfs_root_readonly(send_root)) { 5955 ret = -EPERM; 5956 goto out; 5957 } 5958 5959 arg = memdup_user(arg_, sizeof(*arg)); 5960 if (IS_ERR(arg)) { 5961 ret = PTR_ERR(arg); 5962 arg = NULL; 5963 goto out; 5964 } 5965 5966 if (!access_ok(VERIFY_READ, arg->clone_sources, 5967 sizeof(*arg->clone_sources) * 5968 arg->clone_sources_count)) { 5969 ret = -EFAULT; 5970 goto out; 5971 } 5972 5973 if (arg->flags & ~BTRFS_SEND_FLAG_MASK) { 5974 ret = -EINVAL; 5975 goto out; 5976 } 5977 5978 sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); 5979 if (!sctx) { 5980 ret = -ENOMEM; 5981 goto out; 5982 } 5983 5984 INIT_LIST_HEAD(&sctx->new_refs); 5985 INIT_LIST_HEAD(&sctx->deleted_refs); 5986 INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); 5987 INIT_LIST_HEAD(&sctx->name_cache_list); 5988 5989 sctx->flags = arg->flags; 5990 5991 sctx->send_filp = fget(arg->send_fd); 5992 if (!sctx->send_filp) { 5993 ret = -EBADF; 5994 goto out; 5995 } 5996 5997 sctx->send_root = send_root; 5998 /* 5999 * Unlikely but possible, if the subvolume is marked for deletion but 6000 * is slow to remove the directory entry, send can still be started 6001 */ 6002 if (btrfs_root_dead(sctx->send_root)) { 6003 ret = -EPERM; 6004 goto out; 6005 } 6006 6007 sctx->clone_roots_cnt = arg->clone_sources_count; 6008 6009 sctx->send_max_size = BTRFS_SEND_BUF_SIZE; 6010 sctx->send_buf = vmalloc(sctx->send_max_size); 6011 if (!sctx->send_buf) { 6012 ret = -ENOMEM; 6013 goto out; 6014 } 6015 6016 sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE); 6017 if (!sctx->read_buf) { 6018 ret = -ENOMEM; 6019 goto out; 6020 } 6021 6022 sctx->pending_dir_moves = RB_ROOT; 6023 sctx->waiting_dir_moves = RB_ROOT; 6024 sctx->orphan_dirs = RB_ROOT; 6025 6026 sctx->clone_roots = vzalloc(sizeof(struct clone_root) * 6027 (arg->clone_sources_count + 1)); 6028 if (!sctx->clone_roots) { 6029 ret = -ENOMEM; 6030 goto out; 6031 } 6032 6033 if (arg->clone_sources_count) { 6034 clone_sources_tmp = vmalloc(arg->clone_sources_count * 6035 sizeof(*arg->clone_sources)); 6036 if (!clone_sources_tmp) { 6037 ret = -ENOMEM; 6038 goto out; 6039 } 6040 6041 ret = copy_from_user(clone_sources_tmp, arg->clone_sources, 6042 arg->clone_sources_count * 6043 sizeof(*arg->clone_sources)); 6044 if (ret) { 6045 ret = -EFAULT; 6046 goto out; 6047 } 6048 6049 for (i = 0; i < arg->clone_sources_count; i++) { 6050 key.objectid = clone_sources_tmp[i]; 6051 key.type = BTRFS_ROOT_ITEM_KEY; 6052 key.offset = (u64)-1; 6053 6054 index = srcu_read_lock(&fs_info->subvol_srcu); 6055 6056 clone_root = btrfs_read_fs_root_no_name(fs_info, &key); 6057 if (IS_ERR(clone_root)) { 6058 srcu_read_unlock(&fs_info->subvol_srcu, index); 6059 ret = PTR_ERR(clone_root); 6060 goto out; 6061 } 6062 spin_lock(&clone_root->root_item_lock); 6063 if (!btrfs_root_readonly(clone_root) || 6064 btrfs_root_dead(clone_root)) { 6065 spin_unlock(&clone_root->root_item_lock); 6066 srcu_read_unlock(&fs_info->subvol_srcu, index); 6067 ret = -EPERM; 6068 goto out; 6069 } 6070 clone_root->send_in_progress++; 6071 spin_unlock(&clone_root->root_item_lock); 6072 srcu_read_unlock(&fs_info->subvol_srcu, index); 6073 6074 sctx->clone_roots[i].root = clone_root; 6075 clone_sources_to_rollback = i + 1; 6076 } 6077 vfree(clone_sources_tmp); 6078 clone_sources_tmp = NULL; 6079 } 6080 6081 if (arg->parent_root) { 6082 key.objectid = arg->parent_root; 6083 key.type = BTRFS_ROOT_ITEM_KEY; 6084 key.offset = (u64)-1; 6085 6086 index = srcu_read_lock(&fs_info->subvol_srcu); 6087 6088 sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, 
&key); 6089 if (IS_ERR(sctx->parent_root)) { 6090 srcu_read_unlock(&fs_info->subvol_srcu, index); 6091 ret = PTR_ERR(sctx->parent_root); 6092 goto out; 6093 } 6094 6095 spin_lock(&sctx->parent_root->root_item_lock); 6096 sctx->parent_root->send_in_progress++; 6097 if (!btrfs_root_readonly(sctx->parent_root) || 6098 btrfs_root_dead(sctx->parent_root)) { 6099 spin_unlock(&sctx->parent_root->root_item_lock); 6100 srcu_read_unlock(&fs_info->subvol_srcu, index); 6101 ret = -EPERM; 6102 goto out; 6103 } 6104 spin_unlock(&sctx->parent_root->root_item_lock); 6105 6106 srcu_read_unlock(&fs_info->subvol_srcu, index); 6107 } 6108 6109 /* 6110 * Clones from send_root are allowed, but only if the clone source 6111 * is behind the current send position. This is checked while searching 6112 * for possible clone sources. 6113 */ 6114 sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root; 6115 6116 /* We do a bsearch later */ 6117 sort(sctx->clone_roots, sctx->clone_roots_cnt, 6118 sizeof(*sctx->clone_roots), __clone_root_cmp_sort, 6119 NULL); 6120 sort_clone_roots = 1; 6121 6122 ret = ensure_commit_roots_uptodate(sctx); 6123 if (ret) 6124 goto out; 6125 6126 current->journal_info = BTRFS_SEND_TRANS_STUB; 6127 ret = send_subvol(sctx); 6128 current->journal_info = NULL; 6129 if (ret < 0) 6130 goto out; 6131 6132 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) { 6133 ret = begin_cmd(sctx, BTRFS_SEND_C_END); 6134 if (ret < 0) 6135 goto out; 6136 ret = send_cmd(sctx); 6137 if (ret < 0) 6138 goto out; 6139 } 6140 6141 out: 6142 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)); 6143 while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) { 6144 struct rb_node *n; 6145 struct pending_dir_move *pm; 6146 6147 n = rb_first(&sctx->pending_dir_moves); 6148 pm = rb_entry(n, struct pending_dir_move, node); 6149 while (!list_empty(&pm->list)) { 6150 struct pending_dir_move *pm2; 6151 6152 pm2 = list_first_entry(&pm->list, 6153 struct pending_dir_move, list); 6154 free_pending_move(sctx, pm2); 6155 } 6156 free_pending_move(sctx, pm); 6157 } 6158 6159 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)); 6160 while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) { 6161 struct rb_node *n; 6162 struct waiting_dir_move *dm; 6163 6164 n = rb_first(&sctx->waiting_dir_moves); 6165 dm = rb_entry(n, struct waiting_dir_move, node); 6166 rb_erase(&dm->node, &sctx->waiting_dir_moves); 6167 kfree(dm); 6168 } 6169 6170 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs)); 6171 while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) { 6172 struct rb_node *n; 6173 struct orphan_dir_info *odi; 6174 6175 n = rb_first(&sctx->orphan_dirs); 6176 odi = rb_entry(n, struct orphan_dir_info, node); 6177 free_orphan_dir_info(sctx, odi); 6178 } 6179 6180 if (sort_clone_roots) { 6181 for (i = 0; i < sctx->clone_roots_cnt; i++) 6182 btrfs_root_dec_send_in_progress( 6183 sctx->clone_roots[i].root); 6184 } else { 6185 for (i = 0; sctx && i < clone_sources_to_rollback; i++) 6186 btrfs_root_dec_send_in_progress( 6187 sctx->clone_roots[i].root); 6188 6189 btrfs_root_dec_send_in_progress(send_root); 6190 } 6191 if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) 6192 btrfs_root_dec_send_in_progress(sctx->parent_root); 6193 6194 kfree(arg); 6195 vfree(clone_sources_tmp); 6196 6197 if (sctx) { 6198 if (sctx->send_filp) 6199 fput(sctx->send_filp); 6200 6201 vfree(sctx->clone_roots); 6202 vfree(sctx->send_buf); 6203 vfree(sctx->read_buf); 6204 6205 name_cache_free(sctx); 6206 6207 kfree(sctx); 6208 } 6209 6210 
return ret; 6211 } 6212