/*
 * Copyright (C) 2012 Alexander Block. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/bsearch.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sort.h>
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/radix-tree.h>
#include <linux/vmalloc.h>
#include <linux/string.h>

#include "send.h"
#include "backref.h"
#include "hash.h"
#include "locking.h"
#include "disk-io.h"
#include "btrfs_inode.h"
#include "transaction.h"
#include "compression.h"

/*
 * A fs_path is a helper to dynamically build path names with unknown size.
 * It reallocates the internal buffer on demand.
 * It allows fast adding of path elements on the right side (normal path) and
 * fast adding to the left side (reversed path). A reversed path can also be
 * unreversed if needed.
 */
struct fs_path {
	union {
		struct {
			char *start;
			char *end;

			char *buf;
			unsigned short buf_len:15;
			unsigned short reversed:1;
			char inline_buf[];
		};
		/*
		 * Average path length does not exceed 200 bytes, we'll have
		 * better packing in the slab and a higher chance to satisfy
		 * an allocation later during send.
		 */
		char pad[256];
	};
};
#define FS_PATH_INLINE_SIZE \
	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))


/* reused for each extent */
struct clone_root {
	struct btrfs_root *root;
	u64 ino;
	u64 offset;

	u64 found_refs;
};

#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)

struct send_ctx {
	struct file *send_filp;
	loff_t send_off;
	char *send_buf;
	u32 send_size;
	u32 send_max_size;
	u64 total_send_size;
	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */

	struct btrfs_root *send_root;
	struct btrfs_root *parent_root;
	struct clone_root *clone_roots;
	int clone_roots_cnt;

	/* current state of the compare_tree call */
	struct btrfs_path *left_path;
	struct btrfs_path *right_path;
	struct btrfs_key *cmp_key;

	/*
	 * Info about the currently processed inode. In case of deleted inodes,
	 * these are the values from the deleted inode.
	 */
	u64 cur_ino;
	u64 cur_inode_gen;
	int cur_inode_new;
	int cur_inode_new_gen;
	int cur_inode_deleted;
	u64 cur_inode_size;
	u64 cur_inode_mode;
	u64 cur_inode_rdev;
	u64 cur_inode_last_extent;

	u64 send_progress;

	struct list_head new_refs;
	struct list_head deleted_refs;

	struct radix_tree_root name_cache;
	struct list_head name_cache_list;
	int name_cache_size;

	struct file_ra_state ra;

	char *read_buf;

	/*
	 * We process inodes in increasing order of their inode numbers, so if
	 * before an incremental send we reverse the parent/child relationship
	 * of directories such that a directory with a lower inode number was
	 * the parent of a directory with a higher inode number, and the one
	 * becoming the new parent got renamed too, we can't rename/move the
	 * directory with lower inode number when we finish processing it - we
	 * must process the directory with higher inode number first, then
	 * rename/move it and then rename/move the directory with lower inode
	 * number. Example follows.
	 *
	 * Tree state when the first send was performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |
	 *         |
	 *         |-- c           (ino 259)
	 *         |   |-- d       (ino 260)
	 *         |
	 *         |-- c2          (ino 261)
	 *
	 * Tree state when the second (incremental) send is performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |-- c2          (ino 261)
	 *             |-- d2      (ino 260)
	 *                 |-- cc  (ino 259)
	 *
	 * The sequence of steps that led to the second state was:
	 *
	 * mv /a/b/c/d /a/b/c2/d2
	 * mv /a/b/c /a/b/c2/d2/cc
	 *
	 * "c" has lower inode number, but we can't move it (2nd mv operation)
	 * before we move "d", which has higher inode number.
	 *
	 * So we just memorize which move/rename operations must be performed
	 * later when their respective parent is processed and moved/renamed.
	 */

	/* Indexed by parent directory inode number. */
	struct rb_root pending_dir_moves;

	/*
	 * Reverse index, indexed by the inode number of a directory that
	 * is waiting for the move/rename of its immediate parent before its
	 * own move/rename can be performed.
	 */
	struct rb_root waiting_dir_moves;

	/*
	 * A directory that is going to be rm'ed might have a child directory
	 * which is in the pending directory moves index above. In this case,
	 * the directory can only be removed after the move/rename of its child
	 * is performed. Example:
	 *
	 * Parent snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- c/           (ino 259)
	 *         |   |-- x/       (ino 260)
	 *         |
	 *         |-- y/           (ino 261)
	 *
	 * Send snapshot:
	 *
	 * .                        (ino 256)
	 * |-- a/                   (ino 257)
	 *     |-- b/               (ino 258)
	 *         |-- YY/          (ino 261)
	 *              |-- x/      (ino 260)
	 *
	 * Sequence of steps that led to the send snapshot:
	 * rm -f /a/b/c/foo.txt
	 * mv /a/b/y /a/b/YY
	 * mv /a/b/c/x /a/b/YY
	 * rmdir /a/b/c
	 *
	 * When the child is processed, its move/rename is delayed until its
	 * parent is processed (as explained above), but all other operations
	 * like update utimes, chown, chgrp, etc, are performed and the paths
	 * that it uses for those operations must use the orphanized name of
	 * its parent (the directory we're going to rm later), so we need to
	 * memorize that name.
	 *
	 * Indexed by the inode number of the directory to be deleted.
	 */
	struct rb_root orphan_dirs;
};

struct pending_dir_move {
	struct rb_node node;
	struct list_head list;
	u64 parent_ino;
	u64 ino;
	u64 gen;
	struct list_head update_refs;
};

struct waiting_dir_move {
	struct rb_node node;
	u64 ino;
	/*
	 * There might be some directory that could not be removed because it
	 * was waiting for this directory inode to be moved first. Therefore
	 * after this directory is moved, we can try to rmdir the inode
	 * rmdir_ino.
	 */
	u64 rmdir_ino;
	bool orphanized;
};

struct orphan_dir_info {
	struct rb_node node;
	u64 ino;
	u64 gen;
};

struct name_cache_entry {
	struct list_head list;
	/*
	 * radix_tree has only 32bit entries but we need to handle 64bit inums.
	 * We use the lower 32bit of the 64bit inum to store it in the tree. If
	 * more than one inum would fall into the same entry, we use radix_list
	 * to store the additional entries. radix_list is also used to store
	 * entries where two entries have the same inum but different
	 * generations.
	 */
	struct list_head radix_list;
	u64 ino;
	u64 gen;
	u64 parent_ino;
	u64 parent_gen;
	int ret;
	int need_later_update;
	int name_len;
	char name[];
};

static void inconsistent_snapshot_error(struct send_ctx *sctx,
					enum btrfs_compare_tree_result result,
					const char *what)
{
	const char *result_string;

	switch (result) {
	case BTRFS_COMPARE_TREE_NEW:
		result_string = "new";
		break;
	case BTRFS_COMPARE_TREE_DELETED:
		result_string = "deleted";
		break;
	case BTRFS_COMPARE_TREE_CHANGED:
		result_string = "updated";
		break;
	case BTRFS_COMPARE_TREE_SAME:
		ASSERT(0);
		result_string = "unchanged";
		break;
	default:
		ASSERT(0);
		result_string = "unexpected";
	}

	btrfs_err(sctx->send_root->fs_info,
		  "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
		  result_string, what, sctx->cmp_key->objectid,
		  sctx->send_root->root_key.objectid,
		  (sctx->parent_root ?
		   sctx->parent_root->root_key.objectid : 0));
}

static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);

static struct waiting_dir_move *
get_waiting_dir_move(struct send_ctx *sctx, u64 ino);

static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);

static int need_send_hole(struct send_ctx *sctx)
{
	return (sctx->parent_root && !sctx->cur_inode_new &&
		!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
		S_ISREG(sctx->cur_inode_mode));
}

static void fs_path_reset(struct fs_path *p)
{
	if (p->reversed) {
		p->start = p->buf + p->buf_len - 1;
		p->end = p->start;
		*p->start = 0;
	} else {
		p->start = p->buf;
		p->end = p->start;
		*p->start = 0;
	}
}

static struct fs_path *fs_path_alloc(void)
{
	struct fs_path *p;

	p = kmalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return NULL;
	p->reversed = 0;
	p->buf = p->inline_buf;
	p->buf_len = FS_PATH_INLINE_SIZE;
	fs_path_reset(p);
	return p;
}

static struct fs_path *fs_path_alloc_reversed(void)
{
	struct fs_path *p;

	p = fs_path_alloc();
	if (!p)
		return NULL;
	p->reversed = 1;
	fs_path_reset(p);
	return p;
}

static void fs_path_free(struct fs_path *p)
{
	if (!p)
		return;
	if (p->buf != p->inline_buf)
		kfree(p->buf);
	kfree(p);
}

static int fs_path_len(struct fs_path *p)
{
	return p->end - p->start;
}

static int fs_path_ensure_buf(struct fs_path *p, int len)
{
	char *tmp_buf;
	int path_len;
	int old_buf_len;

	len++;

	if (p->buf_len >= len)
		return 0;

	if (len > PATH_MAX) {
		WARN_ON(1);
		return -ENOMEM;
	}

	path_len = p->end - p->start;
	old_buf_len = p->buf_len;

	/*
	 * First time the inline_buf does not suffice
	 */
	if (p->buf == p->inline_buf) {
		tmp_buf = kmalloc(len, GFP_KERNEL);
		if (tmp_buf)
			memcpy(tmp_buf, p->buf, old_buf_len);
	} else {
		tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
	}
	if (!tmp_buf)
		return -ENOMEM;
	p->buf = tmp_buf;
	/*
	 * The real size of the buffer is bigger, this will let the fast path
	 * happen most of the time
	 */
	p->buf_len = ksize(p->buf);

	if (p->reversed) {
		tmp_buf = p->buf + old_buf_len - path_len - 1;
		p->end = p->buf + p->buf_len - 1;
		p->start = p->end - path_len;
		memmove(p->start, tmp_buf, path_len + 1);
	} else {
		p->start = p->buf;
		p->end = p->start + path_len;
	}
	return 0;
}

static int fs_path_prepare_for_add(struct fs_path *p, int name_len,
				   char **prepared)
{
	int ret;
	int new_len;

	new_len = p->end - p->start + name_len;
	if (p->start != p->end)
		new_len++;
	ret = fs_path_ensure_buf(p, new_len);
	if (ret < 0)
		goto out;

	if (p->reversed) {
		if (p->start != p->end)
			*--p->start = '/';
		p->start -= name_len;
		*prepared = p->start;
	} else {
		if (p->start != p->end)
			*p->end++ = '/';
		*prepared = p->end;
		p->end += name_len;
		*p->end = 0;
	}

out:
	return ret;
}

static int fs_path_add(struct fs_path *p, const char *name, int name_len)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, name_len, &prepared);
	if (ret < 0)
		goto out;
	memcpy(prepared, name, name_len);

out:
	return ret;
}

static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared);
	if (ret < 0)
		goto out;
	memcpy(prepared, p2->start, p2->end - p2->start);

out:
	return ret;
}

static int fs_path_add_from_extent_buffer(struct fs_path *p,
					  struct extent_buffer *eb,
					  unsigned long off, int len)
{
	int ret;
	char *prepared;

	ret = fs_path_prepare_for_add(p, len, &prepared);
	if (ret < 0)
		goto out;

	read_extent_buffer(eb, prepared, off, len);

out:
	return ret;
}

static int fs_path_copy(struct fs_path *p, struct fs_path *from)
{
	int ret;

	p->reversed = from->reversed;
	fs_path_reset(p);

	ret = fs_path_add_path(p, from);

	return ret;
}


static void fs_path_unreverse(struct fs_path *p)
{
	char *tmp;
	int len;

	if (!p->reversed)
		return;

	tmp = p->start;
	len = p->end - p->start;
	p->start = p->buf;
	p->end = p->start + len;
	memmove(p->start, tmp, len + 1);
	p->reversed = 0;
}
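
/*
 * A minimal usage sketch (illustration only, not part of the build) of the
 * reversed fs_path machinery above: components are prepended right-to-left
 * and the buffer is turned into a normal path at the end. get_cur_path
 * further below uses exactly this pattern while walking refs up to the root.
 */
#if 0
static int fs_path_reversed_example(void)
{
	struct fs_path *p;
	int ret;

	p = fs_path_alloc_reversed();
	if (!p)
		return -ENOMEM;

	ret = fs_path_add(p, "file", 4);	/* p->start == "file" */
	if (!ret)
		ret = fs_path_add(p, "dir", 3);	/* p->start == "dir/file" */
	if (!ret)
		fs_path_unreverse(p);		/* now an ordinary path */

	fs_path_free(p);
	return ret;
}
#endif
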
static struct btrfs_path *alloc_path_for_send(void)
{
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return NULL;
	path->search_commit_root = 1;
	path->skip_locking = 1;
	path->need_commit_sem = 1;
	return path;
}

static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
{
	int ret;
	mm_segment_t old_fs;
	u32 pos = 0;

	old_fs = get_fs();
	set_fs(KERNEL_DS);

	while (pos < len) {
		ret = vfs_write(filp, (__force const char __user *)buf + pos,
				len - pos, off);
		/* TODO handle that correctly */
		/*if (ret == -ERESTARTSYS) {
			continue;
		}*/
		if (ret < 0)
			goto out;
		if (ret == 0) {
			ret = -EIO;
			goto out;
		}
		pos += ret;
	}

	ret = 0;

out:
	set_fs(old_fs);
	return ret;
}

static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
{
	struct btrfs_tlv_header *hdr;
	int total_len = sizeof(*hdr) + len;
	int left = sctx->send_max_size - sctx->send_size;

	if (unlikely(left < total_len))
		return -EOVERFLOW;

	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
	hdr->tlv_type = cpu_to_le16(attr);
	hdr->tlv_len = cpu_to_le16(len);
	memcpy(hdr + 1, data, len);
	sctx->send_size += total_len;

	return 0;
}

#define TLV_PUT_DEFINE_INT(bits) \
	static int tlv_put_u##bits(struct send_ctx *sctx,		\
				   u##bits attr, u##bits value)		\
	{								\
		__le##bits __tmp = cpu_to_le##bits(value);		\
		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));	\
	}

TLV_PUT_DEFINE_INT(64)

static int tlv_put_string(struct send_ctx *sctx, u16 attr,
			  const char *str, int len)
{
	if (len == -1)
		len = strlen(str);
	return tlv_put(sctx, attr, str, len);
}

static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
			const u8 *uuid)
{
	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
}

static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
				  struct extent_buffer *eb,
				  struct btrfs_timespec *ts)
{
	struct btrfs_timespec bts;

	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
	return tlv_put(sctx, attr, &bts, sizeof(bts));
}
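
/*
 * For reference, the layout that tlv_put produces inside send_buf, as
 * defined by struct btrfs_tlv_header (all fields little-endian):
 *
 *	+----------+----------+------------------+
 *	| tlv_type | tlv_len  | data             |
 *	| (le16)   | (le16)   | (tlv_len bytes)  |
 *	+----------+----------+------------------+
 *
 * Attributes are appended back to back and send_size tracks the total.
 */
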
#define TLV_PUT(sctx, attrtype, attrlen, data) \
	do { \
		ret = tlv_put(sctx, attrtype, attrlen, data); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_INT(sctx, attrtype, bits, value) \
	do { \
		ret = tlv_put_u##bits(sctx, attrtype, value); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
#define TLV_PUT_STRING(sctx, attrtype, str, len) \
	do { \
		ret = tlv_put_string(sctx, attrtype, str, len); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_PATH(sctx, attrtype, p) \
	do { \
		ret = tlv_put_string(sctx, attrtype, p->start, \
				     p->end - p->start); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_UUID(sctx, attrtype, uuid) \
	do { \
		ret = tlv_put_uuid(sctx, attrtype, uuid); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
	do { \
		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

static int send_header(struct send_ctx *sctx)
{
	struct btrfs_stream_header hdr;

	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);

	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
			 &sctx->send_off);
}

/*
 * For each command/item we want to send to userspace, we call this function.
 */
static int begin_cmd(struct send_ctx *sctx, int cmd)
{
	struct btrfs_cmd_header *hdr;

	if (WARN_ON(!sctx->send_buf))
		return -EINVAL;

	BUG_ON(sctx->send_size);

	sctx->send_size += sizeof(*hdr);
	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->cmd = cpu_to_le16(cmd);

	return 0;
}

static int send_cmd(struct send_ctx *sctx)
{
	int ret;
	struct btrfs_cmd_header *hdr;
	u32 crc;

	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
	hdr->crc = 0;

	crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
	hdr->crc = cpu_to_le32(crc);

	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
			&sctx->send_off);

	sctx->total_send_size += sctx->send_size;
	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
	sctx->send_size = 0;

	return ret;
}
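
/*
 * The resulting on-stream layout of a single command, as produced by the
 * begin_cmd + TLV_PUT + send_cmd sequence above: a btrfs_cmd_header with
 * le32 len (command length excluding the header), le16 cmd (a BTRFS_SEND_C_*
 * value) and le32 crc (crc32c over the whole command with the crc field
 * zeroed), followed by the TLV encoded attributes. The stream itself starts
 * with the btrfs_stream_header written by send_header().
 */
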
/*
 * Sends a move instruction to user space
 */
static int send_rename(struct send_ctx *sctx,
		       struct fs_path *from, struct fs_path *to)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;

	btrfs_debug(fs_info, "send_rename %s -> %s", from->start, to->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a link instruction to user space
 */
static int send_link(struct send_ctx *sctx,
		     struct fs_path *path, struct fs_path *lnk)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;

	btrfs_debug(fs_info, "send_link %s -> %s", path->start, lnk->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends an unlink instruction to user space
 */
static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;

	btrfs_debug(fs_info, "send_unlink %s", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a rmdir instruction to user space
 */
static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;

	btrfs_debug(fs_info, "send_rmdir %s", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Helper function to retrieve some fields from an inode item.
 */
static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
			    u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
			    u64 *gid, u64 *rdev)
{
	int ret;
	struct btrfs_inode_item *ii;
	struct btrfs_key key;

	key.objectid = ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		return ret;
	}

	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	if (size)
		*size = btrfs_inode_size(path->nodes[0], ii);
	if (gen)
		*gen = btrfs_inode_generation(path->nodes[0], ii);
	if (mode)
		*mode = btrfs_inode_mode(path->nodes[0], ii);
	if (uid)
		*uid = btrfs_inode_uid(path->nodes[0], ii);
	if (gid)
		*gid = btrfs_inode_gid(path->nodes[0], ii);
	if (rdev)
		*rdev = btrfs_inode_rdev(path->nodes[0], ii);

	return ret;
}

static int get_inode_info(struct btrfs_root *root,
			  u64 ino, u64 *size, u64 *gen,
			  u64 *mode, u64 *uid, u64 *gid,
			  u64 *rdev)
{
	struct btrfs_path *path;
	int ret;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;
	ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
			       rdev);
	btrfs_free_path(path);
	return ret;
}

typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
				   struct fs_path *p,
				   void *ctx);
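
/*
 * Illustration only (not part of the real call graph): a minimal
 * iterate_inode_ref_t callback that counts refs and stops early, using the
 * "non-zero return stops iteration" contract documented below.
 * __copy_first_ref further down is the real in-tree example of this pattern.
 */
#if 0
static int __count_refs(int num, u64 dir, int index, struct fs_path *p,
			void *ctx)
{
	int *limit = ctx;

	/* num is the index of this ref, p holds its (resolved) path */
	if (num + 1 >= *limit)
		return 1;	/* positive: stop iterating, not an error */
	return 0;
}
#endif
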
/*
 * Helper function to iterate the entries in ONE btrfs_inode_ref or
 * btrfs_inode_extref.
 * The iterate callback may return a non-zero value to stop iteration. This can
 * be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the INODE_REF or INODE_EXTREF when called.
 */
static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
			     struct btrfs_key *found_key, int resolve,
			     iterate_inode_ref_t iterate, void *ctx)
{
	struct extent_buffer *eb = path->nodes[0];
	struct btrfs_item *item;
	struct btrfs_inode_ref *iref;
	struct btrfs_inode_extref *extref;
	struct btrfs_path *tmp_path;
	struct fs_path *p;
	u32 cur = 0;
	u32 total;
	int slot = path->slots[0];
	u32 name_len;
	char *start;
	int ret = 0;
	int num = 0;
	int index;
	u64 dir;
	unsigned long name_off;
	unsigned long elem_size;
	unsigned long ptr;

	p = fs_path_alloc_reversed();
	if (!p)
		return -ENOMEM;

	tmp_path = alloc_path_for_send();
	if (!tmp_path) {
		fs_path_free(p);
		return -ENOMEM;
	}

	if (found_key->type == BTRFS_INODE_REF_KEY) {
		ptr = (unsigned long)btrfs_item_ptr(eb, slot,
						    struct btrfs_inode_ref);
		item = btrfs_item_nr(slot);
		total = btrfs_item_size(eb, item);
		elem_size = sizeof(*iref);
	} else {
		ptr = btrfs_item_ptr_offset(eb, slot);
		total = btrfs_item_size_nr(eb, slot);
		elem_size = sizeof(*extref);
	}

	while (cur < total) {
		fs_path_reset(p);

		if (found_key->type == BTRFS_INODE_REF_KEY) {
			iref = (struct btrfs_inode_ref *)(ptr + cur);
			name_len = btrfs_inode_ref_name_len(eb, iref);
			name_off = (unsigned long)(iref + 1);
			index = btrfs_inode_ref_index(eb, iref);
			dir = found_key->offset;
		} else {
			extref = (struct btrfs_inode_extref *)(ptr + cur);
			name_len = btrfs_inode_extref_name_len(eb, extref);
			name_off = (unsigned long)&extref->name;
			index = btrfs_inode_extref_index(eb, extref);
			dir = btrfs_inode_extref_parent(eb, extref);
		}

		if (resolve) {
			start = btrfs_ref_to_path(root, tmp_path, name_len,
						  name_off, eb, dir,
						  p->buf, p->buf_len);
			if (IS_ERR(start)) {
				ret = PTR_ERR(start);
				goto out;
			}
			if (start < p->buf) {
				/* overflow, try again with larger buffer */
				ret = fs_path_ensure_buf(p,
						p->buf_len + p->buf - start);
				if (ret < 0)
					goto out;
				start = btrfs_ref_to_path(root, tmp_path,
							  name_len, name_off,
							  eb, dir,
							  p->buf, p->buf_len);
				if (IS_ERR(start)) {
					ret = PTR_ERR(start);
					goto out;
				}
				BUG_ON(start < p->buf);
			}
			p->start = start;
		} else {
			ret = fs_path_add_from_extent_buffer(p, eb, name_off,
							     name_len);
			if (ret < 0)
				goto out;
		}

		cur += elem_size + name_len;
		ret = iterate(num, dir, index, p, ctx);
		if (ret)
			goto out;
		num++;
	}

out:
	btrfs_free_path(tmp_path);
	fs_path_free(p);
	return ret;
}

typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
				  const char *name, int name_len,
				  const char *data, int data_len,
				  u8 type, void *ctx);

/*
 * Helper function to iterate the entries in ONE btrfs_dir_item.
 * The iterate callback may return a non-zero value to stop iteration. This can
 * be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the dir item when called.
 */
static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
			    struct btrfs_key *found_key,
			    iterate_dir_item_t iterate, void *ctx)
{
	int ret = 0;
	struct extent_buffer *eb;
	struct btrfs_item *item;
	struct btrfs_dir_item *di;
	struct btrfs_key di_key;
	char *buf = NULL;
	int buf_len;
	u32 name_len;
	u32 data_len;
	u32 cur;
	u32 len;
	u32 total;
	int slot;
	int num;
	u8 type;

	/*
	 * Start with a small buffer (1 page). If later we end up needing more
	 * space, which can happen for xattrs on a fs with a leaf size greater
	 * than the page size, attempt to increase the buffer. Typically xattr
	 * values are small.
	 */
	buf_len = PATH_MAX;
	buf = kmalloc(buf_len, GFP_KERNEL);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	eb = path->nodes[0];
	slot = path->slots[0];
	item = btrfs_item_nr(slot);
	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	cur = 0;
	len = 0;
	total = btrfs_item_size(eb, item);

	num = 0;
	while (cur < total) {
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		type = btrfs_dir_type(eb, di);
		btrfs_dir_item_key_to_cpu(eb, di, &di_key);

		if (type == BTRFS_FT_XATTR) {
			if (name_len > XATTR_NAME_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
			if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root)) {
				ret = -E2BIG;
				goto out;
			}
		} else {
			/*
			 * Path too long
			 */
			if (name_len + data_len > PATH_MAX) {
				ret = -ENAMETOOLONG;
				goto out;
			}
		}

		if (name_len + data_len > buf_len) {
			buf_len = name_len + data_len;
			if (is_vmalloc_addr(buf)) {
				vfree(buf);
				buf = NULL;
			} else {
				char *tmp = krealloc(buf, buf_len,
						     GFP_KERNEL | __GFP_NOWARN);

				if (!tmp)
					kfree(buf);
				buf = tmp;
			}
			if (!buf) {
				buf = vmalloc(buf_len);
				if (!buf) {
					ret = -ENOMEM;
					goto out;
				}
			}
		}

		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
				   name_len + data_len);

		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;

		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
			      data_len, type, ctx);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}

		num++;
	}

out:
	kvfree(buf);
	return ret;
}

static int __copy_first_ref(int num, u64 dir, int index,
			    struct fs_path *p, void *ctx)
{
	int ret;
	struct fs_path *pt = ctx;

	ret = fs_path_copy(pt, p);
	if (ret < 0)
		return ret;

	/* we want the first only */
	return 1;
}

/*
 * Retrieve the first path of an inode. If an inode has more than one
 * ref/hardlink, this is ignored.
 */
static int get_inode_path(struct btrfs_root *root,
			  u64 ino, struct fs_path *path)
{
	int ret;
	struct btrfs_key key, found_key;
	struct btrfs_path *p;

	p = alloc_path_for_send();
	if (!p)
		return -ENOMEM;

	fs_path_reset(path);

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		ret = 1;
		goto out;
	}
	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
	if (found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	ret = iterate_inode_ref(root, p, &found_key, 1,
				__copy_first_ref, path);
	if (ret < 0)
		goto out;
	ret = 0;

out:
	btrfs_free_path(p);
	return ret;
}

struct backref_ctx {
	struct send_ctx *sctx;

	struct btrfs_path *path;
	/* number of total found references */
	u64 found;

	/*
	 * used for clones found in send_root. clones found behind cur_objectid
	 * and cur_offset are not considered as allowed clones.
	 */
	u64 cur_objectid;
	u64 cur_offset;

	/* may be truncated in case it's the last extent in a file */
	u64 extent_len;

	/* data offset in the file extent item */
	u64 data_offset;

	/* Just to check for bugs in backref resolving */
	int found_itself;
};

static int __clone_root_cmp_bsearch(const void *key, const void *elt)
{
	u64 root = (u64)(uintptr_t)key;
	struct clone_root *cr = (struct clone_root *)elt;

	if (root < cr->root->objectid)
		return -1;
	if (root > cr->root->objectid)
		return 1;
	return 0;
}

static int __clone_root_cmp_sort(const void *e1, const void *e2)
{
	struct clone_root *cr1 = (struct clone_root *)e1;
	struct clone_root *cr2 = (struct clone_root *)e2;

	if (cr1->root->objectid < cr2->root->objectid)
		return -1;
	if (cr1->root->objectid > cr2->root->objectid)
		return 1;
	return 0;
}
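
/*
 * Note: the bsearch done in __iterate_backrefs below relies on the
 * clone_roots array being sorted with __clone_root_cmp_sort by the send
 * ioctl setup code before any extent is processed; both comparators order
 * by root objectid.
 */
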
/*
 * Called for every backref that is found for the current extent.
 * Results are collected in sctx->clone_roots->ino/offset/found_refs
 */
static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
{
	struct backref_ctx *bctx = ctx_;
	struct clone_root *found;
	int ret;
	u64 i_size;

	/* First check if the root is in the list of accepted clone sources */
	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
			bctx->sctx->clone_roots_cnt,
			sizeof(struct clone_root),
			__clone_root_cmp_bsearch);
	if (!found)
		return 0;

	if (found->root == bctx->sctx->send_root &&
	    ino == bctx->cur_objectid &&
	    offset == bctx->cur_offset) {
		bctx->found_itself = 1;
	}

	/*
	 * There are inodes that have extents that lie behind their i_size.
	 * Don't accept clones from these extents.
	 */
	ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
			       NULL, NULL, NULL);
	btrfs_release_path(bctx->path);
	if (ret < 0)
		return ret;

	if (offset + bctx->data_offset + bctx->extent_len > i_size)
		return 0;

	/*
	 * Make sure we don't consider clones from send_root that are
	 * behind the current inode/offset.
	 */
	if (found->root == bctx->sctx->send_root) {
		/*
		 * TODO for the moment we don't accept clones from the inode
		 * that is currently being sent. We may change this when
		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
		 * file.
		 */
		if (ino >= bctx->cur_objectid)
			return 0;
#if 0
		if (ino > bctx->cur_objectid)
			return 0;
		if (offset + bctx->extent_len > bctx->cur_offset)
			return 0;
#endif
	}

	bctx->found++;
	found->found_refs++;
	if (ino < found->ino) {
		found->ino = ino;
		found->offset = offset;
	} else if (found->ino == ino) {
		/*
		 * same extent found more than once in the same file.
		 */
		if (found->offset > offset + bctx->extent_len)
			found->offset = offset;
	}

	return 0;
}

/*
 * Given an inode, offset and extent item, it finds a good clone for a clone
 * instruction. Returns -ENOENT when none could be found. The function makes
 * sure that the returned clone is usable at the point where sending is at the
 * moment. This means that no clones are accepted which lie behind the current
 * inode+offset.
 *
 * path must point to the extent item when called.
 */
static int find_extent_clone(struct send_ctx *sctx,
			     struct btrfs_path *path,
			     u64 ino, u64 data_offset,
			     u64 ino_size,
			     struct clone_root **found)
{
	struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
	int ret;
	int extent_type;
	u64 logical;
	u64 disk_byte;
	u64 num_bytes;
	u64 extent_item_pos;
	u64 flags = 0;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *eb = path->nodes[0];
	struct backref_ctx *backref_ctx = NULL;
	struct clone_root *cur_clone_root;
	struct btrfs_key found_key;
	struct btrfs_path *tmp_path;
	int compressed;
	u32 i;

	tmp_path = alloc_path_for_send();
	if (!tmp_path)
		return -ENOMEM;

	/* We only use this path under the commit sem */
	tmp_path->need_commit_sem = 0;

	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL);
	if (!backref_ctx) {
		ret = -ENOMEM;
		goto out;
	}

	backref_ctx->path = tmp_path;

	if (data_offset >= ino_size) {
		/*
		 * There may be extents that lie behind the file's size.
		 * I at least had this in combination with snapshotting while
		 * writing large files.
		 */
		ret = 0;
		goto out;
	}

	fi = btrfs_item_ptr(eb, path->slots[0],
			    struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);
	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		ret = -ENOENT;
		goto out;
	}
	compressed = btrfs_file_extent_compression(eb, fi);

	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
	if (disk_byte == 0) {
		ret = -ENOENT;
		goto out;
	}
	logical = disk_byte + btrfs_file_extent_offset(eb, fi);

	down_read(&fs_info->commit_root_sem);
	ret = extent_from_logical(fs_info, disk_byte, tmp_path,
				  &found_key, &flags);
	up_read(&fs_info->commit_root_sem);
	btrfs_release_path(tmp_path);

	if (ret < 0)
		goto out;
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ret = -EIO;
		goto out;
	}

	/*
	 * Setup the clone roots.
	 */
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		cur_clone_root = sctx->clone_roots + i;
		cur_clone_root->ino = (u64)-1;
		cur_clone_root->offset = 0;
		cur_clone_root->found_refs = 0;
	}

	backref_ctx->sctx = sctx;
	backref_ctx->found = 0;
	backref_ctx->cur_objectid = ino;
	backref_ctx->cur_offset = data_offset;
	backref_ctx->found_itself = 0;
	backref_ctx->extent_len = num_bytes;
	/*
	 * For non-compressed extents iterate_extent_inodes() gives us extent
	 * offsets that already take into account the data offset, but not for
	 * compressed extents, since the offset is logical and not relative to
	 * the physical extent locations. We must take this into account to
	 * avoid sending clone offsets that go beyond the source file's size,
	 * which would result in the clone ioctl failing with -EINVAL on the
	 * receiving end.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		backref_ctx->data_offset = 0;
	else
		backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi);

	/*
	 * The last extent of a file may be too large due to page alignment.
	 * We need to adjust extent_len in this case so that the checks in
	 * __iterate_backrefs work.
	 */
	if (data_offset + num_bytes >= ino_size)
		backref_ctx->extent_len = ino_size - data_offset;

	/*
	 * Now collect all backrefs.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		extent_item_pos = logical - found_key.objectid;
	else
		extent_item_pos = 0;
	ret = iterate_extent_inodes(fs_info,
				    found_key.objectid, extent_item_pos, 1,
				    __iterate_backrefs, backref_ctx);

	if (ret < 0)
		goto out;

	if (!backref_ctx->found_itself) {
		/* found a bug in backref code? */
		ret = -EIO;
		btrfs_err(fs_info,
			  "did not find backref in send_root. inode=%llu, offset=%llu, disk_byte=%llu found extent=%llu",
			  ino, data_offset, disk_byte, found_key.objectid);
		goto out;
	}

	btrfs_debug(fs_info,
		    "find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu",
		    data_offset, ino, num_bytes, logical);

	if (!backref_ctx->found)
		btrfs_debug(fs_info, "no clones found");

	cur_clone_root = NULL;
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		if (sctx->clone_roots[i].found_refs) {
			if (!cur_clone_root)
				cur_clone_root = sctx->clone_roots + i;
			else if (sctx->clone_roots[i].root == sctx->send_root)
				/* prefer clones from send_root over others */
				cur_clone_root = sctx->clone_roots + i;
		}
	}

	if (cur_clone_root) {
		*found = cur_clone_root;
		ret = 0;
	} else {
		ret = -ENOENT;
	}

out:
	btrfs_free_path(tmp_path);
	kfree(backref_ctx);
	return ret;
}

static int read_symlink(struct btrfs_root *root,
			u64 ino,
			struct fs_path *dest)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_file_extent_item *ei;
	u8 type;
	u8 compression;
	unsigned long off;
	int len;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		/*
		 * An empty symlink inode. Can happen in rare error paths when
		 * creating a symlink (transaction committed before the inode
		 * eviction handler removed the symlink inode items and a crash
		 * happened in between or the subvol was snapshotted in
		 * between).
		 * Print an informative message to dmesg/syslog so that the
		 * user can delete the symlink.
		 */
		btrfs_err(root->fs_info,
			  "Found empty symlink inode %llu at root %llu",
			  ino, root->root_key.objectid);
		ret = -EIO;
		goto out;
	}

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_file_extent_item);
	type = btrfs_file_extent_type(path->nodes[0], ei);
	compression = btrfs_file_extent_compression(path->nodes[0], ei);
	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
	BUG_ON(compression);

	off = btrfs_file_extent_inline_start(ei);
	len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);

	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Helper function to generate a file name that is unique in the root of
 * send_root and parent_root. This is used to generate names for orphan inodes.
 */
static int gen_unique_name(struct send_ctx *sctx,
			   u64 ino, u64 gen,
			   struct fs_path *dest)
{
	int ret = 0;
	struct btrfs_path *path;
	struct btrfs_dir_item *di;
	char tmp[64];
	int len;
	u64 idx = 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	while (1) {
		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
			       ino, gen, idx);
		ASSERT(len < sizeof(tmp));

		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
					   path, BTRFS_FIRST_FREE_OBJECTID,
					   tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}

		if (!sctx->parent_root) {
			/* unique */
			ret = 0;
			break;
		}

		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
					   path, BTRFS_FIRST_FREE_OBJECTID,
					   tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}
		/* unique */
		break;
	}

	ret = fs_path_add(dest, tmp, strlen(tmp));

out:
	btrfs_free_path(path);
	return ret;
}

enum inode_state {
	inode_state_no_change,
	inode_state_will_create,
	inode_state_did_create,
	inode_state_will_delete,
	inode_state_did_delete,
};
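
/*
 * Summary of the decision logic below, where "left" is send_root, "right"
 * is parent_root and sctx->send_progress marks how far receiving would have
 * gotten by now:
 *
 *	gen matches left and right	-> inode_state_no_change
 *	gen matches only left		-> did_create if ino < send_progress,
 *					   will_create otherwise
 *	gen matches only right		-> did_delete if ino < send_progress,
 *					   will_delete otherwise
 *	no match at all			-> -ENOENT
 */
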
static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;
	int left_ret;
	int right_ret;
	u64 left_gen;
	u64 right_gen;

	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
			     NULL, NULL);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	left_ret = ret;

	if (!sctx->parent_root) {
		right_ret = -ENOENT;
	} else {
		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
				     NULL, NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		right_ret = ret;
	}

	if (!left_ret && !right_ret) {
		if (left_gen == gen && right_gen == gen) {
			ret = inode_state_no_change;
		} else if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else if (!left_ret) {
		if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else {
			ret = -ENOENT;
		}
	} else if (!right_ret) {
		if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else {
		ret = -ENOENT;
	}

out:
	return ret;
}

static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;

	ret = get_cur_inode_state(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (ret == inode_state_no_change ||
	    ret == inode_state_did_create ||
	    ret == inode_state_will_delete)
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}

/*
 * Helper function to lookup a dir item in a dir.
 */
static int lookup_dir_item_inode(struct btrfs_root *root,
				 u64 dir, const char *name, int name_len,
				 u64 *found_inode,
				 u8 *found_type)
{
	int ret = 0;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(NULL, root, path,
				   dir, name, name_len, 0);
	if (!di) {
		ret = -ENOENT;
		goto out;
	}
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		goto out;
	}
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
	if (key.type == BTRFS_ROOT_ITEM_KEY) {
		ret = -ENOENT;
		goto out;
	}
	*found_inode = key.objectid;
	*found_type = btrfs_dir_type(path->nodes[0], di);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent
 * dir, the generation of the parent dir and the name of the dir entry.
 */
static int get_first_ref(struct btrfs_root *root, u64 ino,
			 u64 *dir, u64 *dir_gen, struct fs_path *name)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	int len;
	u64 parent_dir;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
	if (ret < 0)
		goto out;
	if (!ret)
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
	if (ret || found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	if (found_key.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *iref;
		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_inode_ref);
		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
						     (unsigned long)(iref + 1),
						     len);
		parent_dir = found_key.offset;
	} else {
		struct btrfs_inode_extref *extref;
		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					struct btrfs_inode_extref);
		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
						     (unsigned long)&extref->name, len);
		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
	}
	if (ret < 0)
		goto out;
	btrfs_release_path(path);

	if (dir_gen) {
		ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0)
			goto out;
	}

	*dir = parent_dir;

out:
	btrfs_free_path(path);
	return ret;
}

static int is_first_ref(struct btrfs_root *root,
			u64 ino, u64 dir,
			const char *name, int name_len)
{
	int ret;
	struct fs_path *tmp_name;
	u64 tmp_dir;

	tmp_name = fs_path_alloc();
	if (!tmp_name)
		return -ENOMEM;

	ret = get_first_ref(root, ino, &tmp_dir, NULL, tmp_name);
	if (ret < 0)
		goto out;

	if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
		ret = 0;
		goto out;
	}

	ret = !memcmp(tmp_name->start, name, name_len);

out:
	fs_path_free(tmp_name);
	return ret;
}

/*
 * Used by process_recorded_refs to determine if a new ref would overwrite an
 * already existing ref. In case it detects an overwrite, it returns the
 * inode/gen in who_ino/who_gen.
 * When an overwrite is detected, process_recorded_refs does proper orphanizing
 * to make sure later references to the overwritten inode are possible.
 * Orphanizing is however only required for the first ref of an inode.
 * process_recorded_refs does an additional is_first_ref check to see if
 * orphanizing is really required.
 */
static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
			      const char *name, int name_len,
			      u64 *who_ino, u64 *who_gen)
{
	int ret = 0;
	u64 gen;
	u64 other_inode = 0;
	u8 other_type = 0;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	/*
	 * If we have a parent root we need to verify that the parent dir was
	 * not deleted and then re-created, if it was then we have no overwrite
	 * and we can just unlink this entry.
	 */
	if (sctx->parent_root) {
		ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}
		if (gen != dir_gen)
			goto out;
	}

	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
				    &other_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Check if the overwritten ref was already processed. If yes, the ref
	 * was already unlinked/moved, so we can safely assume that we will not
	 * overwrite anything at this point in time.
	 */
	if (other_inode > sctx->send_progress ||
	    is_waiting_for_move(sctx, other_inode)) {
		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
				     who_gen, NULL, NULL, NULL, NULL);
		if (ret < 0)
			goto out;

		ret = 1;
		*who_ino = other_inode;
	} else {
		ret = 0;
	}

out:
	return ret;
}

/*
 * Checks if the ref was overwritten by an already processed inode. This is
 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
 * thus the orphan name needs to be used.
 * process_recorded_refs also uses it to avoid unlinking of refs that were
 * overwritten.
 */
static int did_overwrite_ref(struct send_ctx *sctx,
			     u64 dir, u64 dir_gen,
			     u64 ino, u64 ino_gen,
			     const char *name, int name_len)
{
	int ret = 0;
	u64 gen;
	u64 ow_inode;
	u8 other_type;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	/* check if the ref was overwritten by another ref */
	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
				    &ow_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		/* was never and will never be overwritten */
		ret = 0;
		goto out;
	}

	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
			     NULL, NULL);
	if (ret < 0)
		goto out;

	if (ow_inode == ino && gen == ino_gen) {
		ret = 0;
		goto out;
	}

	/*
	 * We know that it is or will be overwritten. Check this now.
	 * The current inode being processed might have been the one that caused
	 * inode 'ino' to be orphanized, therefore check if ow_inode matches
	 * the current inode being processed.
	 */
	if ((ow_inode < sctx->send_progress) ||
	    (ino != sctx->cur_ino && ow_inode == sctx->cur_ino &&
	     gen == sctx->cur_inode_gen))
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}
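
/*
 * Worked example for did_overwrite_ref above (hypothetical inode numbers):
 * the parent snapshot has "a" -> ino 259 and the send snapshot has "a" ->
 * ino 260 in the same dir. While processing ino 259, the lookup in send_root
 * finds ow_inode == 260. If 260 was already processed (ow_inode <
 * sctx->send_progress), the old ref "a" was already overwritten on the
 * receiving side, so the function returns 1 and callers must refer to ino
 * 259 by its orphan name.
 */
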
/*
 * Same as did_overwrite_ref, but also checks if it is the first ref of an
 * inode that got overwritten. This is used by process_recorded_refs to
 * determine if it has to use the path as returned by get_cur_path or the
 * orphan name.
 */
static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret = 0;
	struct fs_path *name = NULL;
	u64 dir;
	u64 dir_gen;

	if (!sctx->parent_root)
		goto out;

	name = fs_path_alloc();
	if (!name)
		return -ENOMEM;

	ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
	if (ret < 0)
		goto out;

	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
				name->start, fs_path_len(name));

out:
	fs_path_free(name);
	return ret;
}

/*
 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
 * so we need to do some special handling in case we have clashes. This function
 * takes care of this with the help of name_cache_entry::radix_list.
 * In case of error, nce is kfreed.
 */
static int name_cache_insert(struct send_ctx *sctx,
			     struct name_cache_entry *nce)
{
	int ret = 0;
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
				     (unsigned long)nce->ino);
	if (!nce_head) {
		nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
		if (!nce_head) {
			kfree(nce);
			return -ENOMEM;
		}
		INIT_LIST_HEAD(nce_head);

		ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
		if (ret < 0) {
			kfree(nce_head);
			kfree(nce);
			return ret;
		}
	}
	list_add_tail(&nce->radix_list, nce_head);
	list_add_tail(&nce->list, &sctx->name_cache_list);
	sctx->name_cache_size++;

	return ret;
}

static void name_cache_delete(struct send_ctx *sctx,
			      struct name_cache_entry *nce)
{
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
				     (unsigned long)nce->ino);
	if (!nce_head) {
		btrfs_err(sctx->send_root->fs_info,
			  "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
			  nce->ino, sctx->name_cache_size);
	}

	list_del(&nce->radix_list);
	list_del(&nce->list);
	sctx->name_cache_size--;

	/*
	 * We may not get to the final release of nce_head if the lookup fails
	 */
	if (nce_head && list_empty(nce_head)) {
		radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
		kfree(nce_head);
	}
}

static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
						  u64 ino, u64 gen)
{
	struct list_head *nce_head;
	struct name_cache_entry *cur;

	nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
	if (!nce_head)
		return NULL;

	list_for_each_entry(cur, nce_head, radix_list) {
		if (cur->ino == ino && cur->gen == gen)
			return cur;
	}
	return NULL;
}

/*
 * Removes the entry from the list and adds it back to the end. This marks the
 * entry as recently used so that name_cache_clean_unused does not remove it.
 */
static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
{
	list_del(&nce->list);
	list_add_tail(&nce->list, &sctx->name_cache_list);
}
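
/*
 * The name cache acts as a small LRU with hysteresis: every hit moves the
 * entry to the tail of name_cache_list (name_cache_used above), and
 * name_cache_clean_unused below only starts evicting from the head once the
 * cache has grown past SEND_CTX_NAME_CACHE_CLEAN_SIZE entries, trimming it
 * back down to SEND_CTX_MAX_NAME_CACHE_SIZE.
 */
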
/*
 * Remove some entries from the beginning of name_cache_list.
 */
static void name_cache_clean_unused(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
		return;

	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
		nce = list_entry(sctx->name_cache_list.next,
				 struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}

static void name_cache_free(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	while (!list_empty(&sctx->name_cache_list)) {
		nce = list_entry(sctx->name_cache_list.next,
				 struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}

/*
 * Used by get_cur_path for each ref up to the root.
 * Returns 0 if it succeeded.
 * Returns 1 if the inode does not exist or got overwritten. In that case, the
 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
 * Returns <0 in case of error.
 */
static int __get_cur_name_and_parent(struct send_ctx *sctx,
				     u64 ino, u64 gen,
				     u64 *parent_ino,
				     u64 *parent_gen,
				     struct fs_path *dest)
{
	int ret;
	int nce_ret;
	struct name_cache_entry *nce = NULL;

	/*
	 * First check if we already did a call to this function with the same
	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
	 * return the cached result.
	 */
	nce = name_cache_search(sctx, ino, gen);
	if (nce) {
		if (ino < sctx->send_progress && nce->need_later_update) {
			name_cache_delete(sctx, nce);
			kfree(nce);
			nce = NULL;
		} else {
			name_cache_used(sctx, nce);
			*parent_ino = nce->parent_ino;
			*parent_gen = nce->parent_gen;
			ret = fs_path_add(dest, nce->name, nce->name_len);
			if (ret < 0)
				goto out;
			ret = nce->ret;
			goto out;
		}
	}

	/*
	 * If the inode does not exist yet, add the orphan name and return 1.
	 * This should only happen for the parent dir that we determine in
	 * __record_new_ref
	 */
	ret = is_inode_existent(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (!ret) {
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
		goto out_cache;
	}

	/*
	 * Depending on whether the inode was already processed or not, use
	 * send_root or parent_root for ref lookup.
	 */
	if (ino < sctx->send_progress)
		ret = get_first_ref(sctx->send_root, ino,
				    parent_ino, parent_gen, dest);
	else
		ret = get_first_ref(sctx->parent_root, ino,
				    parent_ino, parent_gen, dest);
	if (ret < 0)
		goto out;

	/*
	 * Check if the ref was overwritten by an inode's ref that was processed
	 * earlier. If yes, treat as orphan and return 1.
	 */
	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
				dest->start, dest->end - dest->start);
	if (ret < 0)
		goto out;
	if (ret) {
		fs_path_reset(dest);
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
	}

out_cache:
	/*
	 * Store the result of the lookup in the name cache.
	 */
2214 */ 2215 nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL); 2216 if (!nce) { 2217 ret = -ENOMEM; 2218 goto out; 2219 } 2220 2221 nce->ino = ino; 2222 nce->gen = gen; 2223 nce->parent_ino = *parent_ino; 2224 nce->parent_gen = *parent_gen; 2225 nce->name_len = fs_path_len(dest); 2226 nce->ret = ret; 2227 strcpy(nce->name, dest->start); 2228 2229 if (ino < sctx->send_progress) 2230 nce->need_later_update = 0; 2231 else 2232 nce->need_later_update = 1; 2233 2234 nce_ret = name_cache_insert(sctx, nce); 2235 if (nce_ret < 0) 2236 ret = nce_ret; 2237 name_cache_clean_unused(sctx); 2238 2239 out: 2240 return ret; 2241 } 2242 2243 /* 2244 * Magic happens here. This function returns the first ref to an inode as it 2245 * would look while receiving the stream at this point in time. 2246 * We walk the path up to the root. For every inode in between, we check if it 2247 * was already processed/sent. If yes, we continue with the parent as found 2248 * in send_root. If not, we continue with the parent as found in parent_root. 2249 * If we encounter an inode that was deleted at this point in time, we use the 2250 * inode's "orphan" name instead of the real name and stop. Same with new inodes 2251 * that were not created yet and overwritten inodes/refs. 2252 * 2253 * When do we have orphan inodes: 2254 * 1. When an inode is freshly created and thus no valid refs are available yet 2255 * 2. When a directory lost all its refs (deleted) but still has dir items 2256 * inside which were not processed yet (pending for move/delete). If anyone 2257 * tried to get the path to the dir items, it would get a path inside that 2258 * orphan directory. 2259 * 3. When an inode is moved around or gets new links, it may overwrite the ref 2260 * of an unprocessed inode. If in that case the first ref would be 2261 * overwritten, the overwritten inode gets "orphanized". Later when we 2262 * process this overwritten inode, it is restored at a new place by moving 2263 * the orphan inode. 2264 * 2265 * sctx->send_progress tells this function at which point in time receiving 2266 * would be.
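 *
 * A hypothetical walk: for an inode at /a/b/file, if "file" was already
 * processed but "b" was not, the name of "file" is resolved in
 * send_root while the names of "b" and "a" still come from parent_root,
 * which yields the path exactly as the receiver sees it at this point
 * in the stream.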
2267 */ 2268 static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, 2269 struct fs_path *dest) 2270 { 2271 int ret = 0; 2272 struct fs_path *name = NULL; 2273 u64 parent_inode = 0; 2274 u64 parent_gen = 0; 2275 int stop = 0; 2276 2277 name = fs_path_alloc(); 2278 if (!name) { 2279 ret = -ENOMEM; 2280 goto out; 2281 } 2282 2283 dest->reversed = 1; 2284 fs_path_reset(dest); 2285 2286 while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { 2287 struct waiting_dir_move *wdm; 2288 2289 fs_path_reset(name); 2290 2291 if (is_waiting_for_rm(sctx, ino)) { 2292 ret = gen_unique_name(sctx, ino, gen, name); 2293 if (ret < 0) 2294 goto out; 2295 ret = fs_path_add_path(dest, name); 2296 break; 2297 } 2298 2299 wdm = get_waiting_dir_move(sctx, ino); 2300 if (wdm && wdm->orphanized) { 2301 ret = gen_unique_name(sctx, ino, gen, name); 2302 stop = 1; 2303 } else if (wdm) { 2304 ret = get_first_ref(sctx->parent_root, ino, 2305 &parent_inode, &parent_gen, name); 2306 } else { 2307 ret = __get_cur_name_and_parent(sctx, ino, gen, 2308 &parent_inode, 2309 &parent_gen, name); 2310 if (ret) 2311 stop = 1; 2312 } 2313 2314 if (ret < 0) 2315 goto out; 2316 2317 ret = fs_path_add_path(dest, name); 2318 if (ret < 0) 2319 goto out; 2320 2321 ino = parent_inode; 2322 gen = parent_gen; 2323 } 2324 2325 out: 2326 fs_path_free(name); 2327 if (!ret) 2328 fs_path_unreverse(dest); 2329 return ret; 2330 } 2331 2332 /* 2333 * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace 2334 */ 2335 static int send_subvol_begin(struct send_ctx *sctx) 2336 { 2337 int ret; 2338 struct btrfs_root *send_root = sctx->send_root; 2339 struct btrfs_root *parent_root = sctx->parent_root; 2340 struct btrfs_path *path; 2341 struct btrfs_key key; 2342 struct btrfs_root_ref *ref; 2343 struct extent_buffer *leaf; 2344 char *name = NULL; 2345 int namelen; 2346 2347 path = btrfs_alloc_path(); 2348 if (!path) 2349 return -ENOMEM; 2350 2351 name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL); 2352 if (!name) { 2353 btrfs_free_path(path); 2354 return -ENOMEM; 2355 } 2356 2357 key.objectid = send_root->objectid; 2358 key.type = BTRFS_ROOT_BACKREF_KEY; 2359 key.offset = 0; 2360 2361 ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root, 2362 &key, path, 1, 0); 2363 if (ret < 0) 2364 goto out; 2365 if (ret) { 2366 ret = -ENOENT; 2367 goto out; 2368 } 2369 2370 leaf = path->nodes[0]; 2371 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2372 if (key.type != BTRFS_ROOT_BACKREF_KEY || 2373 key.objectid != send_root->objectid) { 2374 ret = -ENOENT; 2375 goto out; 2376 } 2377 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); 2378 namelen = btrfs_root_ref_name_len(leaf, ref); 2379 read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); 2380 btrfs_release_path(path); 2381 2382 if (parent_root) { 2383 ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); 2384 if (ret < 0) 2385 goto out; 2386 } else { 2387 ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL); 2388 if (ret < 0) 2389 goto out; 2390 } 2391 2392 TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); 2393 2394 if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid)) 2395 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, 2396 sctx->send_root->root_item.received_uuid); 2397 else 2398 TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, 2399 sctx->send_root->root_item.uuid); 2400 2401 TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, 2402 le64_to_cpu(sctx->send_root->root_item.ctransid)); 2403 if (parent_root) { 2404 if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid)) 2405 TLV_PUT_UUID(sctx, 
BTRFS_SEND_A_CLONE_UUID, 2406 parent_root->root_item.received_uuid); 2407 else 2408 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 2409 parent_root->root_item.uuid); 2410 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 2411 le64_to_cpu(sctx->parent_root->root_item.ctransid)); 2412 } 2413 2414 ret = send_cmd(sctx); 2415 2416 tlv_put_failure: 2417 out: 2418 btrfs_free_path(path); 2419 kfree(name); 2420 return ret; 2421 } 2422 2423 static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) 2424 { 2425 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; 2426 int ret = 0; 2427 struct fs_path *p; 2428 2429 btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size); 2430 2431 p = fs_path_alloc(); 2432 if (!p) 2433 return -ENOMEM; 2434 2435 ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE); 2436 if (ret < 0) 2437 goto out; 2438 2439 ret = get_cur_path(sctx, ino, gen, p); 2440 if (ret < 0) 2441 goto out; 2442 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2443 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size); 2444 2445 ret = send_cmd(sctx); 2446 2447 tlv_put_failure: 2448 out: 2449 fs_path_free(p); 2450 return ret; 2451 } 2452 2453 static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) 2454 { 2455 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; 2456 int ret = 0; 2457 struct fs_path *p; 2458 2459 btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode); 2460 2461 p = fs_path_alloc(); 2462 if (!p) 2463 return -ENOMEM; 2464 2465 ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD); 2466 if (ret < 0) 2467 goto out; 2468 2469 ret = get_cur_path(sctx, ino, gen, p); 2470 if (ret < 0) 2471 goto out; 2472 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2473 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777); 2474 2475 ret = send_cmd(sctx); 2476 2477 tlv_put_failure: 2478 out: 2479 fs_path_free(p); 2480 return ret; 2481 } 2482 2483 static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) 2484 { 2485 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; 2486 int ret = 0; 2487 struct fs_path *p; 2488 2489 btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu", 2490 ino, uid, gid); 2491 2492 p = fs_path_alloc(); 2493 if (!p) 2494 return -ENOMEM; 2495 2496 ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN); 2497 if (ret < 0) 2498 goto out; 2499 2500 ret = get_cur_path(sctx, ino, gen, p); 2501 if (ret < 0) 2502 goto out; 2503 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2504 TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid); 2505 TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid); 2506 2507 ret = send_cmd(sctx); 2508 2509 tlv_put_failure: 2510 out: 2511 fs_path_free(p); 2512 return ret; 2513 } 2514 2515 static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) 2516 { 2517 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; 2518 int ret = 0; 2519 struct fs_path *p = NULL; 2520 struct btrfs_inode_item *ii; 2521 struct btrfs_path *path = NULL; 2522 struct extent_buffer *eb; 2523 struct btrfs_key key; 2524 int slot; 2525 2526 btrfs_debug(fs_info, "send_utimes %llu", ino); 2527 2528 p = fs_path_alloc(); 2529 if (!p) 2530 return -ENOMEM; 2531 2532 path = alloc_path_for_send(); 2533 if (!path) { 2534 ret = -ENOMEM; 2535 goto out; 2536 } 2537 2538 key.objectid = ino; 2539 key.type = BTRFS_INODE_ITEM_KEY; 2540 key.offset = 0; 2541 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2542 if (ret > 0) 2543 ret = -ENOENT; 2544 if (ret < 0) 2545 goto out; 2546 2547 eb = path->nodes[0]; 2548 slot = path->slots[0]; 2549 ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 2550 
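/*
 * The inode item looked up above supplies the timestamps; send them for
 * the path the receiver currently has for this inode.
 */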
2551 ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES); 2552 if (ret < 0) 2553 goto out; 2554 2555 ret = get_cur_path(sctx, ino, gen, p); 2556 if (ret < 0) 2557 goto out; 2558 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2559 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime); 2560 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime); 2561 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, &ii->ctime); 2562 /* TODO Add otime support when the otime patches get into upstream */ 2563 2564 ret = send_cmd(sctx); 2565 2566 tlv_put_failure: 2567 out: 2568 fs_path_free(p); 2569 btrfs_free_path(path); 2570 return ret; 2571 } 2572 2573 /* 2574 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have 2575 * a valid path yet because we did not process the refs yet. So the inode 2576 * is created as an orphan. 2577 */ 2578 static int send_create_inode(struct send_ctx *sctx, u64 ino) 2579 { 2580 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; 2581 int ret = 0; 2582 struct fs_path *p; 2583 int cmd; 2584 u64 gen; 2585 u64 mode; 2586 u64 rdev; 2587 2588 btrfs_debug(fs_info, "send_create_inode %llu", ino); 2589 2590 p = fs_path_alloc(); 2591 if (!p) 2592 return -ENOMEM; 2593 2594 if (ino != sctx->cur_ino) { 2595 ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, 2596 NULL, NULL, &rdev); 2597 if (ret < 0) 2598 goto out; 2599 } else { 2600 gen = sctx->cur_inode_gen; 2601 mode = sctx->cur_inode_mode; 2602 rdev = sctx->cur_inode_rdev; 2603 } 2604 2605 if (S_ISREG(mode)) { 2606 cmd = BTRFS_SEND_C_MKFILE; 2607 } else if (S_ISDIR(mode)) { 2608 cmd = BTRFS_SEND_C_MKDIR; 2609 } else if (S_ISLNK(mode)) { 2610 cmd = BTRFS_SEND_C_SYMLINK; 2611 } else if (S_ISCHR(mode) || S_ISBLK(mode)) { 2612 cmd = BTRFS_SEND_C_MKNOD; 2613 } else if (S_ISFIFO(mode)) { 2614 cmd = BTRFS_SEND_C_MKFIFO; 2615 } else if (S_ISSOCK(mode)) { 2616 cmd = BTRFS_SEND_C_MKSOCK; 2617 } else { 2618 btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o", 2619 (int)(mode & S_IFMT)); 2620 ret = -ENOTSUPP; 2621 goto out; 2622 } 2623 2624 ret = begin_cmd(sctx, cmd); 2625 if (ret < 0) 2626 goto out; 2627 2628 ret = gen_unique_name(sctx, ino, gen, p); 2629 if (ret < 0) 2630 goto out; 2631 2632 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2633 TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino); 2634 2635 if (S_ISLNK(mode)) { 2636 fs_path_reset(p); 2637 ret = read_symlink(sctx->send_root, ino, p); 2638 if (ret < 0) 2639 goto out; 2640 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); 2641 } else if (S_ISCHR(mode) || S_ISBLK(mode) || 2642 S_ISFIFO(mode) || S_ISSOCK(mode)) { 2643 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev)); 2644 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode); 2645 } 2646 2647 ret = send_cmd(sctx); 2648 if (ret < 0) 2649 goto out; 2650 2651 2652 tlv_put_failure: 2653 out: 2654 fs_path_free(p); 2655 return ret; 2656 } 2657 2658 /* 2659 * We need some special handling for inodes that get processed before their 2660 * parent directory is created. See process_recorded_refs for details. 2661 * This function checks whether we already created the dir out of order.
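 * A hypothetical example: when an inode numbered below its parent dir
 * was processed first, the parent was created out of order at that
 * point, and no second create must be sent once the dir itself is
 * processed.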
2662 */ 2663 static int did_create_dir(struct send_ctx *sctx, u64 dir) 2664 { 2665 int ret = 0; 2666 struct btrfs_path *path = NULL; 2667 struct btrfs_key key; 2668 struct btrfs_key found_key; 2669 struct btrfs_key di_key; 2670 struct extent_buffer *eb; 2671 struct btrfs_dir_item *di; 2672 int slot; 2673 2674 path = alloc_path_for_send(); 2675 if (!path) { 2676 ret = -ENOMEM; 2677 goto out; 2678 } 2679 2680 key.objectid = dir; 2681 key.type = BTRFS_DIR_INDEX_KEY; 2682 key.offset = 0; 2683 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2684 if (ret < 0) 2685 goto out; 2686 2687 while (1) { 2688 eb = path->nodes[0]; 2689 slot = path->slots[0]; 2690 if (slot >= btrfs_header_nritems(eb)) { 2691 ret = btrfs_next_leaf(sctx->send_root, path); 2692 if (ret < 0) { 2693 goto out; 2694 } else if (ret > 0) { 2695 ret = 0; 2696 break; 2697 } 2698 continue; 2699 } 2700 2701 btrfs_item_key_to_cpu(eb, &found_key, slot); 2702 if (found_key.objectid != key.objectid || 2703 found_key.type != key.type) { 2704 ret = 0; 2705 goto out; 2706 } 2707 2708 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 2709 btrfs_dir_item_key_to_cpu(eb, di, &di_key); 2710 2711 if (di_key.type != BTRFS_ROOT_ITEM_KEY && 2712 di_key.objectid < sctx->send_progress) { 2713 ret = 1; 2714 goto out; 2715 } 2716 2717 path->slots[0]++; 2718 } 2719 2720 out: 2721 btrfs_free_path(path); 2722 return ret; 2723 } 2724 2725 /* 2726 * Only creates the inode if it is: 2727 * 1. Not a directory 2728 * 2. Or a directory which was not created already due to out of order 2729 * directories. See did_create_dir and process_recorded_refs for details. 2730 */ 2731 static int send_create_inode_if_needed(struct send_ctx *sctx) 2732 { 2733 int ret; 2734 2735 if (S_ISDIR(sctx->cur_inode_mode)) { 2736 ret = did_create_dir(sctx, sctx->cur_ino); 2737 if (ret < 0) 2738 goto out; 2739 if (ret) { 2740 ret = 0; 2741 goto out; 2742 } 2743 } 2744 2745 ret = send_create_inode(sctx, sctx->cur_ino); 2746 if (ret < 0) 2747 goto out; 2748 2749 out: 2750 return ret; 2751 } 2752 2753 struct recorded_ref { 2754 struct list_head list; 2755 char *dir_path; 2756 char *name; 2757 struct fs_path *full_path; 2758 u64 dir; 2759 u64 dir_gen; 2760 int dir_path_len; 2761 int name_len; 2762 }; 2763 2764 /* 2765 * We need to process new refs before deleted refs, but compare_tree gives us 2766 * everything mixed. So we first record all refs and later process them. 2767 * This function is a helper to record one ref. 
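 * For a full_path of "a/b/name", the code below splits it into the
 * dir_path "a/b" and the basename "name"; a path without any slash gets
 * an empty dir_path.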
2768 */ 2769 static int __record_ref(struct list_head *head, u64 dir, 2770 u64 dir_gen, struct fs_path *path) 2771 { 2772 struct recorded_ref *ref; 2773 2774 ref = kmalloc(sizeof(*ref), GFP_KERNEL); 2775 if (!ref) 2776 return -ENOMEM; 2777 2778 ref->dir = dir; 2779 ref->dir_gen = dir_gen; 2780 ref->full_path = path; 2781 2782 ref->name = (char *)kbasename(ref->full_path->start); 2783 ref->name_len = ref->full_path->end - ref->name; 2784 ref->dir_path = ref->full_path->start; 2785 if (ref->name == ref->full_path->start) 2786 ref->dir_path_len = 0; 2787 else 2788 ref->dir_path_len = ref->full_path->end - 2789 ref->full_path->start - 1 - ref->name_len; 2790 2791 list_add_tail(&ref->list, head); 2792 return 0; 2793 } 2794 2795 static int dup_ref(struct recorded_ref *ref, struct list_head *list) 2796 { 2797 struct recorded_ref *new; 2798 2799 new = kmalloc(sizeof(*ref), GFP_KERNEL); 2800 if (!new) 2801 return -ENOMEM; 2802 2803 new->dir = ref->dir; 2804 new->dir_gen = ref->dir_gen; 2805 new->full_path = NULL; 2806 INIT_LIST_HEAD(&new->list); 2807 list_add_tail(&new->list, list); 2808 return 0; 2809 } 2810 2811 static void __free_recorded_refs(struct list_head *head) 2812 { 2813 struct recorded_ref *cur; 2814 2815 while (!list_empty(head)) { 2816 cur = list_entry(head->next, struct recorded_ref, list); 2817 fs_path_free(cur->full_path); 2818 list_del(&cur->list); 2819 kfree(cur); 2820 } 2821 } 2822 2823 static void free_recorded_refs(struct send_ctx *sctx) 2824 { 2825 __free_recorded_refs(&sctx->new_refs); 2826 __free_recorded_refs(&sctx->deleted_refs); 2827 } 2828 2829 /* 2830 * Renames/moves a file/dir to its orphan name. Used when the first 2831 * ref of an unprocessed inode gets overwritten and for all non empty 2832 * directories. 2833 */ 2834 static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, 2835 struct fs_path *path) 2836 { 2837 int ret; 2838 struct fs_path *orphan; 2839 2840 orphan = fs_path_alloc(); 2841 if (!orphan) 2842 return -ENOMEM; 2843 2844 ret = gen_unique_name(sctx, ino, gen, orphan); 2845 if (ret < 0) 2846 goto out; 2847 2848 ret = send_rename(sctx, path, orphan); 2849 2850 out: 2851 fs_path_free(orphan); 2852 return ret; 2853 } 2854 2855 static struct orphan_dir_info * 2856 add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) 2857 { 2858 struct rb_node **p = &sctx->orphan_dirs.rb_node; 2859 struct rb_node *parent = NULL; 2860 struct orphan_dir_info *entry, *odi; 2861 2862 odi = kmalloc(sizeof(*odi), GFP_KERNEL); 2863 if (!odi) 2864 return ERR_PTR(-ENOMEM); 2865 odi->ino = dir_ino; 2866 odi->gen = 0; 2867 2868 while (*p) { 2869 parent = *p; 2870 entry = rb_entry(parent, struct orphan_dir_info, node); 2871 if (dir_ino < entry->ino) { 2872 p = &(*p)->rb_left; 2873 } else if (dir_ino > entry->ino) { 2874 p = &(*p)->rb_right; 2875 } else { 2876 kfree(odi); 2877 return entry; 2878 } 2879 } 2880 2881 rb_link_node(&odi->node, parent, p); 2882 rb_insert_color(&odi->node, &sctx->orphan_dirs); 2883 return odi; 2884 } 2885 2886 static struct orphan_dir_info * 2887 get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) 2888 { 2889 struct rb_node *n = sctx->orphan_dirs.rb_node; 2890 struct orphan_dir_info *entry; 2891 2892 while (n) { 2893 entry = rb_entry(n, struct orphan_dir_info, node); 2894 if (dir_ino < entry->ino) 2895 n = n->rb_left; 2896 else if (dir_ino > entry->ino) 2897 n = n->rb_right; 2898 else 2899 return entry; 2900 } 2901 return NULL; 2902 } 2903 2904 static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) 2905 { 2906 struct orphan_dir_info 
*odi = get_orphan_dir_info(sctx, dir_ino); 2907 2908 return odi != NULL; 2909 } 2910 2911 static void free_orphan_dir_info(struct send_ctx *sctx, 2912 struct orphan_dir_info *odi) 2913 { 2914 if (!odi) 2915 return; 2916 rb_erase(&odi->node, &sctx->orphan_dirs); 2917 kfree(odi); 2918 } 2919 2920 /* 2921 * Returns 1 if a directory can be removed at this point in time. 2922 * We check this by iterating all dir items and checking if the inode behind 2923 * the dir item was already processed. 2924 */ 2925 static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, 2926 u64 send_progress) 2927 { 2928 int ret = 0; 2929 struct btrfs_root *root = sctx->parent_root; 2930 struct btrfs_path *path; 2931 struct btrfs_key key; 2932 struct btrfs_key found_key; 2933 struct btrfs_key loc; 2934 struct btrfs_dir_item *di; 2935 2936 /* 2937 * Don't try to rmdir the top/root subvolume dir. 2938 */ 2939 if (dir == BTRFS_FIRST_FREE_OBJECTID) 2940 return 0; 2941 2942 path = alloc_path_for_send(); 2943 if (!path) 2944 return -ENOMEM; 2945 2946 key.objectid = dir; 2947 key.type = BTRFS_DIR_INDEX_KEY; 2948 key.offset = 0; 2949 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2950 if (ret < 0) 2951 goto out; 2952 2953 while (1) { 2954 struct waiting_dir_move *dm; 2955 2956 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 2957 ret = btrfs_next_leaf(root, path); 2958 if (ret < 0) 2959 goto out; 2960 else if (ret > 0) 2961 break; 2962 continue; 2963 } 2964 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2965 path->slots[0]); 2966 if (found_key.objectid != key.objectid || 2967 found_key.type != key.type) 2968 break; 2969 2970 di = btrfs_item_ptr(path->nodes[0], path->slots[0], 2971 struct btrfs_dir_item); 2972 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); 2973 2974 dm = get_waiting_dir_move(sctx, loc.objectid); 2975 if (dm) { 2976 struct orphan_dir_info *odi; 2977 2978 odi = add_orphan_dir_info(sctx, dir); 2979 if (IS_ERR(odi)) { 2980 ret = PTR_ERR(odi); 2981 goto out; 2982 } 2983 odi->gen = dir_gen; 2984 dm->rmdir_ino = dir; 2985 ret = 0; 2986 goto out; 2987 } 2988 2989 if (loc.objectid > send_progress) { 2990 struct orphan_dir_info *odi; 2991 2992 odi = get_orphan_dir_info(sctx, dir); 2993 free_orphan_dir_info(sctx, odi); 2994 ret = 0; 2995 goto out; 2996 } 2997 2998 path->slots[0]++; 2999 } 3000 3001 ret = 1; 3002 3003 out: 3004 btrfs_free_path(path); 3005 return ret; 3006 } 3007 3008 static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) 3009 { 3010 struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino); 3011 3012 return entry != NULL; 3013 } 3014 3015 static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized) 3016 { 3017 struct rb_node **p = &sctx->waiting_dir_moves.rb_node; 3018 struct rb_node *parent = NULL; 3019 struct waiting_dir_move *entry, *dm; 3020 3021 dm = kmalloc(sizeof(*dm), GFP_KERNEL); 3022 if (!dm) 3023 return -ENOMEM; 3024 dm->ino = ino; 3025 dm->rmdir_ino = 0; 3026 dm->orphanized = orphanized; 3027 3028 while (*p) { 3029 parent = *p; 3030 entry = rb_entry(parent, struct waiting_dir_move, node); 3031 if (ino < entry->ino) { 3032 p = &(*p)->rb_left; 3033 } else if (ino > entry->ino) { 3034 p = &(*p)->rb_right; 3035 } else { 3036 kfree(dm); 3037 return -EEXIST; 3038 } 3039 } 3040 3041 rb_link_node(&dm->node, parent, p); 3042 rb_insert_color(&dm->node, &sctx->waiting_dir_moves); 3043 return 0; 3044 } 3045 3046 static struct waiting_dir_move * 3047 get_waiting_dir_move(struct send_ctx *sctx, u64 ino) 3048 { 3049 struct rb_node *n = 
sctx->waiting_dir_moves.rb_node; 3050 struct waiting_dir_move *entry; 3051 3052 while (n) { 3053 entry = rb_entry(n, struct waiting_dir_move, node); 3054 if (ino < entry->ino) 3055 n = n->rb_left; 3056 else if (ino > entry->ino) 3057 n = n->rb_right; 3058 else 3059 return entry; 3060 } 3061 return NULL; 3062 } 3063 3064 static void free_waiting_dir_move(struct send_ctx *sctx, 3065 struct waiting_dir_move *dm) 3066 { 3067 if (!dm) 3068 return; 3069 rb_erase(&dm->node, &sctx->waiting_dir_moves); 3070 kfree(dm); 3071 } 3072 3073 static int add_pending_dir_move(struct send_ctx *sctx, 3074 u64 ino, 3075 u64 ino_gen, 3076 u64 parent_ino, 3077 struct list_head *new_refs, 3078 struct list_head *deleted_refs, 3079 const bool is_orphan) 3080 { 3081 struct rb_node **p = &sctx->pending_dir_moves.rb_node; 3082 struct rb_node *parent = NULL; 3083 struct pending_dir_move *entry = NULL, *pm; 3084 struct recorded_ref *cur; 3085 int exists = 0; 3086 int ret; 3087 3088 pm = kmalloc(sizeof(*pm), GFP_KERNEL); 3089 if (!pm) 3090 return -ENOMEM; 3091 pm->parent_ino = parent_ino; 3092 pm->ino = ino; 3093 pm->gen = ino_gen; 3094 INIT_LIST_HEAD(&pm->list); 3095 INIT_LIST_HEAD(&pm->update_refs); 3096 RB_CLEAR_NODE(&pm->node); 3097 3098 while (*p) { 3099 parent = *p; 3100 entry = rb_entry(parent, struct pending_dir_move, node); 3101 if (parent_ino < entry->parent_ino) { 3102 p = &(*p)->rb_left; 3103 } else if (parent_ino > entry->parent_ino) { 3104 p = &(*p)->rb_right; 3105 } else { 3106 exists = 1; 3107 break; 3108 } 3109 } 3110 3111 list_for_each_entry(cur, deleted_refs, list) { 3112 ret = dup_ref(cur, &pm->update_refs); 3113 if (ret < 0) 3114 goto out; 3115 } 3116 list_for_each_entry(cur, new_refs, list) { 3117 ret = dup_ref(cur, &pm->update_refs); 3118 if (ret < 0) 3119 goto out; 3120 } 3121 3122 ret = add_waiting_dir_move(sctx, pm->ino, is_orphan); 3123 if (ret) 3124 goto out; 3125 3126 if (exists) { 3127 list_add_tail(&pm->list, &entry->list); 3128 } else { 3129 rb_link_node(&pm->node, parent, p); 3130 rb_insert_color(&pm->node, &sctx->pending_dir_moves); 3131 } 3132 ret = 0; 3133 out: 3134 if (ret) { 3135 __free_recorded_refs(&pm->update_refs); 3136 kfree(pm); 3137 } 3138 return ret; 3139 } 3140 3141 static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, 3142 u64 parent_ino) 3143 { 3144 struct rb_node *n = sctx->pending_dir_moves.rb_node; 3145 struct pending_dir_move *entry; 3146 3147 while (n) { 3148 entry = rb_entry(n, struct pending_dir_move, node); 3149 if (parent_ino < entry->parent_ino) 3150 n = n->rb_left; 3151 else if (parent_ino > entry->parent_ino) 3152 n = n->rb_right; 3153 else 3154 return entry; 3155 } 3156 return NULL; 3157 } 3158 3159 static int path_loop(struct send_ctx *sctx, struct fs_path *name, 3160 u64 ino, u64 gen, u64 *ancestor_ino) 3161 { 3162 int ret = 0; 3163 u64 parent_inode = 0; 3164 u64 parent_gen = 0; 3165 u64 start_ino = ino; 3166 3167 *ancestor_ino = 0; 3168 while (ino != BTRFS_FIRST_FREE_OBJECTID) { 3169 fs_path_reset(name); 3170 3171 if (is_waiting_for_rm(sctx, ino)) 3172 break; 3173 if (is_waiting_for_move(sctx, ino)) { 3174 if (*ancestor_ino == 0) 3175 *ancestor_ino = ino; 3176 ret = get_first_ref(sctx->parent_root, ino, 3177 &parent_inode, &parent_gen, name); 3178 } else { 3179 ret = __get_cur_name_and_parent(sctx, ino, gen, 3180 &parent_inode, 3181 &parent_gen, name); 3182 if (ret > 0) { 3183 ret = 0; 3184 break; 3185 } 3186 } 3187 if (ret < 0) 3188 break; 3189 if (parent_inode == start_ino) { 3190 ret = 1; 3191 if (*ancestor_ino == 0) 3192 
*ancestor_ino = ino; 3193 break; 3194 } 3195 ino = parent_inode; 3196 gen = parent_gen; 3197 } 3198 return ret; 3199 } 3200 3201 static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) 3202 { 3203 struct fs_path *from_path = NULL; 3204 struct fs_path *to_path = NULL; 3205 struct fs_path *name = NULL; 3206 u64 orig_progress = sctx->send_progress; 3207 struct recorded_ref *cur; 3208 u64 parent_ino, parent_gen; 3209 struct waiting_dir_move *dm = NULL; 3210 u64 rmdir_ino = 0; 3211 u64 ancestor; 3212 bool is_orphan; 3213 int ret; 3214 3215 name = fs_path_alloc(); 3216 from_path = fs_path_alloc(); 3217 if (!name || !from_path) { 3218 ret = -ENOMEM; 3219 goto out; 3220 } 3221 3222 dm = get_waiting_dir_move(sctx, pm->ino); 3223 ASSERT(dm); 3224 rmdir_ino = dm->rmdir_ino; 3225 is_orphan = dm->orphanized; 3226 free_waiting_dir_move(sctx, dm); 3227 3228 if (is_orphan) { 3229 ret = gen_unique_name(sctx, pm->ino, 3230 pm->gen, from_path); 3231 } else { 3232 ret = get_first_ref(sctx->parent_root, pm->ino, 3233 &parent_ino, &parent_gen, name); 3234 if (ret < 0) 3235 goto out; 3236 ret = get_cur_path(sctx, parent_ino, parent_gen, 3237 from_path); 3238 if (ret < 0) 3239 goto out; 3240 ret = fs_path_add_path(from_path, name); 3241 } 3242 if (ret < 0) 3243 goto out; 3244 3245 sctx->send_progress = sctx->cur_ino + 1; 3246 ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor); 3247 if (ret < 0) 3248 goto out; 3249 if (ret) { 3250 LIST_HEAD(deleted_refs); 3251 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); 3252 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, 3253 &pm->update_refs, &deleted_refs, 3254 is_orphan); 3255 if (ret < 0) 3256 goto out; 3257 if (rmdir_ino) { 3258 dm = get_waiting_dir_move(sctx, pm->ino); 3259 ASSERT(dm); 3260 dm->rmdir_ino = rmdir_ino; 3261 } 3262 goto out; 3263 } 3264 fs_path_reset(name); 3265 to_path = name; 3266 name = NULL; 3267 ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); 3268 if (ret < 0) 3269 goto out; 3270 3271 ret = send_rename(sctx, from_path, to_path); 3272 if (ret < 0) 3273 goto out; 3274 3275 if (rmdir_ino) { 3276 struct orphan_dir_info *odi; 3277 3278 odi = get_orphan_dir_info(sctx, rmdir_ino); 3279 if (!odi) { 3280 /* already deleted */ 3281 goto finish; 3282 } 3283 ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino); 3284 if (ret < 0) 3285 goto out; 3286 if (!ret) 3287 goto finish; 3288 3289 name = fs_path_alloc(); 3290 if (!name) { 3291 ret = -ENOMEM; 3292 goto out; 3293 } 3294 ret = get_cur_path(sctx, rmdir_ino, odi->gen, name); 3295 if (ret < 0) 3296 goto out; 3297 ret = send_rmdir(sctx, name); 3298 if (ret < 0) 3299 goto out; 3300 free_orphan_dir_info(sctx, odi); 3301 } 3302 3303 finish: 3304 ret = send_utimes(sctx, pm->ino, pm->gen); 3305 if (ret < 0) 3306 goto out; 3307 3308 /* 3309 * After rename/move, need to update the utimes of both new parent(s) 3310 * and old parent(s). 
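 * pm->update_refs holds both the deleted refs (old parents) and the new
 * refs (new parents), so the single walk below covers both sides.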
3311 */ 3312 list_for_each_entry(cur, &pm->update_refs, list) { 3313 /* 3314 * The parent inode might have been deleted in the send snapshot 3315 */ 3316 ret = get_inode_info(sctx->send_root, cur->dir, NULL, 3317 NULL, NULL, NULL, NULL, NULL); 3318 if (ret == -ENOENT) { 3319 ret = 0; 3320 continue; 3321 } 3322 if (ret < 0) 3323 goto out; 3324 3325 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3326 if (ret < 0) 3327 goto out; 3328 } 3329 3330 out: 3331 fs_path_free(name); 3332 fs_path_free(from_path); 3333 fs_path_free(to_path); 3334 sctx->send_progress = orig_progress; 3335 3336 return ret; 3337 } 3338 3339 static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m) 3340 { 3341 if (!list_empty(&m->list)) 3342 list_del(&m->list); 3343 if (!RB_EMPTY_NODE(&m->node)) 3344 rb_erase(&m->node, &sctx->pending_dir_moves); 3345 __free_recorded_refs(&m->update_refs); 3346 kfree(m); 3347 } 3348 3349 static void tail_append_pending_moves(struct pending_dir_move *moves, 3350 struct list_head *stack) 3351 { 3352 if (list_empty(&moves->list)) { 3353 list_add_tail(&moves->list, stack); 3354 } else { 3355 LIST_HEAD(list); 3356 list_splice_init(&moves->list, &list); 3357 list_add_tail(&moves->list, stack); 3358 list_splice_tail(&list, stack); 3359 } 3360 } 3361 3362 static int apply_children_dir_moves(struct send_ctx *sctx) 3363 { 3364 struct pending_dir_move *pm; 3365 struct list_head stack; 3366 u64 parent_ino = sctx->cur_ino; 3367 int ret = 0; 3368 3369 pm = get_pending_dir_moves(sctx, parent_ino); 3370 if (!pm) 3371 return 0; 3372 3373 INIT_LIST_HEAD(&stack); 3374 tail_append_pending_moves(pm, &stack); 3375 3376 while (!list_empty(&stack)) { 3377 pm = list_first_entry(&stack, struct pending_dir_move, list); 3378 parent_ino = pm->ino; 3379 ret = apply_dir_move(sctx, pm); 3380 free_pending_move(sctx, pm); 3381 if (ret) 3382 goto out; 3383 pm = get_pending_dir_moves(sctx, parent_ino); 3384 if (pm) 3385 tail_append_pending_moves(pm, &stack); 3386 } 3387 return 0; 3388 3389 out: 3390 while (!list_empty(&stack)) { 3391 pm = list_first_entry(&stack, struct pending_dir_move, list); 3392 free_pending_move(sctx, pm); 3393 } 3394 return ret; 3395 } 3396 3397 /* 3398 * We might need to delay a directory rename even when no ancestor directory 3399 * (in the send root) with a higher inode number than ours (sctx->cur_ino) was 3400 * renamed. This happens when we rename a directory to the old name (the name 3401 * in the parent root) of some other unrelated directory that got its rename 3402 * delayed due to some ancestor with higher number that got renamed. 3403 * 3404 * Example: 3405 * 3406 * Parent snapshot: 3407 * . (ino 256) 3408 * |---- a/ (ino 257) 3409 * | |---- file (ino 260) 3410 * | 3411 * |---- b/ (ino 258) 3412 * |---- c/ (ino 259) 3413 * 3414 * Send snapshot: 3415 * . (ino 256) 3416 * |---- a/ (ino 258) 3417 * |---- x/ (ino 259) 3418 * |---- y/ (ino 257) 3419 * |----- file (ino 260) 3420 * 3421 * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257 3422 * from 'a' to 'x/y' happening first, which in turn depends on the rename of 3423 * inode 259 from 'c' to 'x'. So the order of rename commands the send stream 3424 * must issue is: 3425 * 3426 * 1 - rename 259 from 'c' to 'x' 3427 * 2 - rename 257 from 'a' to 'x/y' 3428 * 3 - rename 258 from 'b' to 'a' 3429 * 3430 * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can 3431 * be done right away and < 0 on error. 
3432 */ 3433 static int wait_for_dest_dir_move(struct send_ctx *sctx, 3434 struct recorded_ref *parent_ref, 3435 const bool is_orphan) 3436 { 3437 struct btrfs_path *path; 3438 struct btrfs_key key; 3439 struct btrfs_key di_key; 3440 struct btrfs_dir_item *di; 3441 u64 left_gen; 3442 u64 right_gen; 3443 int ret = 0; 3444 struct waiting_dir_move *wdm; 3445 3446 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) 3447 return 0; 3448 3449 path = alloc_path_for_send(); 3450 if (!path) 3451 return -ENOMEM; 3452 3453 key.objectid = parent_ref->dir; 3454 key.type = BTRFS_DIR_ITEM_KEY; 3455 key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len); 3456 3457 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); 3458 if (ret < 0) { 3459 goto out; 3460 } else if (ret > 0) { 3461 ret = 0; 3462 goto out; 3463 } 3464 3465 di = btrfs_match_dir_item_name(sctx->parent_root, path, 3466 parent_ref->name, parent_ref->name_len); 3467 if (!di) { 3468 ret = 0; 3469 goto out; 3470 } 3471 /* 3472 * di_key.objectid has the number of the inode that has a dentry in the 3473 * parent directory with the same name that sctx->cur_ino is being 3474 * renamed to. We need to check if that inode is in the send root as 3475 * well and if it is currently marked as an inode with a pending rename. 3476 * If it is, we need to delay the rename of sctx->cur_ino as well, so 3477 * that it happens after that other inode is renamed. 3478 */ 3479 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key); 3480 if (di_key.type != BTRFS_INODE_ITEM_KEY) { 3481 ret = 0; 3482 goto out; 3483 } 3484 3485 ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL, 3486 &left_gen, NULL, NULL, NULL, NULL); 3487 if (ret < 0) 3488 goto out; 3489 ret = get_inode_info(sctx->send_root, di_key.objectid, NULL, 3490 &right_gen, NULL, NULL, NULL, NULL); 3491 if (ret < 0) { 3492 if (ret == -ENOENT) 3493 ret = 0; 3494 goto out; 3495 } 3496 3497 /* Different inode, no need to delay the rename of sctx->cur_ino */ 3498 if (right_gen != left_gen) { 3499 ret = 0; 3500 goto out; 3501 } 3502 3503 wdm = get_waiting_dir_move(sctx, di_key.objectid); 3504 if (wdm && !wdm->orphanized) { 3505 ret = add_pending_dir_move(sctx, 3506 sctx->cur_ino, 3507 sctx->cur_inode_gen, 3508 di_key.objectid, 3509 &sctx->new_refs, 3510 &sctx->deleted_refs, 3511 is_orphan); 3512 if (!ret) 3513 ret = 1; 3514 } 3515 out: 3516 btrfs_free_path(path); 3517 return ret; 3518 } 3519 3520 /* 3521 * Check if inode ino1 is an ancestor of inode ino2 in the given root. 3522 * Return 1 if true, 0 if false and < 0 on error. 3523 */ 3524 static int is_ancestor(struct btrfs_root *root, 3525 const u64 ino1, 3526 const u64 ino1_gen, 3527 const u64 ino2, 3528 struct fs_path *fs_path) 3529 { 3530 u64 ino = ino2; 3531 3532 while (ino > BTRFS_FIRST_FREE_OBJECTID) { 3533 int ret; 3534 u64 parent; 3535 u64 parent_gen; 3536 3537 fs_path_reset(fs_path); 3538 ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path); 3539 if (ret < 0) { 3540 if (ret == -ENOENT && ino == ino2) 3541 ret = 0; 3542 return ret; 3543 } 3544 if (parent == ino1) 3545 return parent_gen == ino1_gen ?
1 : 0; 3546 ino = parent; 3547 } 3548 return 0; 3549 } 3550 3551 static int wait_for_parent_move(struct send_ctx *sctx, 3552 struct recorded_ref *parent_ref, 3553 const bool is_orphan) 3554 { 3555 int ret = 0; 3556 u64 ino = parent_ref->dir; 3557 u64 parent_ino_before, parent_ino_after; 3558 struct fs_path *path_before = NULL; 3559 struct fs_path *path_after = NULL; 3560 int len1, len2; 3561 3562 path_after = fs_path_alloc(); 3563 path_before = fs_path_alloc(); 3564 if (!path_after || !path_before) { 3565 ret = -ENOMEM; 3566 goto out; 3567 } 3568 3569 /* 3570 * Our current directory inode may not yet be renamed/moved because some 3571 * ancestor (immediate or not) has to be renamed/moved first. So find out 3572 * if such an ancestor exists and make sure our own rename/move happens 3573 * after that ancestor is processed to avoid path build infinite loops 3574 * (done at get_cur_path()). 3575 */ 3576 while (ino > BTRFS_FIRST_FREE_OBJECTID) { 3577 if (is_waiting_for_move(sctx, ino)) { 3578 /* 3579 * If the current inode is an ancestor of ino in the 3580 * parent root, we need to delay the rename of the 3581 * current inode; otherwise don't delay the rename, 3582 * because we can end up with a circular dependency 3583 * of renames, resulting in some directories never 3584 * getting the respective rename operations issued in 3585 * the send stream or getting into infinite path build 3586 * loops. 3587 */ 3588 ret = is_ancestor(sctx->parent_root, 3589 sctx->cur_ino, sctx->cur_inode_gen, 3590 ino, path_before); 3591 if (ret) 3592 break; 3593 } 3594 3595 fs_path_reset(path_before); 3596 fs_path_reset(path_after); 3597 3598 ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, 3599 NULL, path_after); 3600 if (ret < 0) 3601 goto out; 3602 ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, 3603 NULL, path_before); 3604 if (ret < 0 && ret != -ENOENT) { 3605 goto out; 3606 } else if (ret == -ENOENT) { 3607 ret = 0; 3608 break; 3609 } 3610 3611 len1 = fs_path_len(path_before); 3612 len2 = fs_path_len(path_after); 3613 if (ino > sctx->cur_ino && 3614 (parent_ino_before != parent_ino_after || len1 != len2 || 3615 memcmp(path_before->start, path_after->start, len1))) { 3616 ret = 1; 3617 break; 3618 } 3619 ino = parent_ino_after; 3620 } 3621 3622 out: 3623 fs_path_free(path_before); 3624 fs_path_free(path_after); 3625 3626 if (ret == 1) { 3627 ret = add_pending_dir_move(sctx, 3628 sctx->cur_ino, 3629 sctx->cur_inode_gen, 3630 ino, 3631 &sctx->new_refs, 3632 &sctx->deleted_refs, 3633 is_orphan); 3634 if (!ret) 3635 ret = 1; 3636 } 3637 3638 return ret; 3639 } 3640 3641 /* 3642 * This does all the move/link/unlink/rmdir magic. 3643 */ 3644 static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) 3645 { 3646 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; 3647 int ret = 0; 3648 struct recorded_ref *cur; 3649 struct recorded_ref *cur2; 3650 struct list_head check_dirs; 3651 struct fs_path *valid_path = NULL; 3652 u64 ow_inode = 0; 3653 u64 ow_gen; 3654 int did_overwrite = 0; 3655 int is_orphan = 0; 3656 u64 last_dir_ino_rm = 0; 3657 bool can_rename = true; 3658 3659 btrfs_debug(fs_info, "process_recorded_refs %llu", sctx->cur_ino); 3660 3661 /* 3662 * This should never happen as the root dir always has the same ref 3663 * which is always '..'
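 * (i.e. the root dir, inode BTRFS_FIRST_FREE_OBJECTID, never reaches
 * this function, which the BUG_ON below enforces).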
3664 */ 3665 BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); 3666 INIT_LIST_HEAD(&check_dirs); 3667 3668 valid_path = fs_path_alloc(); 3669 if (!valid_path) { 3670 ret = -ENOMEM; 3671 goto out; 3672 } 3673 3674 /* 3675 * First, check if the first ref of the current inode was overwritten 3676 * before. If yes, we know that the current inode was already orphanized 3677 * and thus use the orphan name. If not, we can use get_cur_path to 3678 * get the path of the first ref as it would look while receiving at 3679 * this point in time. 3680 * New inodes are always orphan at the beginning, so we force the use of 3681 * the orphan name in this case. 3682 * The first ref is stored in valid_path and will be updated if it 3683 * gets moved around. 3684 */ 3685 if (!sctx->cur_inode_new) { 3686 ret = did_overwrite_first_ref(sctx, sctx->cur_ino, 3687 sctx->cur_inode_gen); 3688 if (ret < 0) 3689 goto out; 3690 if (ret) 3691 did_overwrite = 1; 3692 } 3693 if (sctx->cur_inode_new || did_overwrite) { 3694 ret = gen_unique_name(sctx, sctx->cur_ino, 3695 sctx->cur_inode_gen, valid_path); 3696 if (ret < 0) 3697 goto out; 3698 is_orphan = 1; 3699 } else { 3700 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, 3701 valid_path); 3702 if (ret < 0) 3703 goto out; 3704 } 3705 3706 list_for_each_entry(cur, &sctx->new_refs, list) { 3707 /* 3708 * We may have refs where the parent directory does not exist 3709 * yet. This happens if the parent directory's inum is higher 3710 * than the current inum. To handle this case, we create the 3711 * parent directory out of order. But we need to check if this 3712 * already happened before due to other refs in the same dir. 3713 */ 3714 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); 3715 if (ret < 0) 3716 goto out; 3717 if (ret == inode_state_will_create) { 3718 ret = 0; 3719 /* 3720 * First check if any of the current inode's refs did 3721 * already create the dir. 3722 */ 3723 list_for_each_entry(cur2, &sctx->new_refs, list) { 3724 if (cur == cur2) 3725 break; 3726 if (cur2->dir == cur->dir) { 3727 ret = 1; 3728 break; 3729 } 3730 } 3731 3732 /* 3733 * If that did not happen, check if a previous inode 3734 * did already create the dir. 3735 */ 3736 if (!ret) 3737 ret = did_create_dir(sctx, cur->dir); 3738 if (ret < 0) 3739 goto out; 3740 if (!ret) { 3741 ret = send_create_inode(sctx, cur->dir); 3742 if (ret < 0) 3743 goto out; 3744 } 3745 } 3746 3747 /* 3748 * Check if this new ref would overwrite the first ref of 3749 * another unprocessed inode. If yes, orphanize the 3750 * overwritten inode. If we find an overwritten ref that is 3751 * not the first ref, simply unlink it. 3752 */ 3753 ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3754 cur->name, cur->name_len, 3755 &ow_inode, &ow_gen); 3756 if (ret < 0) 3757 goto out; 3758 if (ret) { 3759 ret = is_first_ref(sctx->parent_root, 3760 ow_inode, cur->dir, cur->name, 3761 cur->name_len); 3762 if (ret < 0) 3763 goto out; 3764 if (ret) { 3765 struct name_cache_entry *nce; 3766 struct waiting_dir_move *wdm; 3767 3768 ret = orphanize_inode(sctx, ow_inode, ow_gen, 3769 cur->full_path); 3770 if (ret < 0) 3771 goto out; 3772 3773 /* 3774 * If ow_inode has its rename operation delayed, 3775 * make sure that its orphanized name is used in 3776 * the source path when performing its rename 3777 * operation.
3778 */ 3779 if (is_waiting_for_move(sctx, ow_inode)) { 3780 wdm = get_waiting_dir_move(sctx, 3781 ow_inode); 3782 ASSERT(wdm); 3783 wdm->orphanized = true; 3784 } 3785 3786 /* 3787 * Make sure we clear our orphanized inode's 3788 * name from the name cache. This is because the 3789 * inode ow_inode might be an ancestor of some 3790 * other inode that will be orphanized as well 3791 * later and has an inode number greater than 3792 * sctx->send_progress. We need to prevent 3793 * future name lookups from using the old name 3794 * and get instead the orphan name. 3795 */ 3796 nce = name_cache_search(sctx, ow_inode, ow_gen); 3797 if (nce) { 3798 name_cache_delete(sctx, nce); 3799 kfree(nce); 3800 } 3801 3802 /* 3803 * ow_inode might currently be an ancestor of 3804 * cur_ino, therefore compute valid_path (the 3805 * current path of cur_ino) again because it 3806 * might contain the pre-orphanization name of 3807 * ow_inode, which is no longer valid. 3808 */ 3809 fs_path_reset(valid_path); 3810 ret = get_cur_path(sctx, sctx->cur_ino, 3811 sctx->cur_inode_gen, valid_path); 3812 if (ret < 0) 3813 goto out; 3814 } else { 3815 ret = send_unlink(sctx, cur->full_path); 3816 if (ret < 0) 3817 goto out; 3818 } 3819 } 3820 3821 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) { 3822 ret = wait_for_dest_dir_move(sctx, cur, is_orphan); 3823 if (ret < 0) 3824 goto out; 3825 if (ret == 1) { 3826 can_rename = false; 3827 *pending_move = 1; 3828 } 3829 } 3830 3831 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root && 3832 can_rename) { 3833 ret = wait_for_parent_move(sctx, cur, is_orphan); 3834 if (ret < 0) 3835 goto out; 3836 if (ret == 1) { 3837 can_rename = false; 3838 *pending_move = 1; 3839 } 3840 } 3841 3842 /* 3843 * link/move the ref to the new place. If we have an orphan 3844 * inode, move it and update valid_path. If not, link or move 3845 * it depending on the inode mode. 3846 */ 3847 if (is_orphan && can_rename) { 3848 ret = send_rename(sctx, valid_path, cur->full_path); 3849 if (ret < 0) 3850 goto out; 3851 is_orphan = 0; 3852 ret = fs_path_copy(valid_path, cur->full_path); 3853 if (ret < 0) 3854 goto out; 3855 } else if (can_rename) { 3856 if (S_ISDIR(sctx->cur_inode_mode)) { 3857 /* 3858 * Dirs can't be linked, so move it. For moved 3859 * dirs, we always have one new and one deleted 3860 * ref. The deleted ref is ignored later. 3861 */ 3862 ret = send_rename(sctx, valid_path, 3863 cur->full_path); 3864 if (!ret) 3865 ret = fs_path_copy(valid_path, 3866 cur->full_path); 3867 if (ret < 0) 3868 goto out; 3869 } else { 3870 ret = send_link(sctx, cur->full_path, 3871 valid_path); 3872 if (ret < 0) 3873 goto out; 3874 } 3875 } 3876 ret = dup_ref(cur, &check_dirs); 3877 if (ret < 0) 3878 goto out; 3879 } 3880 3881 if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) { 3882 /* 3883 * Check if we can already rmdir the directory. If not, 3884 * orphanize it. For every dir item inside that gets deleted 3885 * later, we do this check again and rmdir it then if possible. 3886 * See the use of check_dirs for more details. 
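 *
 * A hypothetical sequence: if the send snapshot removed a directory
 * that still contains an unprocessed inode, the directory is first
 * renamed to its orphan name here, and the actual rmdir is retried
 * once that child has been processed.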
3887 */ 3888 ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen, 3889 sctx->cur_ino); 3890 if (ret < 0) 3891 goto out; 3892 if (ret) { 3893 ret = send_rmdir(sctx, valid_path); 3894 if (ret < 0) 3895 goto out; 3896 } else if (!is_orphan) { 3897 ret = orphanize_inode(sctx, sctx->cur_ino, 3898 sctx->cur_inode_gen, valid_path); 3899 if (ret < 0) 3900 goto out; 3901 is_orphan = 1; 3902 } 3903 3904 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3905 ret = dup_ref(cur, &check_dirs); 3906 if (ret < 0) 3907 goto out; 3908 } 3909 } else if (S_ISDIR(sctx->cur_inode_mode) && 3910 !list_empty(&sctx->deleted_refs)) { 3911 /* 3912 * We have a moved dir. Add the old parent to check_dirs 3913 */ 3914 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, 3915 list); 3916 ret = dup_ref(cur, &check_dirs); 3917 if (ret < 0) 3918 goto out; 3919 } else if (!S_ISDIR(sctx->cur_inode_mode)) { 3920 /* 3921 * We have a non dir inode. Go through all deleted refs and 3922 * unlink them if they were not already overwritten by other 3923 * inodes. 3924 */ 3925 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3926 ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3927 sctx->cur_ino, sctx->cur_inode_gen, 3928 cur->name, cur->name_len); 3929 if (ret < 0) 3930 goto out; 3931 if (!ret) { 3932 ret = send_unlink(sctx, cur->full_path); 3933 if (ret < 0) 3934 goto out; 3935 } 3936 ret = dup_ref(cur, &check_dirs); 3937 if (ret < 0) 3938 goto out; 3939 } 3940 /* 3941 * If the inode is still orphan, unlink the orphan. This may 3942 * happen when a previous inode did overwrite the first ref 3943 * of this inode and no new refs were added for the current 3944 * inode. Unlinking does not mean that the inode is deleted in 3945 * all cases. There may still be links to this inode in other 3946 * places. 3947 */ 3948 if (is_orphan) { 3949 ret = send_unlink(sctx, valid_path); 3950 if (ret < 0) 3951 goto out; 3952 } 3953 } 3954 3955 /* 3956 * We did collect all parent dirs where cur_inode was once located. We 3957 * now go through all these dirs and check if they are pending for 3958 * deletion and if it's finally possible to perform the rmdir now. 3959 * We also update the inode stats of the parent dirs here. 3960 */ 3961 list_for_each_entry(cur, &check_dirs, list) { 3962 /* 3963 * In case we had refs into dirs that were not processed yet, 3964 * we don't need to do the utime and rmdir logic for these dirs. 3965 * The dir will be processed later. 
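 * (that is, dirs with a higher inode number than cur_ino are skipped
 * here and get their utimes/rmdir handling on their own turn).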
3966 */ 3967 if (cur->dir > sctx->cur_ino) 3968 continue; 3969 3970 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); 3971 if (ret < 0) 3972 goto out; 3973 3974 if (ret == inode_state_did_create || 3975 ret == inode_state_no_change) { 3976 /* TODO delayed utimes */ 3977 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3978 if (ret < 0) 3979 goto out; 3980 } else if (ret == inode_state_did_delete && 3981 cur->dir != last_dir_ino_rm) { 3982 ret = can_rmdir(sctx, cur->dir, cur->dir_gen, 3983 sctx->cur_ino); 3984 if (ret < 0) 3985 goto out; 3986 if (ret) { 3987 ret = get_cur_path(sctx, cur->dir, 3988 cur->dir_gen, valid_path); 3989 if (ret < 0) 3990 goto out; 3991 ret = send_rmdir(sctx, valid_path); 3992 if (ret < 0) 3993 goto out; 3994 last_dir_ino_rm = cur->dir; 3995 } 3996 } 3997 } 3998 3999 ret = 0; 4000 4001 out: 4002 __free_recorded_refs(&check_dirs); 4003 free_recorded_refs(sctx); 4004 fs_path_free(valid_path); 4005 return ret; 4006 } 4007 4008 static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, 4009 struct fs_path *name, void *ctx, struct list_head *refs) 4010 { 4011 int ret = 0; 4012 struct send_ctx *sctx = ctx; 4013 struct fs_path *p; 4014 u64 gen; 4015 4016 p = fs_path_alloc(); 4017 if (!p) 4018 return -ENOMEM; 4019 4020 ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL, 4021 NULL, NULL); 4022 if (ret < 0) 4023 goto out; 4024 4025 ret = get_cur_path(sctx, dir, gen, p); 4026 if (ret < 0) 4027 goto out; 4028 ret = fs_path_add_path(p, name); 4029 if (ret < 0) 4030 goto out; 4031 4032 ret = __record_ref(refs, dir, gen, p); 4033 4034 out: 4035 if (ret) 4036 fs_path_free(p); 4037 return ret; 4038 } 4039 4040 static int __record_new_ref(int num, u64 dir, int index, 4041 struct fs_path *name, 4042 void *ctx) 4043 { 4044 struct send_ctx *sctx = ctx; 4045 return record_ref(sctx->send_root, num, dir, index, name, 4046 ctx, &sctx->new_refs); 4047 } 4048 4049 4050 static int __record_deleted_ref(int num, u64 dir, int index, 4051 struct fs_path *name, 4052 void *ctx) 4053 { 4054 struct send_ctx *sctx = ctx; 4055 return record_ref(sctx->parent_root, num, dir, index, name, 4056 ctx, &sctx->deleted_refs); 4057 } 4058 4059 static int record_new_ref(struct send_ctx *sctx) 4060 { 4061 int ret; 4062 4063 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 4064 sctx->cmp_key, 0, __record_new_ref, sctx); 4065 if (ret < 0) 4066 goto out; 4067 ret = 0; 4068 4069 out: 4070 return ret; 4071 } 4072 4073 static int record_deleted_ref(struct send_ctx *sctx) 4074 { 4075 int ret; 4076 4077 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 4078 sctx->cmp_key, 0, __record_deleted_ref, sctx); 4079 if (ret < 0) 4080 goto out; 4081 ret = 0; 4082 4083 out: 4084 return ret; 4085 } 4086 4087 struct find_ref_ctx { 4088 u64 dir; 4089 u64 dir_gen; 4090 struct btrfs_root *root; 4091 struct fs_path *name; 4092 int found_idx; 4093 }; 4094 4095 static int __find_iref(int num, u64 dir, int index, 4096 struct fs_path *name, 4097 void *ctx_) 4098 { 4099 struct find_ref_ctx *ctx = ctx_; 4100 u64 dir_gen; 4101 int ret; 4102 4103 if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && 4104 strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { 4105 /* 4106 * To avoid doing extra lookups we'll only do this if everything 4107 * else matches. 
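 * The dir_gen comparison below guards against an inode number that was
 * freed and reused by a different directory between the two snapshots.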
4108 */ 4109 ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL, 4110 NULL, NULL, NULL); 4111 if (ret) 4112 return ret; 4113 if (dir_gen != ctx->dir_gen) 4114 return 0; 4115 ctx->found_idx = num; 4116 return 1; 4117 } 4118 return 0; 4119 } 4120 4121 static int find_iref(struct btrfs_root *root, 4122 struct btrfs_path *path, 4123 struct btrfs_key *key, 4124 u64 dir, u64 dir_gen, struct fs_path *name) 4125 { 4126 int ret; 4127 struct find_ref_ctx ctx; 4128 4129 ctx.dir = dir; 4130 ctx.name = name; 4131 ctx.dir_gen = dir_gen; 4132 ctx.found_idx = -1; 4133 ctx.root = root; 4134 4135 ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); 4136 if (ret < 0) 4137 return ret; 4138 4139 if (ctx.found_idx == -1) 4140 return -ENOENT; 4141 4142 return ctx.found_idx; 4143 } 4144 4145 static int __record_changed_new_ref(int num, u64 dir, int index, 4146 struct fs_path *name, 4147 void *ctx) 4148 { 4149 u64 dir_gen; 4150 int ret; 4151 struct send_ctx *sctx = ctx; 4152 4153 ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL, 4154 NULL, NULL, NULL); 4155 if (ret) 4156 return ret; 4157 4158 ret = find_iref(sctx->parent_root, sctx->right_path, 4159 sctx->cmp_key, dir, dir_gen, name); 4160 if (ret == -ENOENT) 4161 ret = __record_new_ref(num, dir, index, name, sctx); 4162 else if (ret > 0) 4163 ret = 0; 4164 4165 return ret; 4166 } 4167 4168 static int __record_changed_deleted_ref(int num, u64 dir, int index, 4169 struct fs_path *name, 4170 void *ctx) 4171 { 4172 u64 dir_gen; 4173 int ret; 4174 struct send_ctx *sctx = ctx; 4175 4176 ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL, 4177 NULL, NULL, NULL); 4178 if (ret) 4179 return ret; 4180 4181 ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, 4182 dir, dir_gen, name); 4183 if (ret == -ENOENT) 4184 ret = __record_deleted_ref(num, dir, index, name, sctx); 4185 else if (ret > 0) 4186 ret = 0; 4187 4188 return ret; 4189 } 4190 4191 static int record_changed_ref(struct send_ctx *sctx) 4192 { 4193 int ret = 0; 4194 4195 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 4196 sctx->cmp_key, 0, __record_changed_new_ref, sctx); 4197 if (ret < 0) 4198 goto out; 4199 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 4200 sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); 4201 if (ret < 0) 4202 goto out; 4203 ret = 0; 4204 4205 out: 4206 return ret; 4207 } 4208 4209 /* 4210 * Record and process all refs at once. Needed when an inode changes the 4211 * generation number, which means that it was deleted and recreated. 
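 * A hypothetical example: deleting a file and recreating one with the
 * same name between the two snapshots may reuse the inode number with a
 * new generation, so all old refs are recorded as deleted and all new
 * refs as created before being processed in one go.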
4212 */ 4213 static int process_all_refs(struct send_ctx *sctx, 4214 enum btrfs_compare_tree_result cmd) 4215 { 4216 int ret; 4217 struct btrfs_root *root; 4218 struct btrfs_path *path; 4219 struct btrfs_key key; 4220 struct btrfs_key found_key; 4221 struct extent_buffer *eb; 4222 int slot; 4223 iterate_inode_ref_t cb; 4224 int pending_move = 0; 4225 4226 path = alloc_path_for_send(); 4227 if (!path) 4228 return -ENOMEM; 4229 4230 if (cmd == BTRFS_COMPARE_TREE_NEW) { 4231 root = sctx->send_root; 4232 cb = __record_new_ref; 4233 } else if (cmd == BTRFS_COMPARE_TREE_DELETED) { 4234 root = sctx->parent_root; 4235 cb = __record_deleted_ref; 4236 } else { 4237 btrfs_err(sctx->send_root->fs_info, 4238 "Wrong command %d in process_all_refs", cmd); 4239 ret = -EINVAL; 4240 goto out; 4241 } 4242 4243 key.objectid = sctx->cmp_key->objectid; 4244 key.type = BTRFS_INODE_REF_KEY; 4245 key.offset = 0; 4246 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4247 if (ret < 0) 4248 goto out; 4249 4250 while (1) { 4251 eb = path->nodes[0]; 4252 slot = path->slots[0]; 4253 if (slot >= btrfs_header_nritems(eb)) { 4254 ret = btrfs_next_leaf(root, path); 4255 if (ret < 0) 4256 goto out; 4257 else if (ret > 0) 4258 break; 4259 continue; 4260 } 4261 4262 btrfs_item_key_to_cpu(eb, &found_key, slot); 4263 4264 if (found_key.objectid != key.objectid || 4265 (found_key.type != BTRFS_INODE_REF_KEY && 4266 found_key.type != BTRFS_INODE_EXTREF_KEY)) 4267 break; 4268 4269 ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); 4270 if (ret < 0) 4271 goto out; 4272 4273 path->slots[0]++; 4274 } 4275 btrfs_release_path(path); 4276 4277 /* 4278 * We don't actually care about pending_move as we are simply 4279 * re-creating this inode and will be renaming it into place once we 4280 * rename the parent directory. 4281 */ 4282 ret = process_recorded_refs(sctx, &pending_move); 4283 out: 4284 btrfs_free_path(path); 4285 return ret; 4286 } 4287 4288 static int send_set_xattr(struct send_ctx *sctx, 4289 struct fs_path *path, 4290 const char *name, int name_len, 4291 const char *data, int data_len) 4292 { 4293 int ret = 0; 4294 4295 ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR); 4296 if (ret < 0) 4297 goto out; 4298 4299 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 4300 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); 4301 TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len); 4302 4303 ret = send_cmd(sctx); 4304 4305 tlv_put_failure: 4306 out: 4307 return ret; 4308 } 4309 4310 static int send_remove_xattr(struct send_ctx *sctx, 4311 struct fs_path *path, 4312 const char *name, int name_len) 4313 { 4314 int ret = 0; 4315 4316 ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR); 4317 if (ret < 0) 4318 goto out; 4319 4320 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 4321 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); 4322 4323 ret = send_cmd(sctx); 4324 4325 tlv_put_failure: 4326 out: 4327 return ret; 4328 } 4329 4330 static int __process_new_xattr(int num, struct btrfs_key *di_key, 4331 const char *name, int name_len, 4332 const char *data, int data_len, 4333 u8 type, void *ctx) 4334 { 4335 int ret; 4336 struct send_ctx *sctx = ctx; 4337 struct fs_path *p; 4338 struct posix_acl_xattr_header dummy_acl; 4339 4340 p = fs_path_alloc(); 4341 if (!p) 4342 return -ENOMEM; 4343 4344 /* 4345 * This hack is needed because empty acls are stored as zero-byte 4346 * data in xattrs. The problem with that is that receiving these zero-byte 4347 * acls will fail later.
To fix this, we send a dummy acl list that 4348 * only contains the version number and no entries. 4349 */ 4350 if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) || 4351 !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) { 4352 if (data_len == 0) { 4353 dummy_acl.a_version = 4354 cpu_to_le32(POSIX_ACL_XATTR_VERSION); 4355 data = (char *)&dummy_acl; 4356 data_len = sizeof(dummy_acl); 4357 } 4358 } 4359 4360 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4361 if (ret < 0) 4362 goto out; 4363 4364 ret = send_set_xattr(sctx, p, name, name_len, data, data_len); 4365 4366 out: 4367 fs_path_free(p); 4368 return ret; 4369 } 4370 4371 static int __process_deleted_xattr(int num, struct btrfs_key *di_key, 4372 const char *name, int name_len, 4373 const char *data, int data_len, 4374 u8 type, void *ctx) 4375 { 4376 int ret; 4377 struct send_ctx *sctx = ctx; 4378 struct fs_path *p; 4379 4380 p = fs_path_alloc(); 4381 if (!p) 4382 return -ENOMEM; 4383 4384 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4385 if (ret < 0) 4386 goto out; 4387 4388 ret = send_remove_xattr(sctx, p, name, name_len); 4389 4390 out: 4391 fs_path_free(p); 4392 return ret; 4393 } 4394 4395 static int process_new_xattr(struct send_ctx *sctx) 4396 { 4397 int ret = 0; 4398 4399 ret = iterate_dir_item(sctx->send_root, sctx->left_path, 4400 sctx->cmp_key, __process_new_xattr, sctx); 4401 4402 return ret; 4403 } 4404 4405 static int process_deleted_xattr(struct send_ctx *sctx) 4406 { 4407 return iterate_dir_item(sctx->parent_root, sctx->right_path, 4408 sctx->cmp_key, __process_deleted_xattr, sctx); 4409 } 4410 4411 struct find_xattr_ctx { 4412 const char *name; 4413 int name_len; 4414 int found_idx; 4415 char *found_data; 4416 int found_data_len; 4417 }; 4418 4419 static int __find_xattr(int num, struct btrfs_key *di_key, 4420 const char *name, int name_len, 4421 const char *data, int data_len, 4422 u8 type, void *vctx) 4423 { 4424 struct find_xattr_ctx *ctx = vctx; 4425 4426 if (name_len == ctx->name_len && 4427 strncmp(name, ctx->name, name_len) == 0) { 4428 ctx->found_idx = num; 4429 ctx->found_data_len = data_len; 4430 ctx->found_data = kmemdup(data, data_len, GFP_KERNEL); 4431 if (!ctx->found_data) 4432 return -ENOMEM; 4433 return 1; 4434 } 4435 return 0; 4436 } 4437 4438 static int find_xattr(struct btrfs_root *root, 4439 struct btrfs_path *path, 4440 struct btrfs_key *key, 4441 const char *name, int name_len, 4442 char **data, int *data_len) 4443 { 4444 int ret; 4445 struct find_xattr_ctx ctx; 4446 4447 ctx.name = name; 4448 ctx.name_len = name_len; 4449 ctx.found_idx = -1; 4450 ctx.found_data = NULL; 4451 ctx.found_data_len = 0; 4452 4453 ret = iterate_dir_item(root, path, key, __find_xattr, &ctx); 4454 if (ret < 0) 4455 return ret; 4456 4457 if (ctx.found_idx == -1) 4458 return -ENOENT; 4459 if (data) { 4460 *data = ctx.found_data; 4461 *data_len = ctx.found_data_len; 4462 } else { 4463 kfree(ctx.found_data); 4464 } 4465 return ctx.found_idx; 4466 } 4467 4468 4469 static int __process_changed_new_xattr(int num, struct btrfs_key *di_key, 4470 const char *name, int name_len, 4471 const char *data, int data_len, 4472 u8 type, void *ctx) 4473 { 4474 int ret; 4475 struct send_ctx *sctx = ctx; 4476 char *found_data = NULL; 4477 int found_data_len = 0; 4478 4479 ret = find_xattr(sctx->parent_root, sctx->right_path, 4480 sctx->cmp_key, name, name_len, &found_data, 4481 &found_data_len); 4482 if (ret == -ENOENT) { 4483 ret = __process_new_xattr(num, di_key, name, name_len, data, 4484 
data_len, type, ctx);
4485 } else if (ret >= 0) {
4486 if (data_len != found_data_len ||
4487 memcmp(data, found_data, data_len)) {
4488 ret = __process_new_xattr(num, di_key, name, name_len,
4489 data, data_len, type, ctx);
4490 } else {
4491 ret = 0;
4492 }
4493 }
4494
4495 kfree(found_data);
4496 return ret;
4497 }
4498
4499 static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key,
4500 const char *name, int name_len,
4501 const char *data, int data_len,
4502 u8 type, void *ctx)
4503 {
4504 int ret;
4505 struct send_ctx *sctx = ctx;
4506
4507 ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key,
4508 name, name_len, NULL, NULL);
4509 if (ret == -ENOENT)
4510 ret = __process_deleted_xattr(num, di_key, name, name_len, data,
4511 data_len, type, ctx);
4512 else if (ret >= 0)
4513 ret = 0;
4514
4515 return ret;
4516 }
4517
4518 static int process_changed_xattr(struct send_ctx *sctx)
4519 {
4520 int ret = 0;
4521
4522 ret = iterate_dir_item(sctx->send_root, sctx->left_path,
4523 sctx->cmp_key, __process_changed_new_xattr, sctx);
4524 if (ret < 0)
4525 goto out;
4526 ret = iterate_dir_item(sctx->parent_root, sctx->right_path,
4527 sctx->cmp_key, __process_changed_deleted_xattr, sctx);
4528
4529 out:
4530 return ret;
4531 }
4532
4533 static int process_all_new_xattrs(struct send_ctx *sctx)
4534 {
4535 int ret;
4536 struct btrfs_root *root;
4537 struct btrfs_path *path;
4538 struct btrfs_key key;
4539 struct btrfs_key found_key;
4540 struct extent_buffer *eb;
4541 int slot;
4542
4543 path = alloc_path_for_send();
4544 if (!path)
4545 return -ENOMEM;
4546
4547 root = sctx->send_root;
4548
4549 key.objectid = sctx->cmp_key->objectid;
4550 key.type = BTRFS_XATTR_ITEM_KEY;
4551 key.offset = 0;
4552 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4553 if (ret < 0)
4554 goto out;
4555
4556 while (1) {
4557 eb = path->nodes[0];
4558 slot = path->slots[0];
4559 if (slot >= btrfs_header_nritems(eb)) {
4560 ret = btrfs_next_leaf(root, path);
4561 if (ret < 0) {
4562 goto out;
4563 } else if (ret > 0) {
4564 ret = 0;
4565 break;
4566 }
4567 continue;
4568 }
4569
4570 btrfs_item_key_to_cpu(eb, &found_key, slot);
4571 if (found_key.objectid != key.objectid ||
4572 found_key.type != key.type) {
4573 ret = 0;
4574 goto out;
4575 }
4576
4577 ret = iterate_dir_item(root, path, &found_key,
4578 __process_new_xattr, sctx);
4579 if (ret < 0)
4580 goto out;
4581
4582 path->slots[0]++;
4583 }
4584
4585 out:
4586 btrfs_free_path(path);
4587 return ret;
4588 }
4589
4590 static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
4591 {
4592 struct btrfs_root *root = sctx->send_root;
4593 struct btrfs_fs_info *fs_info = root->fs_info;
4594 struct inode *inode;
4595 struct page *page;
4596 char *addr;
4597 struct btrfs_key key;
4598 pgoff_t index = offset >> PAGE_SHIFT;
4599 pgoff_t last_index;
4600 unsigned pg_offset = offset & ~PAGE_MASK;
4601 ssize_t ret = 0;
4602
4603 key.objectid = sctx->cur_ino;
4604 key.type = BTRFS_INODE_ITEM_KEY;
4605 key.offset = 0;
4606
4607 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
4608 if (IS_ERR(inode))
4609 return PTR_ERR(inode);
4610
4611 if (offset + len > i_size_read(inode)) {
4612 if (offset > i_size_read(inode))
4613 len = 0;
4614 else
4615 len = i_size_read(inode) - offset;
4616 }
4617 if (len == 0)
4618 goto out;
4619
4620 last_index = (offset + len - 1) >> PAGE_SHIFT;
4621
4622 /* initial readahead */
4623 memset(&sctx->ra, 0, sizeof(struct file_ra_state));
4624 file_ra_state_init(&sctx->ra, inode->i_mapping);
4625
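/*
 * Kick off readahead for every page in [index, last_index] up front, so
 * the copy loop below mostly finds uptodate pages.
 */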
btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index, 4626 last_index - index + 1); 4627 4628 while (index <= last_index) { 4629 unsigned cur_len = min_t(unsigned, len, 4630 PAGE_SIZE - pg_offset); 4631 page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL); 4632 if (!page) { 4633 ret = -ENOMEM; 4634 break; 4635 } 4636 4637 if (!PageUptodate(page)) { 4638 btrfs_readpage(NULL, page); 4639 lock_page(page); 4640 if (!PageUptodate(page)) { 4641 unlock_page(page); 4642 put_page(page); 4643 ret = -EIO; 4644 break; 4645 } 4646 } 4647 4648 addr = kmap(page); 4649 memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len); 4650 kunmap(page); 4651 unlock_page(page); 4652 put_page(page); 4653 index++; 4654 pg_offset = 0; 4655 len -= cur_len; 4656 ret += cur_len; 4657 } 4658 out: 4659 iput(inode); 4660 return ret; 4661 } 4662 4663 /* 4664 * Read some bytes from the current inode/file and send a write command to 4665 * user space. 4666 */ 4667 static int send_write(struct send_ctx *sctx, u64 offset, u32 len) 4668 { 4669 struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; 4670 int ret = 0; 4671 struct fs_path *p; 4672 ssize_t num_read = 0; 4673 4674 p = fs_path_alloc(); 4675 if (!p) 4676 return -ENOMEM; 4677 4678 btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len); 4679 4680 num_read = fill_read_buf(sctx, offset, len); 4681 if (num_read <= 0) { 4682 if (num_read < 0) 4683 ret = num_read; 4684 goto out; 4685 } 4686 4687 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4688 if (ret < 0) 4689 goto out; 4690 4691 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4692 if (ret < 0) 4693 goto out; 4694 4695 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4696 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4697 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read); 4698 4699 ret = send_cmd(sctx); 4700 4701 tlv_put_failure: 4702 out: 4703 fs_path_free(p); 4704 if (ret < 0) 4705 return ret; 4706 return num_read; 4707 } 4708 4709 /* 4710 * Send a clone command to user space. 4711 */ 4712 static int send_clone(struct send_ctx *sctx, 4713 u64 offset, u32 len, 4714 struct clone_root *clone_root) 4715 { 4716 int ret = 0; 4717 struct fs_path *p; 4718 u64 gen; 4719 4720 btrfs_debug(sctx->send_root->fs_info, 4721 "send_clone offset=%llu, len=%d, clone_root=%llu, clone_inode=%llu, clone_offset=%llu", 4722 offset, len, clone_root->root->objectid, clone_root->ino, 4723 clone_root->offset); 4724 4725 p = fs_path_alloc(); 4726 if (!p) 4727 return -ENOMEM; 4728 4729 ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE); 4730 if (ret < 0) 4731 goto out; 4732 4733 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4734 if (ret < 0) 4735 goto out; 4736 4737 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4738 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); 4739 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4740 4741 if (clone_root->root == sctx->send_root) { 4742 ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, 4743 &gen, NULL, NULL, NULL, NULL); 4744 if (ret < 0) 4745 goto out; 4746 ret = get_cur_path(sctx, clone_root->ino, gen, p); 4747 } else { 4748 ret = get_inode_path(clone_root->root, clone_root->ino, p); 4749 } 4750 if (ret < 0) 4751 goto out; 4752 4753 /* 4754 * If the parent we're using has a received_uuid set then use that as 4755 * our clone source as that is what we will look for when doing a 4756 * receive. 
4757 * 4758 * This covers the case that we create a snapshot off of a received 4759 * subvolume and then use that as the parent and try to receive on a 4760 * different host. 4761 */ 4762 if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid)) 4763 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 4764 clone_root->root->root_item.received_uuid); 4765 else 4766 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 4767 clone_root->root->root_item.uuid); 4768 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 4769 le64_to_cpu(clone_root->root->root_item.ctransid)); 4770 TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); 4771 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, 4772 clone_root->offset); 4773 4774 ret = send_cmd(sctx); 4775 4776 tlv_put_failure: 4777 out: 4778 fs_path_free(p); 4779 return ret; 4780 } 4781 4782 /* 4783 * Send an update extent command to user space. 4784 */ 4785 static int send_update_extent(struct send_ctx *sctx, 4786 u64 offset, u32 len) 4787 { 4788 int ret = 0; 4789 struct fs_path *p; 4790 4791 p = fs_path_alloc(); 4792 if (!p) 4793 return -ENOMEM; 4794 4795 ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT); 4796 if (ret < 0) 4797 goto out; 4798 4799 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4800 if (ret < 0) 4801 goto out; 4802 4803 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4804 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4805 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); 4806 4807 ret = send_cmd(sctx); 4808 4809 tlv_put_failure: 4810 out: 4811 fs_path_free(p); 4812 return ret; 4813 } 4814 4815 static int send_hole(struct send_ctx *sctx, u64 end) 4816 { 4817 struct fs_path *p = NULL; 4818 u64 offset = sctx->cur_inode_last_extent; 4819 u64 len; 4820 int ret = 0; 4821 4822 p = fs_path_alloc(); 4823 if (!p) 4824 return -ENOMEM; 4825 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4826 if (ret < 0) 4827 goto tlv_put_failure; 4828 memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); 4829 while (offset < end) { 4830 len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); 4831 4832 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4833 if (ret < 0) 4834 break; 4835 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4836 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4837 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); 4838 ret = send_cmd(sctx); 4839 if (ret < 0) 4840 break; 4841 offset += len; 4842 } 4843 tlv_put_failure: 4844 fs_path_free(p); 4845 return ret; 4846 } 4847 4848 static int send_extent_data(struct send_ctx *sctx, 4849 const u64 offset, 4850 const u64 len) 4851 { 4852 u64 sent = 0; 4853 4854 if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) 4855 return send_update_extent(sctx, offset, len); 4856 4857 while (sent < len) { 4858 u64 size = len - sent; 4859 int ret; 4860 4861 if (size > BTRFS_SEND_READ_SIZE) 4862 size = BTRFS_SEND_READ_SIZE; 4863 ret = send_write(sctx, offset + sent, size); 4864 if (ret < 0) 4865 return ret; 4866 if (!ret) 4867 break; 4868 sent += ret; 4869 } 4870 return 0; 4871 } 4872 4873 static int clone_range(struct send_ctx *sctx, 4874 struct clone_root *clone_root, 4875 const u64 disk_byte, 4876 u64 data_offset, 4877 u64 offset, 4878 u64 len) 4879 { 4880 struct btrfs_path *path; 4881 struct btrfs_key key; 4882 int ret; 4883 4884 path = alloc_path_for_send(); 4885 if (!path) 4886 return -ENOMEM; 4887 4888 /* 4889 * We can't send a clone operation for the entire range if we find 4890 * extent items in the respective range in the source file that 4891 * refer to different extents or if we find holes. 
So check for that and do a mix of clone and regular write/copy
4893 * operations if needed.
4894 *
4895 * Example:
4896 *
4897 * mkfs.btrfs -f /dev/sda
4898 * mount /dev/sda /mnt
4899 * xfs_io -f -c "pwrite -S 0xaa 0K 100K" /mnt/foo
4900 * cp --reflink=always /mnt/foo /mnt/bar
4901 * xfs_io -c "pwrite -S 0xbb 50K 50K" /mnt/foo
4902 * btrfs subvolume snapshot -r /mnt /mnt/snap
4903 *
4904 * If, while sending the snapshot and processing file bar (which has a
4905 * higher inode number than foo), we blindly sent a clone operation for
4906 * the [0, 100K[ range from foo to bar, the receiver would end up with
4907 * a file bar that matches the content of file foo - iow, it wouldn't
4908 * match the content of bar in the original filesystem.
4909 */
4910 key.objectid = clone_root->ino;
4911 key.type = BTRFS_EXTENT_DATA_KEY;
4912 key.offset = clone_root->offset;
4913 ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
4914 if (ret < 0)
4915 goto out;
4916 if (ret > 0 && path->slots[0] > 0) {
4917 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
4918 if (key.objectid == clone_root->ino &&
4919 key.type == BTRFS_EXTENT_DATA_KEY)
4920 path->slots[0]--;
4921 }
4922
4923 while (true) {
4924 struct extent_buffer *leaf = path->nodes[0];
4925 int slot = path->slots[0];
4926 struct btrfs_file_extent_item *ei;
4927 u8 type;
4928 u64 ext_len;
4929 u64 clone_len;
4930
4931 if (slot >= btrfs_header_nritems(leaf)) {
4932 ret = btrfs_next_leaf(clone_root->root, path);
4933 if (ret < 0)
4934 goto out;
4935 else if (ret > 0)
4936 break;
4937 continue;
4938 }
4939
4940 btrfs_item_key_to_cpu(leaf, &key, slot);
4941
4942 /*
4943 * We might have an implicit trailing hole (NO_HOLES feature
4944 * enabled). We deal with it after leaving this loop.
4945 */
4946 if (key.objectid != clone_root->ino ||
4947 key.type != BTRFS_EXTENT_DATA_KEY)
4948 break;
4949
4950 ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
4951 type = btrfs_file_extent_type(leaf, ei);
4952 if (type == BTRFS_FILE_EXTENT_INLINE) {
4953 ext_len = btrfs_file_extent_inline_len(leaf, slot, ei);
4954 ext_len = PAGE_ALIGN(ext_len);
4955 } else {
4956 ext_len = btrfs_file_extent_num_bytes(leaf, ei);
4957 }
4958
4959 if (key.offset + ext_len <= clone_root->offset)
4960 goto next;
4961
4962 if (key.offset > clone_root->offset) {
4963 /* Implicit hole, NO_HOLES feature enabled.
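With NO_HOLES the gap between clone_root->offset and key.offset simply has no extent item in the clone source (e.g. xfs_io -f -c "pwrite 0 64K" -c "pwrite 128K 64K" foo leaves such a gap), so there is nothing we could clone from and the range is sent as regular write commands below.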
*/ 4964 u64 hole_len = key.offset - clone_root->offset; 4965 4966 if (hole_len > len) 4967 hole_len = len; 4968 ret = send_extent_data(sctx, offset, hole_len); 4969 if (ret < 0) 4970 goto out; 4971 4972 len -= hole_len; 4973 if (len == 0) 4974 break; 4975 offset += hole_len; 4976 clone_root->offset += hole_len; 4977 data_offset += hole_len; 4978 } 4979 4980 if (key.offset >= clone_root->offset + len) 4981 break; 4982 4983 clone_len = min_t(u64, ext_len, len); 4984 4985 if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte && 4986 btrfs_file_extent_offset(leaf, ei) == data_offset) 4987 ret = send_clone(sctx, offset, clone_len, clone_root); 4988 else 4989 ret = send_extent_data(sctx, offset, clone_len); 4990 4991 if (ret < 0) 4992 goto out; 4993 4994 len -= clone_len; 4995 if (len == 0) 4996 break; 4997 offset += clone_len; 4998 clone_root->offset += clone_len; 4999 data_offset += clone_len; 5000 next: 5001 path->slots[0]++; 5002 } 5003 5004 if (len > 0) 5005 ret = send_extent_data(sctx, offset, len); 5006 else 5007 ret = 0; 5008 out: 5009 btrfs_free_path(path); 5010 return ret; 5011 } 5012 5013 static int send_write_or_clone(struct send_ctx *sctx, 5014 struct btrfs_path *path, 5015 struct btrfs_key *key, 5016 struct clone_root *clone_root) 5017 { 5018 int ret = 0; 5019 struct btrfs_file_extent_item *ei; 5020 u64 offset = key->offset; 5021 u64 len; 5022 u8 type; 5023 u64 bs = sctx->send_root->fs_info->sb->s_blocksize; 5024 5025 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 5026 struct btrfs_file_extent_item); 5027 type = btrfs_file_extent_type(path->nodes[0], ei); 5028 if (type == BTRFS_FILE_EXTENT_INLINE) { 5029 len = btrfs_file_extent_inline_len(path->nodes[0], 5030 path->slots[0], ei); 5031 /* 5032 * it is possible the inline item won't cover the whole page, 5033 * but there may be items after this page. 
Make 5034 * sure to send the whole thing 5035 */ 5036 len = PAGE_ALIGN(len); 5037 } else { 5038 len = btrfs_file_extent_num_bytes(path->nodes[0], ei); 5039 } 5040 5041 if (offset + len > sctx->cur_inode_size) 5042 len = sctx->cur_inode_size - offset; 5043 if (len == 0) { 5044 ret = 0; 5045 goto out; 5046 } 5047 5048 if (clone_root && IS_ALIGNED(offset + len, bs)) { 5049 u64 disk_byte; 5050 u64 data_offset; 5051 5052 disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); 5053 data_offset = btrfs_file_extent_offset(path->nodes[0], ei); 5054 ret = clone_range(sctx, clone_root, disk_byte, data_offset, 5055 offset, len); 5056 } else { 5057 ret = send_extent_data(sctx, offset, len); 5058 } 5059 out: 5060 return ret; 5061 } 5062 5063 static int is_extent_unchanged(struct send_ctx *sctx, 5064 struct btrfs_path *left_path, 5065 struct btrfs_key *ekey) 5066 { 5067 int ret = 0; 5068 struct btrfs_key key; 5069 struct btrfs_path *path = NULL; 5070 struct extent_buffer *eb; 5071 int slot; 5072 struct btrfs_key found_key; 5073 struct btrfs_file_extent_item *ei; 5074 u64 left_disknr; 5075 u64 right_disknr; 5076 u64 left_offset; 5077 u64 right_offset; 5078 u64 left_offset_fixed; 5079 u64 left_len; 5080 u64 right_len; 5081 u64 left_gen; 5082 u64 right_gen; 5083 u8 left_type; 5084 u8 right_type; 5085 5086 path = alloc_path_for_send(); 5087 if (!path) 5088 return -ENOMEM; 5089 5090 eb = left_path->nodes[0]; 5091 slot = left_path->slots[0]; 5092 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 5093 left_type = btrfs_file_extent_type(eb, ei); 5094 5095 if (left_type != BTRFS_FILE_EXTENT_REG) { 5096 ret = 0; 5097 goto out; 5098 } 5099 left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 5100 left_len = btrfs_file_extent_num_bytes(eb, ei); 5101 left_offset = btrfs_file_extent_offset(eb, ei); 5102 left_gen = btrfs_file_extent_generation(eb, ei); 5103 5104 /* 5105 * Following comments will refer to these graphics. L is the left 5106 * extents which we are checking at the moment. 1-8 are the right 5107 * extents that we iterate. 5108 * 5109 * |-----L-----| 5110 * |-1-|-2a-|-3-|-4-|-5-|-6-| 5111 * 5112 * |-----L-----| 5113 * |--1--|-2b-|...(same as above) 5114 * 5115 * Alternative situation. Happens on files where extents got split. 5116 * |-----L-----| 5117 * |-----------7-----------|-6-| 5118 * 5119 * Alternative situation. Happens on files which got larger. 5120 * |-----L-----| 5121 * |-8-| 5122 * Nothing follows after 8. 5123 */ 5124 5125 key.objectid = ekey->objectid; 5126 key.type = BTRFS_EXTENT_DATA_KEY; 5127 key.offset = ekey->offset; 5128 ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0); 5129 if (ret < 0) 5130 goto out; 5131 if (ret) { 5132 ret = 0; 5133 goto out; 5134 } 5135 5136 /* 5137 * Handle special case where the right side has no extents at all. 5138 */ 5139 eb = path->nodes[0]; 5140 slot = path->slots[0]; 5141 btrfs_item_key_to_cpu(eb, &found_key, slot); 5142 if (found_key.objectid != key.objectid || 5143 found_key.type != key.type) { 5144 /* If we're a hole then just pretend nothing changed */ 5145 ret = (left_disknr) ? 0 : 1; 5146 goto out; 5147 } 5148 5149 /* 5150 * We're now on 2a, 2b or 7. 
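btrfs_search_slot_for_read() returned the previous item when there was no exact match, so the found extent starts at or before ekey->offset.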
5151 */ 5152 key = found_key; 5153 while (key.offset < ekey->offset + left_len) { 5154 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 5155 right_type = btrfs_file_extent_type(eb, ei); 5156 if (right_type != BTRFS_FILE_EXTENT_REG) { 5157 ret = 0; 5158 goto out; 5159 } 5160 5161 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 5162 right_len = btrfs_file_extent_num_bytes(eb, ei); 5163 right_offset = btrfs_file_extent_offset(eb, ei); 5164 right_gen = btrfs_file_extent_generation(eb, ei); 5165 5166 /* 5167 * Are we at extent 8? If yes, we know the extent is changed. 5168 * This may only happen on the first iteration. 5169 */ 5170 if (found_key.offset + right_len <= ekey->offset) { 5171 /* If we're a hole just pretend nothing changed */ 5172 ret = (left_disknr) ? 0 : 1; 5173 goto out; 5174 } 5175 5176 left_offset_fixed = left_offset; 5177 if (key.offset < ekey->offset) { 5178 /* Fix the right offset for 2a and 7. */ 5179 right_offset += ekey->offset - key.offset; 5180 } else { 5181 /* Fix the left offset for all behind 2a and 2b */ 5182 left_offset_fixed += key.offset - ekey->offset; 5183 } 5184 5185 /* 5186 * Check if we have the same extent. 5187 */ 5188 if (left_disknr != right_disknr || 5189 left_offset_fixed != right_offset || 5190 left_gen != right_gen) { 5191 ret = 0; 5192 goto out; 5193 } 5194 5195 /* 5196 * Go to the next extent. 5197 */ 5198 ret = btrfs_next_item(sctx->parent_root, path); 5199 if (ret < 0) 5200 goto out; 5201 if (!ret) { 5202 eb = path->nodes[0]; 5203 slot = path->slots[0]; 5204 btrfs_item_key_to_cpu(eb, &found_key, slot); 5205 } 5206 if (ret || found_key.objectid != key.objectid || 5207 found_key.type != key.type) { 5208 key.offset += right_len; 5209 break; 5210 } 5211 if (found_key.offset != key.offset + right_len) { 5212 ret = 0; 5213 goto out; 5214 } 5215 key = found_key; 5216 } 5217 5218 /* 5219 * We're now behind the left extent (treat as unchanged) or at the end 5220 * of the right side (treat as changed). 
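key.offset was advanced past every right extent that matched, so comparing it against ekey->offset + left_len distinguishes the two cases.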
5221 */ 5222 if (key.offset >= ekey->offset + left_len) 5223 ret = 1; 5224 else 5225 ret = 0; 5226 5227 5228 out: 5229 btrfs_free_path(path); 5230 return ret; 5231 } 5232 5233 static int get_last_extent(struct send_ctx *sctx, u64 offset) 5234 { 5235 struct btrfs_path *path; 5236 struct btrfs_root *root = sctx->send_root; 5237 struct btrfs_file_extent_item *fi; 5238 struct btrfs_key key; 5239 u64 extent_end; 5240 u8 type; 5241 int ret; 5242 5243 path = alloc_path_for_send(); 5244 if (!path) 5245 return -ENOMEM; 5246 5247 sctx->cur_inode_last_extent = 0; 5248 5249 key.objectid = sctx->cur_ino; 5250 key.type = BTRFS_EXTENT_DATA_KEY; 5251 key.offset = offset; 5252 ret = btrfs_search_slot_for_read(root, &key, path, 0, 1); 5253 if (ret < 0) 5254 goto out; 5255 ret = 0; 5256 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 5257 if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY) 5258 goto out; 5259 5260 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 5261 struct btrfs_file_extent_item); 5262 type = btrfs_file_extent_type(path->nodes[0], fi); 5263 if (type == BTRFS_FILE_EXTENT_INLINE) { 5264 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 5265 path->slots[0], fi); 5266 extent_end = ALIGN(key.offset + size, 5267 sctx->send_root->sectorsize); 5268 } else { 5269 extent_end = key.offset + 5270 btrfs_file_extent_num_bytes(path->nodes[0], fi); 5271 } 5272 sctx->cur_inode_last_extent = extent_end; 5273 out: 5274 btrfs_free_path(path); 5275 return ret; 5276 } 5277 5278 static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, 5279 struct btrfs_key *key) 5280 { 5281 struct btrfs_file_extent_item *fi; 5282 u64 extent_end; 5283 u8 type; 5284 int ret = 0; 5285 5286 if (sctx->cur_ino != key->objectid || !need_send_hole(sctx)) 5287 return 0; 5288 5289 if (sctx->cur_inode_last_extent == (u64)-1) { 5290 ret = get_last_extent(sctx, key->offset - 1); 5291 if (ret) 5292 return ret; 5293 } 5294 5295 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 5296 struct btrfs_file_extent_item); 5297 type = btrfs_file_extent_type(path->nodes[0], fi); 5298 if (type == BTRFS_FILE_EXTENT_INLINE) { 5299 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 5300 path->slots[0], fi); 5301 extent_end = ALIGN(key->offset + size, 5302 sctx->send_root->sectorsize); 5303 } else { 5304 extent_end = key->offset + 5305 btrfs_file_extent_num_bytes(path->nodes[0], fi); 5306 } 5307 5308 if (path->slots[0] == 0 && 5309 sctx->cur_inode_last_extent < key->offset) { 5310 /* 5311 * We might have skipped entire leafs that contained only 5312 * file extent items for our current inode. These leafs have 5313 * a generation number smaller (older) than the one in the 5314 * current leaf and the leaf our last extent came from, and 5315 * are located between these 2 leafs. 
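Re-read the last extent ending before key->offset so that cur_inode_last_extent accounts for those skipped items and we don't send a hole covering data they describe.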
5316 */ 5317 ret = get_last_extent(sctx, key->offset - 1); 5318 if (ret) 5319 return ret; 5320 } 5321 5322 if (sctx->cur_inode_last_extent < key->offset) 5323 ret = send_hole(sctx, key->offset); 5324 sctx->cur_inode_last_extent = extent_end; 5325 return ret; 5326 } 5327 5328 static int process_extent(struct send_ctx *sctx, 5329 struct btrfs_path *path, 5330 struct btrfs_key *key) 5331 { 5332 struct clone_root *found_clone = NULL; 5333 int ret = 0; 5334 5335 if (S_ISLNK(sctx->cur_inode_mode)) 5336 return 0; 5337 5338 if (sctx->parent_root && !sctx->cur_inode_new) { 5339 ret = is_extent_unchanged(sctx, path, key); 5340 if (ret < 0) 5341 goto out; 5342 if (ret) { 5343 ret = 0; 5344 goto out_hole; 5345 } 5346 } else { 5347 struct btrfs_file_extent_item *ei; 5348 u8 type; 5349 5350 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 5351 struct btrfs_file_extent_item); 5352 type = btrfs_file_extent_type(path->nodes[0], ei); 5353 if (type == BTRFS_FILE_EXTENT_PREALLOC || 5354 type == BTRFS_FILE_EXTENT_REG) { 5355 /* 5356 * The send spec does not have a prealloc command yet, 5357 * so just leave a hole for prealloc'ed extents until 5358 * we have enough commands queued up to justify rev'ing 5359 * the send spec. 5360 */ 5361 if (type == BTRFS_FILE_EXTENT_PREALLOC) { 5362 ret = 0; 5363 goto out; 5364 } 5365 5366 /* Have a hole, just skip it. */ 5367 if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) { 5368 ret = 0; 5369 goto out; 5370 } 5371 } 5372 } 5373 5374 ret = find_extent_clone(sctx, path, key->objectid, key->offset, 5375 sctx->cur_inode_size, &found_clone); 5376 if (ret != -ENOENT && ret < 0) 5377 goto out; 5378 5379 ret = send_write_or_clone(sctx, path, key, found_clone); 5380 if (ret) 5381 goto out; 5382 out_hole: 5383 ret = maybe_send_hole(sctx, path, key); 5384 out: 5385 return ret; 5386 } 5387 5388 static int process_all_extents(struct send_ctx *sctx) 5389 { 5390 int ret; 5391 struct btrfs_root *root; 5392 struct btrfs_path *path; 5393 struct btrfs_key key; 5394 struct btrfs_key found_key; 5395 struct extent_buffer *eb; 5396 int slot; 5397 5398 root = sctx->send_root; 5399 path = alloc_path_for_send(); 5400 if (!path) 5401 return -ENOMEM; 5402 5403 key.objectid = sctx->cmp_key->objectid; 5404 key.type = BTRFS_EXTENT_DATA_KEY; 5405 key.offset = 0; 5406 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5407 if (ret < 0) 5408 goto out; 5409 5410 while (1) { 5411 eb = path->nodes[0]; 5412 slot = path->slots[0]; 5413 5414 if (slot >= btrfs_header_nritems(eb)) { 5415 ret = btrfs_next_leaf(root, path); 5416 if (ret < 0) { 5417 goto out; 5418 } else if (ret > 0) { 5419 ret = 0; 5420 break; 5421 } 5422 continue; 5423 } 5424 5425 btrfs_item_key_to_cpu(eb, &found_key, slot); 5426 5427 if (found_key.objectid != key.objectid || 5428 found_key.type != key.type) { 5429 ret = 0; 5430 goto out; 5431 } 5432 5433 ret = process_extent(sctx, path, &found_key); 5434 if (ret < 0) 5435 goto out; 5436 5437 path->slots[0]++; 5438 } 5439 5440 out: 5441 btrfs_free_path(path); 5442 return ret; 5443 } 5444 5445 static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end, 5446 int *pending_move, 5447 int *refs_processed) 5448 { 5449 int ret = 0; 5450 5451 if (sctx->cur_ino == 0) 5452 goto out; 5453 if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid && 5454 sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY) 5455 goto out; 5456 if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs)) 5457 goto out; 5458 5459 ret = process_recorded_refs(sctx, pending_move); 5460 if (ret < 0) 
5461 goto out; 5462 5463 *refs_processed = 1; 5464 out: 5465 return ret; 5466 } 5467 5468 static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) 5469 { 5470 int ret = 0; 5471 u64 left_mode; 5472 u64 left_uid; 5473 u64 left_gid; 5474 u64 right_mode; 5475 u64 right_uid; 5476 u64 right_gid; 5477 int need_chmod = 0; 5478 int need_chown = 0; 5479 int pending_move = 0; 5480 int refs_processed = 0; 5481 5482 ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move, 5483 &refs_processed); 5484 if (ret < 0) 5485 goto out; 5486 5487 /* 5488 * We have processed the refs and thus need to advance send_progress. 5489 * Now, calls to get_cur_xxx will take the updated refs of the current 5490 * inode into account. 5491 * 5492 * On the other hand, if our current inode is a directory and couldn't 5493 * be moved/renamed because its parent was renamed/moved too and it has 5494 * a higher inode number, we can only move/rename our current inode 5495 * after we moved/renamed its parent. Therefore in this case operate on 5496 * the old path (pre move/rename) of our current inode, and the 5497 * move/rename will be performed later. 5498 */ 5499 if (refs_processed && !pending_move) 5500 sctx->send_progress = sctx->cur_ino + 1; 5501 5502 if (sctx->cur_ino == 0 || sctx->cur_inode_deleted) 5503 goto out; 5504 if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino) 5505 goto out; 5506 5507 ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, 5508 &left_mode, &left_uid, &left_gid, NULL); 5509 if (ret < 0) 5510 goto out; 5511 5512 if (!sctx->parent_root || sctx->cur_inode_new) { 5513 need_chown = 1; 5514 if (!S_ISLNK(sctx->cur_inode_mode)) 5515 need_chmod = 1; 5516 } else { 5517 ret = get_inode_info(sctx->parent_root, sctx->cur_ino, 5518 NULL, NULL, &right_mode, &right_uid, 5519 &right_gid, NULL); 5520 if (ret < 0) 5521 goto out; 5522 5523 if (left_uid != right_uid || left_gid != right_gid) 5524 need_chown = 1; 5525 if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode) 5526 need_chmod = 1; 5527 } 5528 5529 if (S_ISREG(sctx->cur_inode_mode)) { 5530 if (need_send_hole(sctx)) { 5531 if (sctx->cur_inode_last_extent == (u64)-1 || 5532 sctx->cur_inode_last_extent < 5533 sctx->cur_inode_size) { 5534 ret = get_last_extent(sctx, (u64)-1); 5535 if (ret) 5536 goto out; 5537 } 5538 if (sctx->cur_inode_last_extent < 5539 sctx->cur_inode_size) { 5540 ret = send_hole(sctx, sctx->cur_inode_size); 5541 if (ret) 5542 goto out; 5543 } 5544 } 5545 ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5546 sctx->cur_inode_size); 5547 if (ret < 0) 5548 goto out; 5549 } 5550 5551 if (need_chown) { 5552 ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5553 left_uid, left_gid); 5554 if (ret < 0) 5555 goto out; 5556 } 5557 if (need_chmod) { 5558 ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen, 5559 left_mode); 5560 if (ret < 0) 5561 goto out; 5562 } 5563 5564 /* 5565 * If other directory inodes depended on our current directory 5566 * inode's move/rename, now do their move/rename operations. 5567 */ 5568 if (!is_waiting_for_move(sctx, sctx->cur_ino)) { 5569 ret = apply_children_dir_moves(sctx); 5570 if (ret) 5571 goto out; 5572 /* 5573 * Need to send that every time, no matter if it actually 5574 * changed between the two trees as we have done changes to 5575 * the inode before. If our inode is a directory and it's 5576 * waiting to be moved/renamed, we will send its utimes when 5577 * it's moved/renamed, therefore we don't need to do it here. 
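Advancing send_progress before send_utimes() below makes get_cur_path() resolve the inode's updated (post move/rename) path.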
5578 */ 5579 sctx->send_progress = sctx->cur_ino + 1; 5580 ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); 5581 if (ret < 0) 5582 goto out; 5583 } 5584 5585 out: 5586 return ret; 5587 } 5588 5589 static int changed_inode(struct send_ctx *sctx, 5590 enum btrfs_compare_tree_result result) 5591 { 5592 int ret = 0; 5593 struct btrfs_key *key = sctx->cmp_key; 5594 struct btrfs_inode_item *left_ii = NULL; 5595 struct btrfs_inode_item *right_ii = NULL; 5596 u64 left_gen = 0; 5597 u64 right_gen = 0; 5598 5599 sctx->cur_ino = key->objectid; 5600 sctx->cur_inode_new_gen = 0; 5601 sctx->cur_inode_last_extent = (u64)-1; 5602 5603 /* 5604 * Set send_progress to current inode. This will tell all get_cur_xxx 5605 * functions that the current inode's refs are not updated yet. Later, 5606 * when process_recorded_refs is finished, it is set to cur_ino + 1. 5607 */ 5608 sctx->send_progress = sctx->cur_ino; 5609 5610 if (result == BTRFS_COMPARE_TREE_NEW || 5611 result == BTRFS_COMPARE_TREE_CHANGED) { 5612 left_ii = btrfs_item_ptr(sctx->left_path->nodes[0], 5613 sctx->left_path->slots[0], 5614 struct btrfs_inode_item); 5615 left_gen = btrfs_inode_generation(sctx->left_path->nodes[0], 5616 left_ii); 5617 } else { 5618 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], 5619 sctx->right_path->slots[0], 5620 struct btrfs_inode_item); 5621 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], 5622 right_ii); 5623 } 5624 if (result == BTRFS_COMPARE_TREE_CHANGED) { 5625 right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], 5626 sctx->right_path->slots[0], 5627 struct btrfs_inode_item); 5628 5629 right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], 5630 right_ii); 5631 5632 /* 5633 * The cur_ino = root dir case is special here. We can't treat 5634 * the inode as deleted+reused because it would generate a 5635 * stream that tries to delete/mkdir the root dir. 5636 */ 5637 if (left_gen != right_gen && 5638 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) 5639 sctx->cur_inode_new_gen = 1; 5640 } 5641 5642 if (result == BTRFS_COMPARE_TREE_NEW) { 5643 sctx->cur_inode_gen = left_gen; 5644 sctx->cur_inode_new = 1; 5645 sctx->cur_inode_deleted = 0; 5646 sctx->cur_inode_size = btrfs_inode_size( 5647 sctx->left_path->nodes[0], left_ii); 5648 sctx->cur_inode_mode = btrfs_inode_mode( 5649 sctx->left_path->nodes[0], left_ii); 5650 sctx->cur_inode_rdev = btrfs_inode_rdev( 5651 sctx->left_path->nodes[0], left_ii); 5652 if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) 5653 ret = send_create_inode_if_needed(sctx); 5654 } else if (result == BTRFS_COMPARE_TREE_DELETED) { 5655 sctx->cur_inode_gen = right_gen; 5656 sctx->cur_inode_new = 0; 5657 sctx->cur_inode_deleted = 1; 5658 sctx->cur_inode_size = btrfs_inode_size( 5659 sctx->right_path->nodes[0], right_ii); 5660 sctx->cur_inode_mode = btrfs_inode_mode( 5661 sctx->right_path->nodes[0], right_ii); 5662 } else if (result == BTRFS_COMPARE_TREE_CHANGED) { 5663 /* 5664 * We need to do some special handling in case the inode was 5665 * reported as changed with a changed generation number. This 5666 * means that the original inode was deleted and new inode 5667 * reused the same inum. So we have to treat the old inode as 5668 * deleted and the new one as new. 5669 */ 5670 if (sctx->cur_inode_new_gen) { 5671 /* 5672 * First, process the inode as if it was deleted. 
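process_all_refs() below records every ref of the old inode found in the parent snapshot as a deleted ref, so all of its old paths get unlinked or orphanized.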
5673 */ 5674 sctx->cur_inode_gen = right_gen; 5675 sctx->cur_inode_new = 0; 5676 sctx->cur_inode_deleted = 1; 5677 sctx->cur_inode_size = btrfs_inode_size( 5678 sctx->right_path->nodes[0], right_ii); 5679 sctx->cur_inode_mode = btrfs_inode_mode( 5680 sctx->right_path->nodes[0], right_ii); 5681 ret = process_all_refs(sctx, 5682 BTRFS_COMPARE_TREE_DELETED); 5683 if (ret < 0) 5684 goto out; 5685 5686 /* 5687 * Now process the inode as if it was new. 5688 */ 5689 sctx->cur_inode_gen = left_gen; 5690 sctx->cur_inode_new = 1; 5691 sctx->cur_inode_deleted = 0; 5692 sctx->cur_inode_size = btrfs_inode_size( 5693 sctx->left_path->nodes[0], left_ii); 5694 sctx->cur_inode_mode = btrfs_inode_mode( 5695 sctx->left_path->nodes[0], left_ii); 5696 sctx->cur_inode_rdev = btrfs_inode_rdev( 5697 sctx->left_path->nodes[0], left_ii); 5698 ret = send_create_inode_if_needed(sctx); 5699 if (ret < 0) 5700 goto out; 5701 5702 ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW); 5703 if (ret < 0) 5704 goto out; 5705 /* 5706 * Advance send_progress now as we did not get into 5707 * process_recorded_refs_if_needed in the new_gen case. 5708 */ 5709 sctx->send_progress = sctx->cur_ino + 1; 5710 5711 /* 5712 * Now process all extents and xattrs of the inode as if 5713 * they were all new. 5714 */ 5715 ret = process_all_extents(sctx); 5716 if (ret < 0) 5717 goto out; 5718 ret = process_all_new_xattrs(sctx); 5719 if (ret < 0) 5720 goto out; 5721 } else { 5722 sctx->cur_inode_gen = left_gen; 5723 sctx->cur_inode_new = 0; 5724 sctx->cur_inode_new_gen = 0; 5725 sctx->cur_inode_deleted = 0; 5726 sctx->cur_inode_size = btrfs_inode_size( 5727 sctx->left_path->nodes[0], left_ii); 5728 sctx->cur_inode_mode = btrfs_inode_mode( 5729 sctx->left_path->nodes[0], left_ii); 5730 } 5731 } 5732 5733 out: 5734 return ret; 5735 } 5736 5737 /* 5738 * We have to process new refs before deleted refs, but compare_trees gives us 5739 * the new and deleted refs mixed. To fix this, we record the new/deleted refs 5740 * first and later process them in process_recorded_refs. 5741 * For the cur_inode_new_gen case, we skip recording completely because 5742 * changed_inode did already initiate processing of refs. The reason for this is 5743 * that in this case, compare_tree actually compares the refs of 2 different 5744 * inodes. To fix this, process_all_refs is used in changed_inode to handle all 5745 * refs of the right tree as deleted and all refs of the left tree as new. 5746 */ 5747 static int changed_ref(struct send_ctx *sctx, 5748 enum btrfs_compare_tree_result result) 5749 { 5750 int ret = 0; 5751 5752 if (sctx->cur_ino != sctx->cmp_key->objectid) { 5753 inconsistent_snapshot_error(sctx, result, "reference"); 5754 return -EIO; 5755 } 5756 5757 if (!sctx->cur_inode_new_gen && 5758 sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { 5759 if (result == BTRFS_COMPARE_TREE_NEW) 5760 ret = record_new_ref(sctx); 5761 else if (result == BTRFS_COMPARE_TREE_DELETED) 5762 ret = record_deleted_ref(sctx); 5763 else if (result == BTRFS_COMPARE_TREE_CHANGED) 5764 ret = record_changed_ref(sctx); 5765 } 5766 5767 return ret; 5768 } 5769 5770 /* 5771 * Process new/deleted/changed xattrs. We skip processing in the 5772 * cur_inode_new_gen case because changed_inode did already initiate processing 5773 * of xattrs. 
The reason is the same as in changed_ref 5774 */ 5775 static int changed_xattr(struct send_ctx *sctx, 5776 enum btrfs_compare_tree_result result) 5777 { 5778 int ret = 0; 5779 5780 if (sctx->cur_ino != sctx->cmp_key->objectid) { 5781 inconsistent_snapshot_error(sctx, result, "xattr"); 5782 return -EIO; 5783 } 5784 5785 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5786 if (result == BTRFS_COMPARE_TREE_NEW) 5787 ret = process_new_xattr(sctx); 5788 else if (result == BTRFS_COMPARE_TREE_DELETED) 5789 ret = process_deleted_xattr(sctx); 5790 else if (result == BTRFS_COMPARE_TREE_CHANGED) 5791 ret = process_changed_xattr(sctx); 5792 } 5793 5794 return ret; 5795 } 5796 5797 /* 5798 * Process new/deleted/changed extents. We skip processing in the 5799 * cur_inode_new_gen case because changed_inode did already initiate processing 5800 * of extents. The reason is the same as in changed_ref 5801 */ 5802 static int changed_extent(struct send_ctx *sctx, 5803 enum btrfs_compare_tree_result result) 5804 { 5805 int ret = 0; 5806 5807 if (sctx->cur_ino != sctx->cmp_key->objectid) { 5808 inconsistent_snapshot_error(sctx, result, "extent"); 5809 return -EIO; 5810 } 5811 5812 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 5813 if (result != BTRFS_COMPARE_TREE_DELETED) 5814 ret = process_extent(sctx, sctx->left_path, 5815 sctx->cmp_key); 5816 } 5817 5818 return ret; 5819 } 5820 5821 static int dir_changed(struct send_ctx *sctx, u64 dir) 5822 { 5823 u64 orig_gen, new_gen; 5824 int ret; 5825 5826 ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL, 5827 NULL, NULL); 5828 if (ret) 5829 return ret; 5830 5831 ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL, 5832 NULL, NULL, NULL); 5833 if (ret) 5834 return ret; 5835 5836 return (orig_gen != new_gen) ? 1 : 0; 5837 } 5838 5839 static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path, 5840 struct btrfs_key *key) 5841 { 5842 struct btrfs_inode_extref *extref; 5843 struct extent_buffer *leaf; 5844 u64 dirid = 0, last_dirid = 0; 5845 unsigned long ptr; 5846 u32 item_size; 5847 u32 cur_offset = 0; 5848 int ref_name_len; 5849 int ret = 0; 5850 5851 /* Easy case, just check this one dirid */ 5852 if (key->type == BTRFS_INODE_REF_KEY) { 5853 dirid = key->offset; 5854 5855 ret = dir_changed(sctx, dirid); 5856 goto out; 5857 } 5858 5859 leaf = path->nodes[0]; 5860 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 5861 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 5862 while (cur_offset < item_size) { 5863 extref = (struct btrfs_inode_extref *)(ptr + 5864 cur_offset); 5865 dirid = btrfs_inode_extref_parent(leaf, extref); 5866 ref_name_len = btrfs_inode_extref_name_len(leaf, extref); 5867 cur_offset += ref_name_len + sizeof(*extref); 5868 if (dirid == last_dirid) 5869 continue; 5870 ret = dir_changed(sctx, dirid); 5871 if (ret) 5872 break; 5873 last_dirid = dirid; 5874 } 5875 out: 5876 return ret; 5877 } 5878 5879 /* 5880 * Updates compare related fields in sctx and simply forwards to the actual 5881 * changed_xxx functions. 
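For BTRFS_COMPARE_TREE_SAME only two cases need a second look: refs whose parent directory generation changed (checked via compare_refs) and file extents that may still require sending a hole (maybe_send_hole); everything else is ignored.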
5882 */ 5883 static int changed_cb(struct btrfs_root *left_root, 5884 struct btrfs_root *right_root, 5885 struct btrfs_path *left_path, 5886 struct btrfs_path *right_path, 5887 struct btrfs_key *key, 5888 enum btrfs_compare_tree_result result, 5889 void *ctx) 5890 { 5891 int ret = 0; 5892 struct send_ctx *sctx = ctx; 5893 5894 if (result == BTRFS_COMPARE_TREE_SAME) { 5895 if (key->type == BTRFS_INODE_REF_KEY || 5896 key->type == BTRFS_INODE_EXTREF_KEY) { 5897 ret = compare_refs(sctx, left_path, key); 5898 if (!ret) 5899 return 0; 5900 if (ret < 0) 5901 return ret; 5902 } else if (key->type == BTRFS_EXTENT_DATA_KEY) { 5903 return maybe_send_hole(sctx, left_path, key); 5904 } else { 5905 return 0; 5906 } 5907 result = BTRFS_COMPARE_TREE_CHANGED; 5908 ret = 0; 5909 } 5910 5911 sctx->left_path = left_path; 5912 sctx->right_path = right_path; 5913 sctx->cmp_key = key; 5914 5915 ret = finish_inode_if_needed(sctx, 0); 5916 if (ret < 0) 5917 goto out; 5918 5919 /* Ignore non-FS objects */ 5920 if (key->objectid == BTRFS_FREE_INO_OBJECTID || 5921 key->objectid == BTRFS_FREE_SPACE_OBJECTID) 5922 goto out; 5923 5924 if (key->type == BTRFS_INODE_ITEM_KEY) 5925 ret = changed_inode(sctx, result); 5926 else if (key->type == BTRFS_INODE_REF_KEY || 5927 key->type == BTRFS_INODE_EXTREF_KEY) 5928 ret = changed_ref(sctx, result); 5929 else if (key->type == BTRFS_XATTR_ITEM_KEY) 5930 ret = changed_xattr(sctx, result); 5931 else if (key->type == BTRFS_EXTENT_DATA_KEY) 5932 ret = changed_extent(sctx, result); 5933 5934 out: 5935 return ret; 5936 } 5937 5938 static int full_send_tree(struct send_ctx *sctx) 5939 { 5940 int ret; 5941 struct btrfs_root *send_root = sctx->send_root; 5942 struct btrfs_key key; 5943 struct btrfs_key found_key; 5944 struct btrfs_path *path; 5945 struct extent_buffer *eb; 5946 int slot; 5947 5948 path = alloc_path_for_send(); 5949 if (!path) 5950 return -ENOMEM; 5951 5952 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 5953 key.type = BTRFS_INODE_ITEM_KEY; 5954 key.offset = 0; 5955 5956 ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); 5957 if (ret < 0) 5958 goto out; 5959 if (ret) 5960 goto out_finish; 5961 5962 while (1) { 5963 eb = path->nodes[0]; 5964 slot = path->slots[0]; 5965 btrfs_item_key_to_cpu(eb, &found_key, slot); 5966 5967 ret = changed_cb(send_root, NULL, path, NULL, 5968 &found_key, BTRFS_COMPARE_TREE_NEW, sctx); 5969 if (ret < 0) 5970 goto out; 5971 5972 key.objectid = found_key.objectid; 5973 key.type = found_key.type; 5974 key.offset = found_key.offset + 1; 5975 5976 ret = btrfs_next_item(send_root, path); 5977 if (ret < 0) 5978 goto out; 5979 if (ret) { 5980 ret = 0; 5981 break; 5982 } 5983 } 5984 5985 out_finish: 5986 ret = finish_inode_if_needed(sctx, 1); 5987 5988 out: 5989 btrfs_free_path(path); 5990 return ret; 5991 } 5992 5993 static int send_subvol(struct send_ctx *sctx) 5994 { 5995 int ret; 5996 5997 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) { 5998 ret = send_header(sctx); 5999 if (ret < 0) 6000 goto out; 6001 } 6002 6003 ret = send_subvol_begin(sctx); 6004 if (ret < 0) 6005 goto out; 6006 6007 if (sctx->parent_root) { 6008 ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, 6009 changed_cb, sctx); 6010 if (ret < 0) 6011 goto out; 6012 ret = finish_inode_if_needed(sctx, 1); 6013 if (ret < 0) 6014 goto out; 6015 } else { 6016 ret = full_send_tree(sctx); 6017 if (ret < 0) 6018 goto out; 6019 } 6020 6021 out: 6022 free_recorded_refs(sctx); 6023 return ret; 6024 } 6025 6026 /* 6027 * If orphan cleanup did remove any orphans from a 
root, it means the tree
6028 * was modified and therefore the commit root is not the same as the current
6029 * root anymore. This is a problem, because send uses the commit root and
6030 * therefore can see inode items that don't exist in the current root anymore,
6031 * and for example make calls to btrfs_iget, which will do tree lookups based
6032 * on the current root and not on the commit root. Those lookups will fail,
6033 * returning a -ESTALE error, and making send fail with that error. So make
6034 * sure a send does not see any orphans we have just removed, and that it will
6035 * see the same inodes regardless of whether a transaction commit happened
6036 * before it started (meaning that the commit root will be the same as the
6037 * current root) or not.
6038 */
6039 static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
6040 {
6041 int i;
6042 struct btrfs_trans_handle *trans = NULL;
6043
6044 again:
6045 if (sctx->parent_root &&
6046 sctx->parent_root->node != sctx->parent_root->commit_root)
6047 goto commit_trans;
6048
6049 for (i = 0; i < sctx->clone_roots_cnt; i++)
6050 if (sctx->clone_roots[i].root->node !=
6051 sctx->clone_roots[i].root->commit_root)
6052 goto commit_trans;
6053
6054 if (trans)
6055 return btrfs_end_transaction(trans, sctx->send_root);
6056
6057 return 0;
6058
6059 commit_trans:
6060 /* Use any root, all fs roots will get their commit roots updated. */
6061 if (!trans) {
6062 trans = btrfs_join_transaction(sctx->send_root);
6063 if (IS_ERR(trans))
6064 return PTR_ERR(trans);
6065 goto again;
6066 }
6067
6068 return btrfs_commit_transaction(trans, sctx->send_root);
6069 }
6070
6071 static void btrfs_root_dec_send_in_progress(struct btrfs_root *root)
6072 {
6073 spin_lock(&root->root_item_lock);
6074 root->send_in_progress--;
6075 /*
6076 * Not much left to do, we don't know why it's unbalanced and
6077 * can't blindly reset it to 0.
6078 */
6079 if (root->send_in_progress < 0)
6080 btrfs_err(root->fs_info,
6081 "send_in_progress unbalanced %d root %llu",
6082 root->send_in_progress, root->root_key.objectid);
6083 spin_unlock(&root->root_item_lock);
6084 }
6085
6086 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
6087 {
6088 int ret = 0;
6089 struct btrfs_root *send_root;
6090 struct btrfs_root *clone_root;
6091 struct btrfs_fs_info *fs_info;
6092 struct btrfs_ioctl_send_args *arg = NULL;
6093 struct btrfs_key key;
6094 struct send_ctx *sctx = NULL;
6095 u32 i;
6096 u64 *clone_sources_tmp = NULL;
6097 int clone_sources_to_rollback = 0;
6098 unsigned alloc_size;
6099 int sort_clone_roots = 0;
6100 int index;
6101
6102 if (!capable(CAP_SYS_ADMIN))
6103 return -EPERM;
6104
6105 send_root = BTRFS_I(file_inode(mnt_file))->root;
6106 fs_info = send_root->fs_info;
6107
6108 /*
6109 * The subvolume must remain read-only during send, protect against
6110 * making it RW. This also protects against deletion.
6111 */
6112 spin_lock(&send_root->root_item_lock);
6113 send_root->send_in_progress++;
6114 spin_unlock(&send_root->root_item_lock);
6115
6116 /*
6117 * This is done when we lookup the root, it should already be complete
6118 * by the time we get here.
6119 */
6120 WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
6121
6122 /*
6123 * Userspace tools do the checks and warn the user if it's
6124 * not RO.
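Verify it here anyway, since sending from a writable subvolume could produce an inconsistent stream.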
6125 */ 6126 if (!btrfs_root_readonly(send_root)) { 6127 ret = -EPERM; 6128 goto out; 6129 } 6130 6131 arg = memdup_user(arg_, sizeof(*arg)); 6132 if (IS_ERR(arg)) { 6133 ret = PTR_ERR(arg); 6134 arg = NULL; 6135 goto out; 6136 } 6137 6138 if (arg->clone_sources_count > 6139 ULLONG_MAX / sizeof(*arg->clone_sources)) { 6140 ret = -EINVAL; 6141 goto out; 6142 } 6143 6144 if (!access_ok(VERIFY_READ, arg->clone_sources, 6145 sizeof(*arg->clone_sources) * 6146 arg->clone_sources_count)) { 6147 ret = -EFAULT; 6148 goto out; 6149 } 6150 6151 if (arg->flags & ~BTRFS_SEND_FLAG_MASK) { 6152 ret = -EINVAL; 6153 goto out; 6154 } 6155 6156 sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL); 6157 if (!sctx) { 6158 ret = -ENOMEM; 6159 goto out; 6160 } 6161 6162 INIT_LIST_HEAD(&sctx->new_refs); 6163 INIT_LIST_HEAD(&sctx->deleted_refs); 6164 INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL); 6165 INIT_LIST_HEAD(&sctx->name_cache_list); 6166 6167 sctx->flags = arg->flags; 6168 6169 sctx->send_filp = fget(arg->send_fd); 6170 if (!sctx->send_filp) { 6171 ret = -EBADF; 6172 goto out; 6173 } 6174 6175 sctx->send_root = send_root; 6176 /* 6177 * Unlikely but possible, if the subvolume is marked for deletion but 6178 * is slow to remove the directory entry, send can still be started 6179 */ 6180 if (btrfs_root_dead(sctx->send_root)) { 6181 ret = -EPERM; 6182 goto out; 6183 } 6184 6185 sctx->clone_roots_cnt = arg->clone_sources_count; 6186 6187 sctx->send_max_size = BTRFS_SEND_BUF_SIZE; 6188 sctx->send_buf = kmalloc(sctx->send_max_size, GFP_KERNEL | __GFP_NOWARN); 6189 if (!sctx->send_buf) { 6190 sctx->send_buf = vmalloc(sctx->send_max_size); 6191 if (!sctx->send_buf) { 6192 ret = -ENOMEM; 6193 goto out; 6194 } 6195 } 6196 6197 sctx->read_buf = kmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL | __GFP_NOWARN); 6198 if (!sctx->read_buf) { 6199 sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE); 6200 if (!sctx->read_buf) { 6201 ret = -ENOMEM; 6202 goto out; 6203 } 6204 } 6205 6206 sctx->pending_dir_moves = RB_ROOT; 6207 sctx->waiting_dir_moves = RB_ROOT; 6208 sctx->orphan_dirs = RB_ROOT; 6209 6210 alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1); 6211 6212 sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN); 6213 if (!sctx->clone_roots) { 6214 sctx->clone_roots = vzalloc(alloc_size); 6215 if (!sctx->clone_roots) { 6216 ret = -ENOMEM; 6217 goto out; 6218 } 6219 } 6220 6221 alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources); 6222 6223 if (arg->clone_sources_count) { 6224 clone_sources_tmp = kmalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN); 6225 if (!clone_sources_tmp) { 6226 clone_sources_tmp = vmalloc(alloc_size); 6227 if (!clone_sources_tmp) { 6228 ret = -ENOMEM; 6229 goto out; 6230 } 6231 } 6232 6233 ret = copy_from_user(clone_sources_tmp, arg->clone_sources, 6234 alloc_size); 6235 if (ret) { 6236 ret = -EFAULT; 6237 goto out; 6238 } 6239 6240 for (i = 0; i < arg->clone_sources_count; i++) { 6241 key.objectid = clone_sources_tmp[i]; 6242 key.type = BTRFS_ROOT_ITEM_KEY; 6243 key.offset = (u64)-1; 6244 6245 index = srcu_read_lock(&fs_info->subvol_srcu); 6246 6247 clone_root = btrfs_read_fs_root_no_name(fs_info, &key); 6248 if (IS_ERR(clone_root)) { 6249 srcu_read_unlock(&fs_info->subvol_srcu, index); 6250 ret = PTR_ERR(clone_root); 6251 goto out; 6252 } 6253 spin_lock(&clone_root->root_item_lock); 6254 if (!btrfs_root_readonly(clone_root) || 6255 btrfs_root_dead(clone_root)) { 6256 spin_unlock(&clone_root->root_item_lock); 6257 srcu_read_unlock(&fs_info->subvol_srcu, index); 6258 
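/* Clone sources must be readonly and must not be undergoing deletion. */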
ret = -EPERM; 6259 goto out; 6260 } 6261 clone_root->send_in_progress++; 6262 spin_unlock(&clone_root->root_item_lock); 6263 srcu_read_unlock(&fs_info->subvol_srcu, index); 6264 6265 sctx->clone_roots[i].root = clone_root; 6266 clone_sources_to_rollback = i + 1; 6267 } 6268 kvfree(clone_sources_tmp); 6269 clone_sources_tmp = NULL; 6270 } 6271 6272 if (arg->parent_root) { 6273 key.objectid = arg->parent_root; 6274 key.type = BTRFS_ROOT_ITEM_KEY; 6275 key.offset = (u64)-1; 6276 6277 index = srcu_read_lock(&fs_info->subvol_srcu); 6278 6279 sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); 6280 if (IS_ERR(sctx->parent_root)) { 6281 srcu_read_unlock(&fs_info->subvol_srcu, index); 6282 ret = PTR_ERR(sctx->parent_root); 6283 goto out; 6284 } 6285 6286 spin_lock(&sctx->parent_root->root_item_lock); 6287 sctx->parent_root->send_in_progress++; 6288 if (!btrfs_root_readonly(sctx->parent_root) || 6289 btrfs_root_dead(sctx->parent_root)) { 6290 spin_unlock(&sctx->parent_root->root_item_lock); 6291 srcu_read_unlock(&fs_info->subvol_srcu, index); 6292 ret = -EPERM; 6293 goto out; 6294 } 6295 spin_unlock(&sctx->parent_root->root_item_lock); 6296 6297 srcu_read_unlock(&fs_info->subvol_srcu, index); 6298 } 6299 6300 /* 6301 * Clones from send_root are allowed, but only if the clone source 6302 * is behind the current send position. This is checked while searching 6303 * for possible clone sources. 6304 */ 6305 sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root; 6306 6307 /* We do a bsearch later */ 6308 sort(sctx->clone_roots, sctx->clone_roots_cnt, 6309 sizeof(*sctx->clone_roots), __clone_root_cmp_sort, 6310 NULL); 6311 sort_clone_roots = 1; 6312 6313 ret = ensure_commit_roots_uptodate(sctx); 6314 if (ret) 6315 goto out; 6316 6317 current->journal_info = BTRFS_SEND_TRANS_STUB; 6318 ret = send_subvol(sctx); 6319 current->journal_info = NULL; 6320 if (ret < 0) 6321 goto out; 6322 6323 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) { 6324 ret = begin_cmd(sctx, BTRFS_SEND_C_END); 6325 if (ret < 0) 6326 goto out; 6327 ret = send_cmd(sctx); 6328 if (ret < 0) 6329 goto out; 6330 } 6331 6332 out: 6333 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)); 6334 while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) { 6335 struct rb_node *n; 6336 struct pending_dir_move *pm; 6337 6338 n = rb_first(&sctx->pending_dir_moves); 6339 pm = rb_entry(n, struct pending_dir_move, node); 6340 while (!list_empty(&pm->list)) { 6341 struct pending_dir_move *pm2; 6342 6343 pm2 = list_first_entry(&pm->list, 6344 struct pending_dir_move, list); 6345 free_pending_move(sctx, pm2); 6346 } 6347 free_pending_move(sctx, pm); 6348 } 6349 6350 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)); 6351 while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) { 6352 struct rb_node *n; 6353 struct waiting_dir_move *dm; 6354 6355 n = rb_first(&sctx->waiting_dir_moves); 6356 dm = rb_entry(n, struct waiting_dir_move, node); 6357 rb_erase(&dm->node, &sctx->waiting_dir_moves); 6358 kfree(dm); 6359 } 6360 6361 WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs)); 6362 while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) { 6363 struct rb_node *n; 6364 struct orphan_dir_info *odi; 6365 6366 n = rb_first(&sctx->orphan_dirs); 6367 odi = rb_entry(n, struct orphan_dir_info, node); 6368 free_orphan_dir_info(sctx, odi); 6369 } 6370 6371 if (sort_clone_roots) { 6372 for (i = 0; i < sctx->clone_roots_cnt; i++) 6373 btrfs_root_dec_send_in_progress( 6374 sctx->clone_roots[i].root); 6375 } else { 
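/*
 * The clone roots were never sorted, so only the entries we actually
 * took a send_in_progress reference for need to be rolled back, plus
 * the send root's own reference.
 */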
6376 for (i = 0; sctx && i < clone_sources_to_rollback; i++) 6377 btrfs_root_dec_send_in_progress( 6378 sctx->clone_roots[i].root); 6379 6380 btrfs_root_dec_send_in_progress(send_root); 6381 } 6382 if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) 6383 btrfs_root_dec_send_in_progress(sctx->parent_root); 6384 6385 kfree(arg); 6386 kvfree(clone_sources_tmp); 6387 6388 if (sctx) { 6389 if (sctx->send_filp) 6390 fput(sctx->send_filp); 6391 6392 kvfree(sctx->clone_roots); 6393 kvfree(sctx->send_buf); 6394 kvfree(sctx->read_buf); 6395 6396 name_cache_free(sctx); 6397 6398 kfree(sctx); 6399 } 6400 6401 return ret; 6402 } 6403