/*
 * Copyright (C) 2012 Alexander Block.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/bsearch.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sort.h>
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/radix-tree.h>
#include <linux/vmalloc.h>
#include <linux/string.h>

#include "send.h"
#include "backref.h"
#include "hash.h"
#include "locking.h"
#include "disk-io.h"
#include "btrfs_inode.h"
#include "transaction.h"

static int g_verbose = 0;

#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__)

/*
 * A fs_path is a helper to dynamically build path names with unknown size.
 * It reallocates the internal buffer on demand.
 * It allows fast adding of path elements on the right side (normal path) and
 * fast adding to the left side (reversed path). A reversed path can also be
 * unreversed if needed.
 */
struct fs_path {
	union {
		struct {
			char *start;
			char *end;
			char *prepared;

			char *buf;
			int buf_len;
			unsigned int reversed:1;
			unsigned int virtual_mem:1;
			char inline_buf[];
		};
		char pad[PAGE_SIZE];
	};
};
#define FS_PATH_INLINE_SIZE \
	(sizeof(struct fs_path) - offsetof(struct fs_path, inline_buf))
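/*
 * Illustrative usage sketch (not called anywhere; names and sizes are made
 * up). In normal mode elements are appended on the right:
 *
 *	struct fs_path *p = fs_path_alloc();
 *	if (!p)
 *		return -ENOMEM;
 *	fs_path_add(p, "foo", 3);	p->start is now "foo"
 *	fs_path_add(p, "bar", 3);	p->start is now "foo/bar"
 *	fs_path_free(p);
 *
 * Paths that fit into FS_PATH_INLINE_SIZE stay in the inline buffer; longer
 * paths are moved to a kmalloc'ed (or, as a fallback, vmalloc'ed) buffer by
 * fs_path_ensure_buf() below.
 */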

/* reused for each extent */
struct clone_root {
	struct btrfs_root *root;
	u64 ino;
	u64 offset;

	u64 found_refs;
};

#define SEND_CTX_MAX_NAME_CACHE_SIZE 128
#define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2)

struct send_ctx {
	struct file *send_filp;
	loff_t send_off;
	char *send_buf;
	u32 send_size;
	u32 send_max_size;
	u64 total_send_size;
	u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
	u64 flags;	/* 'flags' member of btrfs_ioctl_send_args is u64 */

	struct btrfs_root *send_root;
	struct btrfs_root *parent_root;
	struct clone_root *clone_roots;
	int clone_roots_cnt;

	/* current state of the compare_tree call */
	struct btrfs_path *left_path;
	struct btrfs_path *right_path;
	struct btrfs_key *cmp_key;

	/*
	 * Info about the currently processed inode. In case of deleted inodes,
	 * these are the values from the deleted inode.
	 */
	u64 cur_ino;
	u64 cur_inode_gen;
	int cur_inode_new;
	int cur_inode_new_gen;
	int cur_inode_deleted;
	u64 cur_inode_size;
	u64 cur_inode_mode;
	u64 cur_inode_last_extent;

	u64 send_progress;

	struct list_head new_refs;
	struct list_head deleted_refs;

	struct radix_tree_root name_cache;
	struct list_head name_cache_list;
	int name_cache_size;

	char *read_buf;

	/*
	 * We process inodes by their increasing order, so if before an
	 * incremental send we reverse the parent/child relationship of
	 * directories such that a directory with a lower inode number was
	 * the parent of a directory with a higher inode number, and the one
	 * becoming the new parent got renamed too, we can't rename/move the
	 * directory with lower inode number when we finish processing it - we
	 * must process the directory with higher inode number first, then
	 * rename/move it and then rename/move the directory with lower inode
	 * number. Example follows.
	 *
	 * Tree state when the first send was performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |
	 *         |
	 *         |-- c           (ino 259)
	 *         |   |-- d       (ino 260)
	 *         |
	 *         |-- c2          (ino 261)
	 *
	 * Tree state when the second (incremental) send is performed:
	 *
	 * .
	 * |-- a                   (ino 257)
	 *     |-- b               (ino 258)
	 *         |-- c2          (ino 261)
	 *             |-- d2      (ino 260)
	 *                 |-- cc  (ino 259)
	 *
	 * The sequence of steps that lead to the second state was:
	 *
	 * mv /a/b/c/d /a/b/c2/d2
	 * mv /a/b/c /a/b/c2/d2/cc
	 *
	 * "c" has lower inode number, but we can't move it (2nd mv operation)
	 * before we move "d", which has higher inode number.
	 *
	 * So we just memorize which move/rename operations must be performed
	 * later when their respective parent is processed and moved/renamed.
	 */

	/* Indexed by parent directory inode number. */
	struct rb_root pending_dir_moves;

	/*
	 * Reverse index, indexed by the inode number of a directory that
	 * is waiting for the move/rename of its immediate parent before its
	 * own move/rename can be performed.
	 */
	struct rb_root waiting_dir_moves;
};

struct pending_dir_move {
	struct rb_node node;
	struct list_head list;
	u64 parent_ino;
	u64 ino;
	u64 gen;
	struct list_head update_refs;
};

struct waiting_dir_move {
	struct rb_node node;
	u64 ino;
};

struct name_cache_entry {
	struct list_head list;
	/*
	 * radix_tree has only 32bit entries but we need to handle 64bit inums.
	 * We use the lower 32bit of the 64bit inum to store it in the tree. If
	 * more than one inum would fall into the same entry, we use radix_list
	 * to store the additional entries. radix_list is also used to store
	 * entries where two entries have the same inum but different
	 * generations.
	 */
	struct list_head radix_list;
	u64 ino;
	u64 gen;
	u64 parent_ino;
	u64 parent_gen;
	int ret;
	int need_later_update;
	int name_len;
	char name[];
};
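/*
 * Illustrative clash example for radix_list above (made-up values): on a
 * 32bit kernel the radix tree key (unsigned long)ino keeps only the lower
 * 32 bits, so inums 0x100000001 and 0x200000001 both map to index 0x1. Both
 * entries then hang off the same nce_head list, and name_cache_search()
 * further below walks that list comparing the full 64bit ino and gen.
 */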

static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);

static int need_send_hole(struct send_ctx *sctx)
{
	return (sctx->parent_root && !sctx->cur_inode_new &&
		!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted &&
		S_ISREG(sctx->cur_inode_mode));
}

static void fs_path_reset(struct fs_path *p)
{
	if (p->reversed) {
		p->start = p->buf + p->buf_len - 1;
		p->end = p->start;
		*p->start = 0;
	} else {
		p->start = p->buf;
		p->end = p->start;
		*p->start = 0;
	}
}

static struct fs_path *fs_path_alloc(void)
{
	struct fs_path *p;

	p = kmalloc(sizeof(*p), GFP_NOFS);
	if (!p)
		return NULL;
	p->reversed = 0;
	p->virtual_mem = 0;
	p->buf = p->inline_buf;
	p->buf_len = FS_PATH_INLINE_SIZE;
	fs_path_reset(p);
	return p;
}

static struct fs_path *fs_path_alloc_reversed(void)
{
	struct fs_path *p;

	p = fs_path_alloc();
	if (!p)
		return NULL;
	p->reversed = 1;
	fs_path_reset(p);
	return p;
}

static void fs_path_free(struct fs_path *p)
{
	if (!p)
		return;
	if (p->buf != p->inline_buf) {
		if (p->virtual_mem)
			vfree(p->buf);
		else
			kfree(p->buf);
	}
	kfree(p);
}

static int fs_path_len(struct fs_path *p)
{
	return p->end - p->start;
}

static int fs_path_ensure_buf(struct fs_path *p, int len)
{
	char *tmp_buf;
	int path_len;
	int old_buf_len;

	len++;

	if (p->buf_len >= len)
		return 0;

	path_len = p->end - p->start;
	old_buf_len = p->buf_len;
	len = PAGE_ALIGN(len);

	if (p->buf == p->inline_buf) {
		tmp_buf = kmalloc(len, GFP_NOFS | __GFP_NOWARN);
		if (!tmp_buf) {
			tmp_buf = vmalloc(len);
			if (!tmp_buf)
				return -ENOMEM;
			p->virtual_mem = 1;
		}
		memcpy(tmp_buf, p->buf, p->buf_len);
		p->buf = tmp_buf;
		p->buf_len = len;
	} else {
		if (p->virtual_mem) {
			tmp_buf = vmalloc(len);
			if (!tmp_buf)
				return -ENOMEM;
			memcpy(tmp_buf, p->buf, p->buf_len);
			vfree(p->buf);
		} else {
			tmp_buf = krealloc(p->buf, len, GFP_NOFS);
			if (!tmp_buf) {
				tmp_buf = vmalloc(len);
				if (!tmp_buf)
					return -ENOMEM;
				memcpy(tmp_buf, p->buf, p->buf_len);
				kfree(p->buf);
				p->virtual_mem = 1;
			}
		}
		p->buf = tmp_buf;
		p->buf_len = len;
	}
	if (p->reversed) {
		tmp_buf = p->buf + old_buf_len - path_len - 1;
		p->end = p->buf + p->buf_len - 1;
		p->start = p->end - path_len;
		memmove(p->start, tmp_buf, path_len + 1);
	} else {
		p->start = p->buf;
		p->end = p->start + path_len;
	}
	return 0;
}

static int fs_path_prepare_for_add(struct fs_path *p, int name_len)
{
	int ret;
	int new_len;

	new_len = p->end - p->start + name_len;
	if (p->start != p->end)
		new_len++;
	ret = fs_path_ensure_buf(p, new_len);
	if (ret < 0)
		goto out;

	if (p->reversed) {
		if (p->start != p->end)
			*--p->start = '/';
		p->start -= name_len;
		p->prepared = p->start;
	} else {
		if (p->start != p->end)
			*p->end++ = '/';
		p->prepared = p->end;
		p->end += name_len;
		*p->end = 0;
	}

out:
	return ret;
}

static int fs_path_add(struct fs_path *p, const char *name, int name_len)
{
	int ret;

	ret = fs_path_prepare_for_add(p, name_len);
	if (ret < 0)
		goto out;
	memcpy(p->prepared, name, name_len);
	p->prepared = NULL;

out:
	return ret;
}

static int fs_path_add_path(struct fs_path *p, struct fs_path *p2)
{
	int ret;

	ret = fs_path_prepare_for_add(p, p2->end - p2->start);
	if (ret < 0)
		goto out;
	memcpy(p->prepared, p2->start, p2->end - p2->start);
	p->prepared = NULL;

out:
	return ret;
}

static int fs_path_add_from_extent_buffer(struct fs_path *p,
					  struct extent_buffer *eb,
					  unsigned long off, int len)
{
	int ret;

	ret = fs_path_prepare_for_add(p, len);
	if (ret < 0)
		goto out;

	read_extent_buffer(eb, p->prepared, off, len);
	p->prepared = NULL;

out:
	return ret;
}

static int fs_path_copy(struct fs_path *p, struct fs_path *from)
{
	int ret;

	p->reversed = from->reversed;
	fs_path_reset(p);

	ret = fs_path_add_path(p, from);

	return ret;
}

static void fs_path_unreverse(struct fs_path *p)
{
	char *tmp;
	int len;

	if (!p->reversed)
		return;

	tmp = p->start;
	len = p->end - p->start;
	p->start = p->buf;
	p->end = p->start + len;
	memmove(p->start, tmp, len + 1);
	p->reversed = 0;
}
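/*
 * Sketch of reversed mode, which get_cur_path() further below uses to build
 * a path while walking from an inode up to the subvolume root (names made
 * up):
 *
 *	struct fs_path *p = fs_path_alloc_reversed();
 *	fs_path_add(p, "file", 4);	p->start is "file"
 *	fs_path_add(p, "dir", 3);	p->start is "dir/file"
 *	fs_path_unreverse(p);		buffer now starts with "dir/file"
 *
 * Each new element is prepended, so the final path comes out in root-to-leaf
 * order without a separate string reversal pass.
 */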

static struct btrfs_path *alloc_path_for_send(void)
{
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return NULL;
	path->search_commit_root = 1;
	path->skip_locking = 1;
	return path;
}

static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
{
	int ret;
	mm_segment_t old_fs;
	u32 pos = 0;

	old_fs = get_fs();
	set_fs(KERNEL_DS);

	while (pos < len) {
		ret = vfs_write(filp, (char *)buf + pos, len - pos, off);
		/* TODO handle that correctly */
		/*if (ret == -ERESTARTSYS) {
			continue;
		}*/
		if (ret < 0)
			goto out;
		if (ret == 0) {
			ret = -EIO;
			goto out;
		}
		pos += ret;
	}

	ret = 0;

out:
	set_fs(old_fs);
	return ret;
}

static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
{
	struct btrfs_tlv_header *hdr;
	int total_len = sizeof(*hdr) + len;
	int left = sctx->send_max_size - sctx->send_size;

	if (unlikely(left < total_len))
		return -EOVERFLOW;

	hdr = (struct btrfs_tlv_header *) (sctx->send_buf + sctx->send_size);
	hdr->tlv_type = cpu_to_le16(attr);
	hdr->tlv_len = cpu_to_le16(len);
	memcpy(hdr + 1, data, len);
	sctx->send_size += total_len;

	return 0;
}

#define TLV_PUT_DEFINE_INT(bits) \
	static int tlv_put_u##bits(struct send_ctx *sctx,		\
				   u##bits attr, u##bits value)		\
	{								\
		__le##bits __tmp = cpu_to_le##bits(value);		\
		return tlv_put(sctx, attr, &__tmp, sizeof(__tmp));	\
	}

TLV_PUT_DEFINE_INT(64)

static int tlv_put_string(struct send_ctx *sctx, u16 attr,
			  const char *str, int len)
{
	if (len == -1)
		len = strlen(str);
	return tlv_put(sctx, attr, str, len);
}

static int tlv_put_uuid(struct send_ctx *sctx, u16 attr,
			const u8 *uuid)
{
	return tlv_put(sctx, attr, uuid, BTRFS_UUID_SIZE);
}

static int tlv_put_btrfs_timespec(struct send_ctx *sctx, u16 attr,
				  struct extent_buffer *eb,
				  struct btrfs_timespec *ts)
{
	struct btrfs_timespec bts;
	read_extent_buffer(eb, &bts, (unsigned long)ts, sizeof(bts));
	return tlv_put(sctx, attr, &bts, sizeof(bts));
}
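/*
 * For reference, the wire layout produced by the tlv_put_* helpers above
 * (struct btrfs_tlv_header from send.h, all fields little-endian):
 *
 *	__le16 tlv_type		attribute, e.g. BTRFS_SEND_A_SIZE
 *	__le16 tlv_len		payload length, excluding this header
 *	u8 data[tlv_len]	payload
 *
 * So tlv_put_u64(sctx, BTRFS_SEND_A_SIZE, 4096) appends 4 + 8 bytes to
 * sctx->send_buf and advances sctx->send_size accordingly.
 */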

#define TLV_PUT(sctx, attrtype, attrlen, data) \
	do { \
		ret = tlv_put(sctx, attrtype, attrlen, data); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_INT(sctx, attrtype, bits, value) \
	do { \
		ret = tlv_put_u##bits(sctx, attrtype, value); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

#define TLV_PUT_U8(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 8, data)
#define TLV_PUT_U16(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 16, data)
#define TLV_PUT_U32(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 32, data)
#define TLV_PUT_U64(sctx, attrtype, data) TLV_PUT_INT(sctx, attrtype, 64, data)
#define TLV_PUT_STRING(sctx, attrtype, str, len) \
	do { \
		ret = tlv_put_string(sctx, attrtype, str, len); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_PATH(sctx, attrtype, p) \
	do { \
		ret = tlv_put_string(sctx, attrtype, p->start, \
				     p->end - p->start); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_UUID(sctx, attrtype, uuid) \
	do { \
		ret = tlv_put_uuid(sctx, attrtype, uuid); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)
#define TLV_PUT_BTRFS_TIMESPEC(sctx, attrtype, eb, ts) \
	do { \
		ret = tlv_put_btrfs_timespec(sctx, attrtype, eb, ts); \
		if (ret < 0) \
			goto tlv_put_failure; \
	} while (0)

static int send_header(struct send_ctx *sctx)
{
	struct btrfs_stream_header hdr;

	strcpy(hdr.magic, BTRFS_SEND_STREAM_MAGIC);
	hdr.version = cpu_to_le32(BTRFS_SEND_STREAM_VERSION);

	return write_buf(sctx->send_filp, &hdr, sizeof(hdr),
			 &sctx->send_off);
}

/*
 * For each command/item we want to send to userspace, we call this function.
 */
static int begin_cmd(struct send_ctx *sctx, int cmd)
{
	struct btrfs_cmd_header *hdr;

	if (WARN_ON(!sctx->send_buf))
		return -EINVAL;

	BUG_ON(sctx->send_size);

	sctx->send_size += sizeof(*hdr);
	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->cmd = cpu_to_le16(cmd);

	return 0;
}

static int send_cmd(struct send_ctx *sctx)
{
	int ret;
	struct btrfs_cmd_header *hdr;
	u32 crc;

	hdr = (struct btrfs_cmd_header *)sctx->send_buf;
	hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
	hdr->crc = 0;

	crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
	hdr->crc = cpu_to_le32(crc);

	ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
			&sctx->send_off);

	sctx->total_send_size += sctx->send_size;
	sctx->cmd_send_size[le16_to_cpu(hdr->cmd)] += sctx->send_size;
	sctx->send_size = 0;

	return ret;
}
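/*
 * For reference, each command emitted by begin_cmd()/send_cmd() is framed
 * with struct btrfs_cmd_header from send.h:
 *
 *	__le32 len	length of the TLV data, excluding this header
 *	__le16 cmd	e.g. BTRFS_SEND_C_RENAME
 *	__le32 crc	crc32c over header + data, computed with crc == 0
 *
 * followed by len bytes of TLV attributes. The whole stream begins with the
 * btrfs_stream_header written by send_header() above.
 */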

/*
 * Sends a move instruction to user space
 */
static int send_rename(struct send_ctx *sctx,
		       struct fs_path *from, struct fs_path *to)
{
	int ret;

	verbose_printk("btrfs: send_rename %s -> %s\n", from->start, to->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RENAME);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, from);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_TO, to);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a link instruction to user space
 */
static int send_link(struct send_ctx *sctx,
		     struct fs_path *path, struct fs_path *lnk)
{
	int ret;

	verbose_printk("btrfs: send_link %s -> %s\n", path->start, lnk->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_LINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, lnk);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends an unlink instruction to user space
 */
static int send_unlink(struct send_ctx *sctx, struct fs_path *path)
{
	int ret;

	verbose_printk("btrfs: send_unlink %s\n", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_UNLINK);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Sends a rmdir instruction to user space
 */
static int send_rmdir(struct send_ctx *sctx, struct fs_path *path)
{
	int ret;

	verbose_printk("btrfs: send_rmdir %s\n", path->start);

	ret = begin_cmd(sctx, BTRFS_SEND_C_RMDIR);
	if (ret < 0)
		goto out;

	TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);

	ret = send_cmd(sctx);

tlv_put_failure:
out:
	return ret;
}

/*
 * Helper function to retrieve some fields from an inode item.
 */
static int get_inode_info(struct btrfs_root *root,
			  u64 ino, u64 *size, u64 *gen,
			  u64 *mode, u64 *uid, u64 *gid,
			  u64 *rdev)
{
	int ret;
	struct btrfs_inode_item *ii;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		ret = -ENOENT;
		goto out;
	}

	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_inode_item);
	if (size)
		*size = btrfs_inode_size(path->nodes[0], ii);
	if (gen)
		*gen = btrfs_inode_generation(path->nodes[0], ii);
	if (mode)
		*mode = btrfs_inode_mode(path->nodes[0], ii);
	if (uid)
		*uid = btrfs_inode_uid(path->nodes[0], ii);
	if (gid)
		*gid = btrfs_inode_gid(path->nodes[0], ii);
	if (rdev)
		*rdev = btrfs_inode_rdev(path->nodes[0], ii);

out:
	btrfs_free_path(path);
	return ret;
}
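/*
 * Callers pass NULL for fields they are not interested in; a minimal usage
 * sketch (surrounding function and error handling omitted):
 *
 *	u64 gen, mode;
 *	ret = get_inode_info(root, ino, NULL, &gen, &mode, NULL, NULL, NULL);
 *	if (ret < 0)
 *		goto out;	returns -ENOENT if the inode item is missing
 */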

typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
				   struct fs_path *p,
				   void *ctx);

/*
 * Helper function to iterate the entries in ONE btrfs_inode_ref or
 * btrfs_inode_extref.
 * The iterate callback may return a non-zero value to stop iteration. This
 * can be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the INODE_REF or INODE_EXTREF when called.
 */
static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
			     struct btrfs_key *found_key, int resolve,
			     iterate_inode_ref_t iterate, void *ctx)
{
	struct extent_buffer *eb = path->nodes[0];
	struct btrfs_item *item;
	struct btrfs_inode_ref *iref;
	struct btrfs_inode_extref *extref;
	struct btrfs_path *tmp_path;
	struct fs_path *p;
	u32 cur = 0;
	u32 total;
	int slot = path->slots[0];
	u32 name_len;
	char *start;
	int ret = 0;
	int num = 0;
	int index;
	u64 dir;
	unsigned long name_off;
	unsigned long elem_size;
	unsigned long ptr;

	p = fs_path_alloc_reversed();
	if (!p)
		return -ENOMEM;

	tmp_path = alloc_path_for_send();
	if (!tmp_path) {
		fs_path_free(p);
		return -ENOMEM;
	}

	if (found_key->type == BTRFS_INODE_REF_KEY) {
		ptr = (unsigned long)btrfs_item_ptr(eb, slot,
						    struct btrfs_inode_ref);
		item = btrfs_item_nr(slot);
		total = btrfs_item_size(eb, item);
		elem_size = sizeof(*iref);
	} else {
		ptr = btrfs_item_ptr_offset(eb, slot);
		total = btrfs_item_size_nr(eb, slot);
		elem_size = sizeof(*extref);
	}

	while (cur < total) {
		fs_path_reset(p);

		if (found_key->type == BTRFS_INODE_REF_KEY) {
			iref = (struct btrfs_inode_ref *)(ptr + cur);
			name_len = btrfs_inode_ref_name_len(eb, iref);
			name_off = (unsigned long)(iref + 1);
			index = btrfs_inode_ref_index(eb, iref);
			dir = found_key->offset;
		} else {
			extref = (struct btrfs_inode_extref *)(ptr + cur);
			name_len = btrfs_inode_extref_name_len(eb, extref);
			name_off = (unsigned long)&extref->name;
			index = btrfs_inode_extref_index(eb, extref);
			dir = btrfs_inode_extref_parent(eb, extref);
		}

		if (resolve) {
			start = btrfs_ref_to_path(root, tmp_path, name_len,
						  name_off, eb, dir,
						  p->buf, p->buf_len);
			if (IS_ERR(start)) {
				ret = PTR_ERR(start);
				goto out;
			}
			if (start < p->buf) {
				/* overflow, try again with larger buffer */
				ret = fs_path_ensure_buf(p,
						p->buf_len + p->buf - start);
				if (ret < 0)
					goto out;
				start = btrfs_ref_to_path(root, tmp_path,
							  name_len, name_off,
							  eb, dir,
							  p->buf, p->buf_len);
				if (IS_ERR(start)) {
					ret = PTR_ERR(start);
					goto out;
				}
				BUG_ON(start < p->buf);
			}
			p->start = start;
		} else {
			ret = fs_path_add_from_extent_buffer(p, eb, name_off,
							     name_len);
			if (ret < 0)
				goto out;
		}

		cur += elem_size + name_len;
		ret = iterate(num, dir, index, p, ctx);
		if (ret)
			goto out;
		num++;
	}

out:
	btrfs_free_path(tmp_path);
	fs_path_free(p);
	return ret;
}
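/*
 * Sketch of a hypothetical iterate_inode_ref() callback, just to illustrate
 * the contract (not used anywhere):
 *
 *	static int __count_refs(int num, u64 dir, int index,
 *				struct fs_path *p, void *ctx)
 *	{
 *		(*(u64 *)ctx)++;
 *		return 0;	0 keeps iterating, 1 stops early
 *	}
 *
 * __copy_first_ref() further below is a real callback that stops after the
 * first ref.
 */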

typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key,
				  const char *name, int name_len,
				  const char *data, int data_len,
				  u8 type, void *ctx);

/*
 * Helper function to iterate the entries in ONE btrfs_dir_item.
 * The iterate callback may return a non-zero value to stop iteration. This
 * can be a negative value for error codes or 1 to simply stop it.
 *
 * path must point to the dir item when called.
 */
static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
			    struct btrfs_key *found_key,
			    iterate_dir_item_t iterate, void *ctx)
{
	int ret = 0;
	struct extent_buffer *eb;
	struct btrfs_item *item;
	struct btrfs_dir_item *di;
	struct btrfs_key di_key;
	char *buf = NULL;
	char *buf2 = NULL;
	int buf_len;
	int buf_virtual = 0;
	u32 name_len;
	u32 data_len;
	u32 cur;
	u32 len;
	u32 total;
	int slot;
	int num;
	u8 type;

	buf_len = PAGE_SIZE;
	buf = kmalloc(buf_len, GFP_NOFS);
	if (!buf) {
		ret = -ENOMEM;
		goto out;
	}

	eb = path->nodes[0];
	slot = path->slots[0];
	item = btrfs_item_nr(slot);
	di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
	cur = 0;
	len = 0;
	total = btrfs_item_size(eb, item);

	num = 0;
	while (cur < total) {
		name_len = btrfs_dir_name_len(eb, di);
		data_len = btrfs_dir_data_len(eb, di);
		type = btrfs_dir_type(eb, di);
		btrfs_dir_item_key_to_cpu(eb, di, &di_key);

		if (name_len + data_len > buf_len) {
			buf_len = PAGE_ALIGN(name_len + data_len);
			if (buf_virtual) {
				buf2 = vmalloc(buf_len);
				if (!buf2) {
					ret = -ENOMEM;
					goto out;
				}
				vfree(buf);
			} else {
				buf2 = krealloc(buf, buf_len, GFP_NOFS);
				if (!buf2) {
					buf2 = vmalloc(buf_len);
					if (!buf2) {
						ret = -ENOMEM;
						goto out;
					}
					kfree(buf);
					buf_virtual = 1;
				}
			}

			buf = buf2;
			buf2 = NULL;
		}

		read_extent_buffer(eb, buf, (unsigned long)(di + 1),
				   name_len + data_len);

		len = sizeof(*di) + name_len + data_len;
		di = (struct btrfs_dir_item *)((char *)di + len);
		cur += len;

		ret = iterate(num, &di_key, buf, name_len, buf + name_len,
			      data_len, type, ctx);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}

		num++;
	}

out:
	if (buf_virtual)
		vfree(buf);
	else
		kfree(buf);
	return ret;
}

static int __copy_first_ref(int num, u64 dir, int index,
			    struct fs_path *p, void *ctx)
{
	int ret;
	struct fs_path *pt = ctx;

	ret = fs_path_copy(pt, p);
	if (ret < 0)
		return ret;

	/* we want the first only */
	return 1;
}

/*
 * Retrieve the first path of an inode. If an inode has more than one
 * ref/hardlink, this is ignored.
 */
static int get_inode_path(struct btrfs_root *root,
			  u64 ino, struct fs_path *path)
{
	int ret;
	struct btrfs_key key, found_key;
	struct btrfs_path *p;

	p = alloc_path_for_send();
	if (!p)
		return -ENOMEM;

	fs_path_reset(path);

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
	if (ret < 0)
		goto out;
	if (ret) {
		ret = 1;
		goto out;
	}
	btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
	if (found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	ret = iterate_inode_ref(root, p, &found_key, 1,
				__copy_first_ref, path);
	if (ret < 0)
		goto out;
	ret = 0;

out:
	btrfs_free_path(p);
	return ret;
}

struct backref_ctx {
	struct send_ctx *sctx;

	/* number of total found references */
	u64 found;

	/*
	 * used for clones found in send_root. clones found behind cur_objectid
	 * and cur_offset are not considered as allowed clones.
	 */
	u64 cur_objectid;
	u64 cur_offset;

	/* may be truncated in case it's the last extent in a file */
	u64 extent_len;

	/* Just to check for bugs in backref resolving */
	int found_itself;
};

static int __clone_root_cmp_bsearch(const void *key, const void *elt)
{
	u64 root = (u64)(uintptr_t)key;
	struct clone_root *cr = (struct clone_root *)elt;

	if (root < cr->root->objectid)
		return -1;
	if (root > cr->root->objectid)
		return 1;
	return 0;
}

static int __clone_root_cmp_sort(const void *e1, const void *e2)
{
	struct clone_root *cr1 = (struct clone_root *)e1;
	struct clone_root *cr2 = (struct clone_root *)e2;

	if (cr1->root->objectid < cr2->root->objectid)
		return -1;
	if (cr1->root->objectid > cr2->root->objectid)
		return 1;
	return 0;
}

/*
 * Called for every backref that is found for the current extent.
 * Results are collected in sctx->clone_roots->ino/offset/found_refs
 */
static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
{
	struct backref_ctx *bctx = ctx_;
	struct clone_root *found;
	int ret;
	u64 i_size;

	/* First check if the root is in the list of accepted clone sources */
	found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots,
			bctx->sctx->clone_roots_cnt,
			sizeof(struct clone_root),
			__clone_root_cmp_bsearch);
	if (!found)
		return 0;

	if (found->root == bctx->sctx->send_root &&
	    ino == bctx->cur_objectid &&
	    offset == bctx->cur_offset) {
		bctx->found_itself = 1;
	}

	/*
	 * There are inodes that have extents that lie behind their i_size.
	 * Don't accept clones from these extents.
	 */
	ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL,
			     NULL);
	if (ret < 0)
		return ret;

	if (offset + bctx->extent_len > i_size)
		return 0;

	/*
	 * Make sure we don't consider clones from send_root that are
	 * behind the current inode/offset.
	 */
	if (found->root == bctx->sctx->send_root) {
		/*
		 * TODO for the moment we don't accept clones from the inode
		 * that is currently being sent. We may change this when
		 * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
		 * file.
		 */
		if (ino >= bctx->cur_objectid)
			return 0;
#if 0
		if (ino > bctx->cur_objectid)
			return 0;
		if (offset + bctx->extent_len > bctx->cur_offset)
			return 0;
#endif
	}

	bctx->found++;
	found->found_refs++;
	if (ino < found->ino) {
		found->ino = ino;
		found->offset = offset;
	} else if (found->ino == ino) {
		/*
		 * same extent found more than once in the same file.
		 */
		if (found->offset > offset + bctx->extent_len)
			found->offset = offset;
	}

	return 0;
}

/*
 * Given an inode, offset and extent item, it finds a good clone for a clone
 * instruction. Returns -ENOENT when none could be found. The function makes
 * sure that the returned clone is usable at the point where sending is at the
 * moment. This means, that no clones are accepted which lie behind the current
 * inode+offset.
 *
 * path must point to the extent item when called.
 */
static int find_extent_clone(struct send_ctx *sctx,
			     struct btrfs_path *path,
			     u64 ino, u64 data_offset,
			     u64 ino_size,
			     struct clone_root **found)
{
	int ret;
	int extent_type;
	u64 logical;
	u64 disk_byte;
	u64 num_bytes;
	u64 extent_item_pos;
	u64 flags = 0;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *eb = path->nodes[0];
	struct backref_ctx *backref_ctx = NULL;
	struct clone_root *cur_clone_root;
	struct btrfs_key found_key;
	struct btrfs_path *tmp_path;
	int compressed;
	u32 i;

	tmp_path = alloc_path_for_send();
	if (!tmp_path)
		return -ENOMEM;

	backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
	if (!backref_ctx) {
		ret = -ENOMEM;
		goto out;
	}

	if (data_offset >= ino_size) {
		/*
		 * There may be extents that lie behind the file's size.
		 * I at least had this in combination with snapshotting while
		 * writing large files.
		 */
		ret = 0;
		goto out;
	}

	fi = btrfs_item_ptr(eb, path->slots[0],
			    struct btrfs_file_extent_item);
	extent_type = btrfs_file_extent_type(eb, fi);
	if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		ret = -ENOENT;
		goto out;
	}
	compressed = btrfs_file_extent_compression(eb, fi);

	num_bytes = btrfs_file_extent_num_bytes(eb, fi);
	disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
	if (disk_byte == 0) {
		ret = -ENOENT;
		goto out;
	}
	logical = disk_byte + btrfs_file_extent_offset(eb, fi);

	ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
				  &found_key, &flags);
	btrfs_release_path(tmp_path);

	if (ret < 0)
		goto out;
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ret = -EIO;
		goto out;
	}

	/*
	 * Setup the clone roots.
	 */
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		cur_clone_root = sctx->clone_roots + i;
		cur_clone_root->ino = (u64)-1;
		cur_clone_root->offset = 0;
		cur_clone_root->found_refs = 0;
	}

	backref_ctx->sctx = sctx;
	backref_ctx->found = 0;
	backref_ctx->cur_objectid = ino;
	backref_ctx->cur_offset = data_offset;
	backref_ctx->found_itself = 0;
	backref_ctx->extent_len = num_bytes;

	/*
	 * The last extent of a file may be too large due to page alignment.
	 * We need to adjust extent_len in this case so that the checks in
	 * __iterate_backrefs work.
	 */
	if (data_offset + num_bytes >= ino_size)
		backref_ctx->extent_len = ino_size - data_offset;

	/*
	 * Now collect all backrefs.
	 */
	if (compressed == BTRFS_COMPRESS_NONE)
		extent_item_pos = logical - found_key.objectid;
	else
		extent_item_pos = 0;
	ret = iterate_extent_inodes(sctx->send_root->fs_info,
				    found_key.objectid, extent_item_pos, 1,
				    __iterate_backrefs, backref_ctx);

	if (ret < 0)
		goto out;

	if (!backref_ctx->found_itself) {
		/* found a bug in backref code? */
		ret = -EIO;
		btrfs_err(sctx->send_root->fs_info, "did not find backref in "
				"send_root. inode=%llu, offset=%llu, "
				"disk_byte=%llu found extent=%llu\n",
				ino, data_offset, disk_byte, found_key.objectid);
		goto out;
	}

	verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, "
			"ino=%llu, "
			"num_bytes=%llu, logical=%llu\n",
			data_offset, ino, num_bytes, logical);

	if (!backref_ctx->found)
		verbose_printk("btrfs: no clones found\n");

	cur_clone_root = NULL;
	for (i = 0; i < sctx->clone_roots_cnt; i++) {
		if (sctx->clone_roots[i].found_refs) {
			if (!cur_clone_root)
				cur_clone_root = sctx->clone_roots + i;
			else if (sctx->clone_roots[i].root == sctx->send_root)
				/* prefer clones from send_root over others */
				cur_clone_root = sctx->clone_roots + i;
		}

	}

	if (cur_clone_root) {
		if (compressed != BTRFS_COMPRESS_NONE) {
			/*
			 * Offsets given by iterate_extent_inodes() are relative
			 * to the start of the extent, we need to add logical
			 * offset from the file extent item.
			 * (See why at backref.c:check_extent_in_eb())
			 */
			cur_clone_root->offset += btrfs_file_extent_offset(eb,
									   fi);
		}
		*found = cur_clone_root;
		ret = 0;
	} else {
		ret = -ENOENT;
	}

out:
	btrfs_free_path(tmp_path);
	kfree(backref_ctx);
	return ret;
}

static int read_symlink(struct btrfs_root *root,
			u64 ino,
			struct fs_path *dest)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_file_extent_item *ei;
	u8 type;
	u8 compression;
	unsigned long off;
	int len;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret);

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_file_extent_item);
	type = btrfs_file_extent_type(path->nodes[0], ei);
	compression = btrfs_file_extent_compression(path->nodes[0], ei);
	BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
	BUG_ON(compression);

	off = btrfs_file_extent_inline_start(ei);
	len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);

	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Helper function to generate a file name that is unique in the root of
 * send_root and parent_root. This is used to generate names for orphan inodes.
 */
static int gen_unique_name(struct send_ctx *sctx,
			   u64 ino, u64 gen,
			   struct fs_path *dest)
{
	int ret = 0;
	struct btrfs_path *path;
	struct btrfs_dir_item *di;
	char tmp[64];
	int len;
	u64 idx = 0;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	while (1) {
		len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
			       ino, gen, idx);
		if (len >= sizeof(tmp)) {
			/* should really not happen */
			ret = -EOVERFLOW;
			goto out;
		}

		di = btrfs_lookup_dir_item(NULL, sctx->send_root,
					   path, BTRFS_FIRST_FREE_OBJECTID,
					   tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}

		if (!sctx->parent_root) {
			/* unique */
			ret = 0;
			break;
		}

		di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
					   path, BTRFS_FIRST_FREE_OBJECTID,
					   tmp, strlen(tmp), 0);
		btrfs_release_path(path);
		if (IS_ERR(di)) {
			ret = PTR_ERR(di);
			goto out;
		}
		if (di) {
			/* not unique, try again */
			idx++;
			continue;
		}
		/* unique */
		break;
	}

	ret = fs_path_add(dest, tmp, strlen(tmp));

out:
	btrfs_free_path(path);
	return ret;
}
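/*
 * Example: for ino 259 with generation 5, the loop above tries "o259-5-0"
 * first and, if a dir item with that name already exists in send_root or
 * parent_root, keeps incrementing the trailing index ("o259-5-1",
 * "o259-5-2", ...) until the name is unique in both roots.
 */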

enum inode_state {
	inode_state_no_change,
	inode_state_will_create,
	inode_state_did_create,
	inode_state_will_delete,
	inode_state_did_delete,
};

static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;
	int left_ret;
	int right_ret;
	u64 left_gen;
	u64 right_gen;

	ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
			     NULL, NULL);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	left_ret = ret;

	if (!sctx->parent_root) {
		right_ret = -ENOENT;
	} else {
		ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
				     NULL, NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		right_ret = ret;
	}

	if (!left_ret && !right_ret) {
		if (left_gen == gen && right_gen == gen) {
			ret = inode_state_no_change;
		} else if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else if (!left_ret) {
		if (left_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_create;
			else
				ret = inode_state_will_create;
		} else {
			ret = -ENOENT;
		}
	} else if (!right_ret) {
		if (right_gen == gen) {
			if (ino < sctx->send_progress)
				ret = inode_state_did_delete;
			else
				ret = inode_state_will_delete;
		} else {
			ret = -ENOENT;
		}
	} else {
		ret = -ENOENT;
	}

out:
	return ret;
}
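/*
 * Summary of the decision above, assuming the generation matches wherever
 * the inode exists:
 *
 *	in send_root	in parent_root	ino < send_progress	result
 *	yes		yes		-			no_change
 *	yes		no		yes			did_create
 *	yes		no		no			will_create
 *	no		yes		yes			did_delete
 *	no		yes		no			will_delete
 *	no		no		-			-ENOENT
 */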

static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret;

	ret = get_cur_inode_state(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (ret == inode_state_no_change ||
	    ret == inode_state_did_create ||
	    ret == inode_state_will_delete)
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}

/*
 * Helper function to lookup a dir item in a dir.
 */
static int lookup_dir_item_inode(struct btrfs_root *root,
				 u64 dir, const char *name, int name_len,
				 u64 *found_inode,
				 u8 *found_type)
{
	int ret = 0;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(NULL, root, path,
				   dir, name, name_len, 0);
	if (!di) {
		ret = -ENOENT;
		goto out;
	}
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		goto out;
	}
	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
	*found_inode = key.objectid;
	*found_type = btrfs_dir_type(path->nodes[0], di);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Looks up the first btrfs_inode_ref of a given ino. It returns the parent
 * dir, the generation of the parent dir and the name of the dir entry.
 */
static int get_first_ref(struct btrfs_root *root, u64 ino,
			 u64 *dir, u64 *dir_gen, struct fs_path *name)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	int len;
	u64 parent_dir;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = ino;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
	if (ret < 0)
		goto out;
	if (!ret)
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
	if (ret || found_key.objectid != ino ||
	    (found_key.type != BTRFS_INODE_REF_KEY &&
	     found_key.type != BTRFS_INODE_EXTREF_KEY)) {
		ret = -ENOENT;
		goto out;
	}

	if (found_key.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *iref;
		iref = btrfs_item_ptr(path->nodes[0], path->slots[0],
				      struct btrfs_inode_ref);
		len = btrfs_inode_ref_name_len(path->nodes[0], iref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
						     (unsigned long)(iref + 1),
						     len);
		parent_dir = found_key.offset;
	} else {
		struct btrfs_inode_extref *extref;
		extref = btrfs_item_ptr(path->nodes[0], path->slots[0],
					struct btrfs_inode_extref);
		len = btrfs_inode_extref_name_len(path->nodes[0], extref);
		ret = fs_path_add_from_extent_buffer(name, path->nodes[0],
						     (unsigned long)&extref->name,
						     len);
		parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
	}
	if (ret < 0)
		goto out;
	btrfs_release_path(path);

	ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL, NULL,
			     NULL, NULL);
	if (ret < 0)
		goto out;

	*dir = parent_dir;

out:
	btrfs_free_path(path);
	return ret;
}

static int is_first_ref(struct btrfs_root *root,
			u64 ino, u64 dir,
			const char *name, int name_len)
{
	int ret;
	struct fs_path *tmp_name;
	u64 tmp_dir;
	u64 tmp_dir_gen;

	tmp_name = fs_path_alloc();
	if (!tmp_name)
		return -ENOMEM;

	ret = get_first_ref(root, ino, &tmp_dir, &tmp_dir_gen, tmp_name);
	if (ret < 0)
		goto out;

	if (dir != tmp_dir || name_len != fs_path_len(tmp_name)) {
		ret = 0;
		goto out;
	}

	ret = !memcmp(tmp_name->start, name, name_len);

out:
	fs_path_free(tmp_name);
	return ret;
}

/*
 * Used by process_recorded_refs to determine if a new ref would overwrite an
 * already existing ref. In case it detects an overwrite, it returns the
 * inode/gen in who_ino/who_gen.
 * When an overwrite is detected, process_recorded_refs does proper orphanizing
 * to make sure later references to the overwritten inode are possible.
 * Orphanizing is however only required for the first ref of an inode.
 * process_recorded_refs does an additional is_first_ref check to see if
 * orphanizing is really required.
 */
static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
			      const char *name, int name_len,
			      u64 *who_ino, u64 *who_gen)
{
	int ret = 0;
	u64 gen;
	u64 other_inode = 0;
	u8 other_type = 0;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	/*
	 * If we have a parent root we need to verify that the parent dir was
	 * not deleted and then re-created, if it was then we have no overwrite
	 * and we can just unlink this entry.
	 */
	if (sctx->parent_root) {
		ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
				     NULL, NULL, NULL);
		if (ret < 0 && ret != -ENOENT)
			goto out;
		if (ret) {
			ret = 0;
			goto out;
		}
		if (gen != dir_gen)
			goto out;
	}

	ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
				    &other_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Check if the overwritten ref was already processed. If yes, the ref
	 * was already unlinked/moved, so we can safely assume that we will not
	 * overwrite anything at this point in time.
	 */
	if (other_inode > sctx->send_progress) {
		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
				     who_gen, NULL, NULL, NULL, NULL);
		if (ret < 0)
			goto out;

		ret = 1;
		*who_ino = other_inode;
	} else {
		ret = 0;
	}

out:
	return ret;
}
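/*
 * Example of the overwrite detection above (made-up numbers): parent_root
 * has the entry dir/a pointing at inode 300, while in send_root dir/a now
 * points at inode 259. When processing inode 259 (send_progress == 259),
 * inode 300 was not processed yet (300 > 259), so creating the new ref "a"
 * would clobber the old one. The function returns 1 with *who_ino == 300 and
 * process_recorded_refs orphanizes inode 300 first.
 */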

/*
 * Checks if the ref was overwritten by an already processed inode. This is
 * used by __get_cur_name_and_parent to find out if the ref was orphanized and
 * thus the orphan name needs to be used.
 * process_recorded_refs also uses it to avoid unlinking of refs that were
 * overwritten.
 */
static int did_overwrite_ref(struct send_ctx *sctx,
			     u64 dir, u64 dir_gen,
			     u64 ino, u64 ino_gen,
			     const char *name, int name_len)
{
	int ret = 0;
	u64 gen;
	u64 ow_inode;
	u8 other_type;

	if (!sctx->parent_root)
		goto out;

	ret = is_inode_existent(sctx, dir, dir_gen);
	if (ret <= 0)
		goto out;

	/* check if the ref was overwritten by another ref */
	ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len,
				    &ow_inode, &other_type);
	if (ret < 0 && ret != -ENOENT)
		goto out;
	if (ret) {
		/* was never and will never be overwritten */
		ret = 0;
		goto out;
	}

	ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
			     NULL, NULL);
	if (ret < 0)
		goto out;

	if (ow_inode == ino && gen == ino_gen) {
		ret = 0;
		goto out;
	}

	/* we know that it is or will be overwritten. check this now */
	if (ow_inode < sctx->send_progress)
		ret = 1;
	else
		ret = 0;

out:
	return ret;
}

/*
 * Same as did_overwrite_ref, but also checks if it is the first ref of an
 * inode that got overwritten. This is used by process_recorded_refs to
 * determine if it has to use the path as returned by get_cur_path or the
 * orphan name.
 */
static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen)
{
	int ret = 0;
	struct fs_path *name = NULL;
	u64 dir;
	u64 dir_gen;

	if (!sctx->parent_root)
		goto out;

	name = fs_path_alloc();
	if (!name)
		return -ENOMEM;

	ret = get_first_ref(sctx->parent_root, ino, &dir, &dir_gen, name);
	if (ret < 0)
		goto out;

	ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen,
				name->start, fs_path_len(name));

out:
	fs_path_free(name);
	return ret;
}

/*
 * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
 * so we need to do some special handling in case we have clashes. This
 * function takes care of this with the help of name_cache_entry::radix_list.
 * In case of error, nce is kfreed.
 */
static int name_cache_insert(struct send_ctx *sctx,
			     struct name_cache_entry *nce)
{
	int ret = 0;
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
				     (unsigned long)nce->ino);
	if (!nce_head) {
		nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
		if (!nce_head) {
			kfree(nce);
			return -ENOMEM;
		}
		INIT_LIST_HEAD(nce_head);

		ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
		if (ret < 0) {
			kfree(nce_head);
			kfree(nce);
			return ret;
		}
	}
	list_add_tail(&nce->radix_list, nce_head);
	list_add_tail(&nce->list, &sctx->name_cache_list);
	sctx->name_cache_size++;

	return ret;
}

static void name_cache_delete(struct send_ctx *sctx,
			      struct name_cache_entry *nce)
{
	struct list_head *nce_head;

	nce_head = radix_tree_lookup(&sctx->name_cache,
				     (unsigned long)nce->ino);
	BUG_ON(!nce_head);

	list_del(&nce->radix_list);
	list_del(&nce->list);
	sctx->name_cache_size--;

	if (list_empty(nce_head)) {
		radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
		kfree(nce_head);
	}
}

static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
						  u64 ino, u64 gen)
{
	struct list_head *nce_head;
	struct name_cache_entry *cur;

	nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
	if (!nce_head)
		return NULL;

	list_for_each_entry(cur, nce_head, radix_list) {
		if (cur->ino == ino && cur->gen == gen)
			return cur;
	}
	return NULL;
}

/*
 * Removes the entry from the list and adds it back to the end. This marks the
 * entry as recently used so that name_cache_clean_unused does not remove it.
 */
static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
{
	list_del(&nce->list);
	list_add_tail(&nce->list, &sctx->name_cache_list);
}

/*
 * Remove some entries from the beginning of name_cache_list.
 */
static void name_cache_clean_unused(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	if (sctx->name_cache_size < SEND_CTX_NAME_CACHE_CLEAN_SIZE)
		return;

	while (sctx->name_cache_size > SEND_CTX_MAX_NAME_CACHE_SIZE) {
		nce = list_entry(sctx->name_cache_list.next,
				 struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}
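/*
 * With the current constants this behaves like an LRU with hysteresis:
 * entries are appended (or moved via name_cache_used) to the tail of
 * name_cache_list, and once the cache exceeds SEND_CTX_NAME_CACHE_CLEAN_SIZE
 * (256) entries it is trimmed from the head down to
 * SEND_CTX_MAX_NAME_CACHE_SIZE (128).
 */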

static void name_cache_free(struct send_ctx *sctx)
{
	struct name_cache_entry *nce;

	while (!list_empty(&sctx->name_cache_list)) {
		nce = list_entry(sctx->name_cache_list.next,
				 struct name_cache_entry, list);
		name_cache_delete(sctx, nce);
		kfree(nce);
	}
}

/*
 * Used by get_cur_path for each ref up to the root.
 * Returns 0 if it succeeded.
 * Returns 1 if the inode does not exist or got overwritten. In that case, the
 * name is an orphan name. This instructs get_cur_path to stop iterating. If 1
 * is returned, parent_ino/parent_gen are not guaranteed to be valid.
 * Returns <0 in case of error.
 */
static int __get_cur_name_and_parent(struct send_ctx *sctx,
				     u64 ino, u64 gen,
				     int skip_name_cache,
				     u64 *parent_ino,
				     u64 *parent_gen,
				     struct fs_path *dest)
{
	int ret;
	int nce_ret;
	struct btrfs_path *path = NULL;
	struct name_cache_entry *nce = NULL;

	if (skip_name_cache)
		goto get_ref;
	/*
	 * First check if we already did a call to this function with the same
	 * ino/gen. If yes, check if the cache entry is still up-to-date. If yes
	 * return the cached result.
	 */
	nce = name_cache_search(sctx, ino, gen);
	if (nce) {
		if (ino < sctx->send_progress && nce->need_later_update) {
			name_cache_delete(sctx, nce);
			kfree(nce);
			nce = NULL;
		} else {
			name_cache_used(sctx, nce);
			*parent_ino = nce->parent_ino;
			*parent_gen = nce->parent_gen;
			ret = fs_path_add(dest, nce->name, nce->name_len);
			if (ret < 0)
				goto out;
			ret = nce->ret;
			goto out;
		}
	}

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	/*
	 * If the inode does not exist yet, add the orphan name and return 1.
	 * This should only happen for the parent dir that we determine in
	 * __record_new_ref
	 */
	ret = is_inode_existent(sctx, ino, gen);
	if (ret < 0)
		goto out;

	if (!ret) {
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
		goto out_cache;
	}

get_ref:
	/*
	 * Depending on whether the inode was already processed or not, use
	 * send_root or parent_root for ref lookup.
	 */
	if (ino < sctx->send_progress && !skip_name_cache)
		ret = get_first_ref(sctx->send_root, ino,
				    parent_ino, parent_gen, dest);
	else
		ret = get_first_ref(sctx->parent_root, ino,
				    parent_ino, parent_gen, dest);
	if (ret < 0)
		goto out;

	/*
	 * Check if the ref was overwritten by an inode's ref that was processed
	 * earlier. If yes, treat as orphan and return 1.
	 */
	ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen,
				dest->start, dest->end - dest->start);
	if (ret < 0)
		goto out;
	if (ret) {
		fs_path_reset(dest);
		ret = gen_unique_name(sctx, ino, gen, dest);
		if (ret < 0)
			goto out;
		ret = 1;
	}
	if (skip_name_cache)
		goto out;

out_cache:
	/*
	 * Store the result of the lookup in the name cache.
	 */
	nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
	if (!nce) {
		ret = -ENOMEM;
		goto out;
	}

	nce->ino = ino;
	nce->gen = gen;
	nce->parent_ino = *parent_ino;
	nce->parent_gen = *parent_gen;
	nce->name_len = fs_path_len(dest);
	nce->ret = ret;
	strcpy(nce->name, dest->start);

	if (ino < sctx->send_progress)
		nce->need_later_update = 0;
	else
		nce->need_later_update = 1;

	nce_ret = name_cache_insert(sctx, nce);
	if (nce_ret < 0)
		ret = nce_ret;
	name_cache_clean_unused(sctx);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Magic happens here. This function returns the first ref to an inode as it
 * would look like while receiving the stream at this point in time.
 * We walk the path up to the root. For every inode in between, we check if it
 * was already processed/sent. If yes, we continue with the parent as found
 * in send_root. If not, we continue with the parent as found in parent_root.
 * If we encounter an inode that was deleted at this point in time, we use the
 * inode's "orphan" name instead of the real name and stop. Same with new
 * inodes that were not created yet and overwritten inodes/refs.
 *
 * When do we have orphan inodes:
 * 1. When an inode is freshly created and thus no valid refs are available yet
 * 2. When a directory lost all its refs (deleted) but still has dir items
 *    inside which were not processed yet (pending for move/delete). If anyone
 *    tried to get the path to the dir items, it would get a path inside that
 *    orphan directory.
 * 3. When an inode is moved around or gets new links, it may overwrite the ref
 *    of an unprocessed inode. If in that case the first ref would be
 *    overwritten, the overwritten inode gets "orphanized". Later when we
 *    process this overwritten inode, it is restored at a new place by moving
 *    the orphan inode.
 *
 * sctx->send_progress tells this function at which point in time receiving
 * would be.
 */
static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
			struct fs_path *dest)
{
	int ret = 0;
	struct fs_path *name = NULL;
	u64 parent_inode = 0;
	u64 parent_gen = 0;
	int stop = 0;
	u64 start_ino = ino;
	u64 start_gen = gen;
	int skip_name_cache = 0;

	name = fs_path_alloc();
	if (!name) {
		ret = -ENOMEM;
		goto out;
	}

	if (is_waiting_for_move(sctx, ino))
		skip_name_cache = 1;

again:
	dest->reversed = 1;
	fs_path_reset(dest);

	while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
		fs_path_reset(name);

		ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache,
						&parent_inode, &parent_gen,
						name);
		if (ret < 0)
			goto out;
		if (ret)
			stop = 1;

		if (!skip_name_cache &&
		    is_waiting_for_move(sctx, parent_inode)) {
			ino = start_ino;
			gen = start_gen;
			stop = 0;
			skip_name_cache = 1;
			goto again;
		}

		ret = fs_path_add_path(dest, name);
		if (ret < 0)
			goto out;

		ino = parent_inode;
		gen = parent_gen;
	}

out:
	fs_path_free(name);
	if (!ret)
		fs_path_unreverse(dest);
	return ret;
}
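/*
 * Worked example (made-up numbers): for /a/b/file with a == ino 257,
 * b == ino 258 and file == ino 259, get_cur_path(sctx, 259, gen, dest)
 * builds the path in reversed mode: "file", then "b/file", then "a/b/file",
 * stopping once the parent reaches BTRFS_FIRST_FREE_OBJECTID (the subvolume
 * root). fs_path_unreverse() then makes the result usable left to right.
 */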
size) 2267 { 2268 int ret = 0; 2269 struct fs_path *p; 2270 2271 verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); 2272 2273 p = fs_path_alloc(); 2274 if (!p) 2275 return -ENOMEM; 2276 2277 ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE); 2278 if (ret < 0) 2279 goto out; 2280 2281 ret = get_cur_path(sctx, ino, gen, p); 2282 if (ret < 0) 2283 goto out; 2284 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2285 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size); 2286 2287 ret = send_cmd(sctx); 2288 2289 tlv_put_failure: 2290 out: 2291 fs_path_free(p); 2292 return ret; 2293 } 2294 2295 static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) 2296 { 2297 int ret = 0; 2298 struct fs_path *p; 2299 2300 verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); 2301 2302 p = fs_path_alloc(); 2303 if (!p) 2304 return -ENOMEM; 2305 2306 ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD); 2307 if (ret < 0) 2308 goto out; 2309 2310 ret = get_cur_path(sctx, ino, gen, p); 2311 if (ret < 0) 2312 goto out; 2313 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2314 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777); 2315 2316 ret = send_cmd(sctx); 2317 2318 tlv_put_failure: 2319 out: 2320 fs_path_free(p); 2321 return ret; 2322 } 2323 2324 static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) 2325 { 2326 int ret = 0; 2327 struct fs_path *p; 2328 2329 verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); 2330 2331 p = fs_path_alloc(); 2332 if (!p) 2333 return -ENOMEM; 2334 2335 ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN); 2336 if (ret < 0) 2337 goto out; 2338 2339 ret = get_cur_path(sctx, ino, gen, p); 2340 if (ret < 0) 2341 goto out; 2342 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2343 TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid); 2344 TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid); 2345 2346 ret = send_cmd(sctx); 2347 2348 tlv_put_failure: 2349 out: 2350 fs_path_free(p); 2351 return ret; 2352 } 2353 2354 static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) 2355 { 2356 int ret = 0; 2357 struct fs_path *p = NULL; 2358 struct btrfs_inode_item *ii; 2359 struct btrfs_path *path = NULL; 2360 struct extent_buffer *eb; 2361 struct btrfs_key key; 2362 int slot; 2363 2364 verbose_printk("btrfs: send_utimes %llu\n", ino); 2365 2366 p = fs_path_alloc(); 2367 if (!p) 2368 return -ENOMEM; 2369 2370 path = alloc_path_for_send(); 2371 if (!path) { 2372 ret = -ENOMEM; 2373 goto out; 2374 } 2375 2376 key.objectid = ino; 2377 key.type = BTRFS_INODE_ITEM_KEY; 2378 key.offset = 0; 2379 ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); 2380 if (ret < 0) 2381 goto out; 2382 2383 eb = path->nodes[0]; 2384 slot = path->slots[0]; 2385 ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 2386 2387 ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES); 2388 if (ret < 0) 2389 goto out; 2390 2391 ret = get_cur_path(sctx, ino, gen, p); 2392 if (ret < 0) 2393 goto out; 2394 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2395 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, 2396 btrfs_inode_atime(ii)); 2397 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, 2398 btrfs_inode_mtime(ii)); 2399 TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, 2400 btrfs_inode_ctime(ii)); 2401 /* TODO Add otime support when the otime patches get into upstream */ 2402 2403 ret = send_cmd(sctx); 2404 2405 tlv_put_failure: 2406 out: 2407 fs_path_free(p); 2408 btrfs_free_path(path); 2409 return ret; 2410 } 2411 2412 /* 2413 * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. 
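The concrete command (MKFILE, MKDIR, SYMLINK, MKNOD, MKFIFO or MKSOCK) is derived from the inode's mode.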
We don't have 2414 * a valid path yet because we did not process the refs yet. So, the inode 2415 * is created as an orphan. 2416 */ 2417 static int send_create_inode(struct send_ctx *sctx, u64 ino) 2418 { 2419 int ret = 0; 2420 struct fs_path *p; 2421 int cmd; 2422 u64 gen; 2423 u64 mode; 2424 u64 rdev; 2425 2426 verbose_printk("btrfs: send_create_inode %llu\n", ino); 2427 2428 p = fs_path_alloc(); 2429 if (!p) 2430 return -ENOMEM; 2431 2432 ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL, 2433 NULL, &rdev); 2434 if (ret < 0) 2435 goto out; 2436 2437 if (S_ISREG(mode)) { 2438 cmd = BTRFS_SEND_C_MKFILE; 2439 } else if (S_ISDIR(mode)) { 2440 cmd = BTRFS_SEND_C_MKDIR; 2441 } else if (S_ISLNK(mode)) { 2442 cmd = BTRFS_SEND_C_SYMLINK; 2443 } else if (S_ISCHR(mode) || S_ISBLK(mode)) { 2444 cmd = BTRFS_SEND_C_MKNOD; 2445 } else if (S_ISFIFO(mode)) { 2446 cmd = BTRFS_SEND_C_MKFIFO; 2447 } else if (S_ISSOCK(mode)) { 2448 cmd = BTRFS_SEND_C_MKSOCK; 2449 } else { 2450 printk(KERN_WARNING "btrfs: unexpected inode type %o\n", 2451 (int)(mode & S_IFMT)); 2452 ret = -ENOTSUPP; 2453 goto out; 2454 } 2455 2456 ret = begin_cmd(sctx, cmd); 2457 if (ret < 0) 2458 goto out; 2459 2460 ret = gen_unique_name(sctx, ino, gen, p); 2461 if (ret < 0) 2462 goto out; 2463 2464 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 2465 TLV_PUT_U64(sctx, BTRFS_SEND_A_INO, ino); 2466 2467 if (S_ISLNK(mode)) { 2468 fs_path_reset(p); 2469 ret = read_symlink(sctx->send_root, ino, p); 2470 if (ret < 0) 2471 goto out; 2472 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); 2473 } else if (S_ISCHR(mode) || S_ISBLK(mode) || 2474 S_ISFIFO(mode) || S_ISSOCK(mode)) { 2475 TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev)); 2476 TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode); 2477 } 2478 2479 ret = send_cmd(sctx); 2480 if (ret < 0) 2481 goto out; 2482 2483 2484 tlv_put_failure: 2485 out: 2486 fs_path_free(p); 2487 return ret; 2488 } 2489 2490 /* 2491 * We need some special handling for inodes that get processed before the parent 2492 * directory was created. See process_recorded_refs for details. 2493 * This function checks whether we already created the dir out of order. 2494 */ 2495 static int did_create_dir(struct send_ctx *sctx, u64 dir) 2496 { 2497 int ret = 0; 2498 struct btrfs_path *path = NULL; 2499 struct btrfs_key key; 2500 struct btrfs_key found_key; 2501 struct btrfs_key di_key; 2502 struct extent_buffer *eb; 2503 struct btrfs_dir_item *di; 2504 int slot; 2505 2506 path = alloc_path_for_send(); 2507 if (!path) { 2508 ret = -ENOMEM; 2509 goto out; 2510 } 2511 2512 key.objectid = dir; 2513 key.type = BTRFS_DIR_INDEX_KEY; 2514 key.offset = 0; 2515 while (1) { 2516 ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, 2517 1, 0); 2518 if (ret < 0) 2519 goto out; 2520 if (!ret) { 2521 eb = path->nodes[0]; 2522 slot = path->slots[0]; 2523 btrfs_item_key_to_cpu(eb, &found_key, slot); 2524 } 2525 if (ret || found_key.objectid != key.objectid || 2526 found_key.type != key.type) { 2527 ret = 0; 2528 goto out; 2529 } 2530 2531 di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); 2532 btrfs_dir_item_key_to_cpu(eb, di, &di_key); 2533 2534 if (di_key.type != BTRFS_ROOT_ITEM_KEY && 2535 di_key.objectid < sctx->send_progress) { 2536 ret = 1; 2537 goto out; 2538 } 2539 2540 key.offset = found_key.offset + 1; 2541 btrfs_release_path(path); 2542 } 2543 2544 out: 2545 btrfs_free_path(path); 2546 return ret; 2547 } 2548 2549 /* 2550 * Only creates the inode if it is: 2551 * 1. Not a directory 2552 * 2.
Or a directory which was not created already due to out of order 2553 * directories. See did_create_dir and process_recorded_refs for details. 2554 */ 2555 static int send_create_inode_if_needed(struct send_ctx *sctx) 2556 { 2557 int ret; 2558 2559 if (S_ISDIR(sctx->cur_inode_mode)) { 2560 ret = did_create_dir(sctx, sctx->cur_ino); 2561 if (ret < 0) 2562 goto out; 2563 if (ret) { 2564 ret = 0; 2565 goto out; 2566 } 2567 } 2568 2569 ret = send_create_inode(sctx, sctx->cur_ino); 2570 if (ret < 0) 2571 goto out; 2572 2573 out: 2574 return ret; 2575 } 2576 2577 struct recorded_ref { 2578 struct list_head list; 2579 char *dir_path; 2580 char *name; 2581 struct fs_path *full_path; 2582 u64 dir; 2583 u64 dir_gen; 2584 int dir_path_len; 2585 int name_len; 2586 }; 2587 2588 /* 2589 * We need to process new refs before deleted refs, but compare_tree gives us 2590 * everything mixed. So we first record all refs and later process them. 2591 * This function is a helper to record one ref. 2592 */ 2593 static int record_ref(struct list_head *head, u64 dir, 2594 u64 dir_gen, struct fs_path *path) 2595 { 2596 struct recorded_ref *ref; 2597 2598 ref = kmalloc(sizeof(*ref), GFP_NOFS); 2599 if (!ref) 2600 return -ENOMEM; 2601 2602 ref->dir = dir; 2603 ref->dir_gen = dir_gen; 2604 ref->full_path = path; 2605 2606 ref->name = (char *)kbasename(ref->full_path->start); 2607 ref->name_len = ref->full_path->end - ref->name; 2608 ref->dir_path = ref->full_path->start; 2609 if (ref->name == ref->full_path->start) 2610 ref->dir_path_len = 0; 2611 else 2612 ref->dir_path_len = ref->full_path->end - 2613 ref->full_path->start - 1 - ref->name_len; 2614 2615 list_add_tail(&ref->list, head); 2616 return 0; 2617 } 2618 2619 static int dup_ref(struct recorded_ref *ref, struct list_head *list) 2620 { 2621 struct recorded_ref *new; 2622 2623 new = kmalloc(sizeof(*ref), GFP_NOFS); 2624 if (!new) 2625 return -ENOMEM; 2626 2627 new->dir = ref->dir; 2628 new->dir_gen = ref->dir_gen; 2629 new->full_path = NULL; 2630 INIT_LIST_HEAD(&new->list); 2631 list_add_tail(&new->list, list); 2632 return 0; 2633 } 2634 2635 static void __free_recorded_refs(struct list_head *head) 2636 { 2637 struct recorded_ref *cur; 2638 2639 while (!list_empty(head)) { 2640 cur = list_entry(head->next, struct recorded_ref, list); 2641 fs_path_free(cur->full_path); 2642 list_del(&cur->list); 2643 kfree(cur); 2644 } 2645 } 2646 2647 static void free_recorded_refs(struct send_ctx *sctx) 2648 { 2649 __free_recorded_refs(&sctx->new_refs); 2650 __free_recorded_refs(&sctx->deleted_refs); 2651 } 2652 2653 /* 2654 * Renames/moves a file/dir to its orphan name. Used when the first 2655 * ref of an unprocessed inode gets overwritten and for all non empty 2656 * directories. 2657 */ 2658 static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, 2659 struct fs_path *path) 2660 { 2661 int ret; 2662 struct fs_path *orphan; 2663 2664 orphan = fs_path_alloc(); 2665 if (!orphan) 2666 return -ENOMEM; 2667 2668 ret = gen_unique_name(sctx, ino, gen, orphan); 2669 if (ret < 0) 2670 goto out; 2671 2672 ret = send_rename(sctx, path, orphan); 2673 2674 out: 2675 fs_path_free(orphan); 2676 return ret; 2677 } 2678 2679 /* 2680 * Returns 1 if a directory can be removed at this point in time. 2681 * We check this by iterating all dir items and checking if the inode behind 2682 * the dir item was already processed. 
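If any entry points to an inode with a number above send_progress, that inode was not processed yet and the rmdir has to be delayed.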
2683 */ 2684 static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) 2685 { 2686 int ret = 0; 2687 struct btrfs_root *root = sctx->parent_root; 2688 struct btrfs_path *path; 2689 struct btrfs_key key; 2690 struct btrfs_key found_key; 2691 struct btrfs_key loc; 2692 struct btrfs_dir_item *di; 2693 2694 /* 2695 * Don't try to rmdir the top/root subvolume dir. 2696 */ 2697 if (dir == BTRFS_FIRST_FREE_OBJECTID) 2698 return 0; 2699 2700 path = alloc_path_for_send(); 2701 if (!path) 2702 return -ENOMEM; 2703 2704 key.objectid = dir; 2705 key.type = BTRFS_DIR_INDEX_KEY; 2706 key.offset = 0; 2707 2708 while (1) { 2709 ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); 2710 if (ret < 0) 2711 goto out; 2712 if (!ret) { 2713 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2714 path->slots[0]); 2715 } 2716 if (ret || found_key.objectid != key.objectid || 2717 found_key.type != key.type) { 2718 break; 2719 } 2720 2721 di = btrfs_item_ptr(path->nodes[0], path->slots[0], 2722 struct btrfs_dir_item); 2723 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); 2724 2725 if (loc.objectid > send_progress) { 2726 ret = 0; 2727 goto out; 2728 } 2729 2730 btrfs_release_path(path); 2731 key.offset = found_key.offset + 1; 2732 } 2733 2734 ret = 1; 2735 2736 out: 2737 btrfs_free_path(path); 2738 return ret; 2739 } 2740 2741 static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) 2742 { 2743 struct rb_node *n = sctx->waiting_dir_moves.rb_node; 2744 struct waiting_dir_move *entry; 2745 2746 while (n) { 2747 entry = rb_entry(n, struct waiting_dir_move, node); 2748 if (ino < entry->ino) 2749 n = n->rb_left; 2750 else if (ino > entry->ino) 2751 n = n->rb_right; 2752 else 2753 return 1; 2754 } 2755 return 0; 2756 } 2757 2758 static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) 2759 { 2760 struct rb_node **p = &sctx->waiting_dir_moves.rb_node; 2761 struct rb_node *parent = NULL; 2762 struct waiting_dir_move *entry, *dm; 2763 2764 dm = kmalloc(sizeof(*dm), GFP_NOFS); 2765 if (!dm) 2766 return -ENOMEM; 2767 dm->ino = ino; 2768 2769 while (*p) { 2770 parent = *p; 2771 entry = rb_entry(parent, struct waiting_dir_move, node); 2772 if (ino < entry->ino) { 2773 p = &(*p)->rb_left; 2774 } else if (ino > entry->ino) { 2775 p = &(*p)->rb_right; 2776 } else { 2777 kfree(dm); 2778 return -EEXIST; 2779 } 2780 } 2781 2782 rb_link_node(&dm->node, parent, p); 2783 rb_insert_color(&dm->node, &sctx->waiting_dir_moves); 2784 return 0; 2785 } 2786 2787 static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) 2788 { 2789 struct rb_node *n = sctx->waiting_dir_moves.rb_node; 2790 struct waiting_dir_move *entry; 2791 2792 while (n) { 2793 entry = rb_entry(n, struct waiting_dir_move, node); 2794 if (ino < entry->ino) { 2795 n = n->rb_left; 2796 } else if (ino > entry->ino) { 2797 n = n->rb_right; 2798 } else { 2799 rb_erase(&entry->node, &sctx->waiting_dir_moves); 2800 kfree(entry); 2801 return 0; 2802 } 2803 } 2804 return -ENOENT; 2805 } 2806 2807 static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) 2808 { 2809 struct rb_node **p = &sctx->pending_dir_moves.rb_node; 2810 struct rb_node *parent = NULL; 2811 struct pending_dir_move *entry, *pm; 2812 struct recorded_ref *cur; 2813 int exists = 0; 2814 int ret; 2815 2816 pm = kmalloc(sizeof(*pm), GFP_NOFS); 2817 if (!pm) 2818 return -ENOMEM; 2819 pm->parent_ino = parent_ino; 2820 pm->ino = sctx->cur_ino; 2821 pm->gen = sctx->cur_inode_gen; 2822 INIT_LIST_HEAD(&pm->list); 2823 INIT_LIST_HEAD(&pm->update_refs); 2824 
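/* Mark the node empty so free_pending_move() can tell whether it was ever linked into pending_dir_moves. */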
RB_CLEAR_NODE(&pm->node); 2825 2826 while (*p) { 2827 parent = *p; 2828 entry = rb_entry(parent, struct pending_dir_move, node); 2829 if (parent_ino < entry->parent_ino) { 2830 p = &(*p)->rb_left; 2831 } else if (parent_ino > entry->parent_ino) { 2832 p = &(*p)->rb_right; 2833 } else { 2834 exists = 1; 2835 break; 2836 } 2837 } 2838 2839 list_for_each_entry(cur, &sctx->deleted_refs, list) { 2840 ret = dup_ref(cur, &pm->update_refs); 2841 if (ret < 0) 2842 goto out; 2843 } 2844 list_for_each_entry(cur, &sctx->new_refs, list) { 2845 ret = dup_ref(cur, &pm->update_refs); 2846 if (ret < 0) 2847 goto out; 2848 } 2849 2850 ret = add_waiting_dir_move(sctx, pm->ino); 2851 if (ret) 2852 goto out; 2853 2854 if (exists) { 2855 list_add_tail(&pm->list, &entry->list); 2856 } else { 2857 rb_link_node(&pm->node, parent, p); 2858 rb_insert_color(&pm->node, &sctx->pending_dir_moves); 2859 } 2860 ret = 0; 2861 out: 2862 if (ret) { 2863 __free_recorded_refs(&pm->update_refs); 2864 kfree(pm); 2865 } 2866 return ret; 2867 } 2868 2869 static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx, 2870 u64 parent_ino) 2871 { 2872 struct rb_node *n = sctx->pending_dir_moves.rb_node; 2873 struct pending_dir_move *entry; 2874 2875 while (n) { 2876 entry = rb_entry(n, struct pending_dir_move, node); 2877 if (parent_ino < entry->parent_ino) 2878 n = n->rb_left; 2879 else if (parent_ino > entry->parent_ino) 2880 n = n->rb_right; 2881 else 2882 return entry; 2883 } 2884 return NULL; 2885 } 2886 2887 static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) 2888 { 2889 struct fs_path *from_path = NULL; 2890 struct fs_path *to_path = NULL; 2891 u64 orig_progress = sctx->send_progress; 2892 struct recorded_ref *cur; 2893 int ret; 2894 2895 from_path = fs_path_alloc(); 2896 if (!from_path) 2897 return -ENOMEM; 2898 2899 sctx->send_progress = pm->ino; 2900 ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); 2901 if (ret < 0) 2902 goto out; 2903 2904 to_path = fs_path_alloc(); 2905 if (!to_path) { 2906 ret = -ENOMEM; 2907 goto out; 2908 } 2909 2910 sctx->send_progress = sctx->cur_ino + 1; 2911 ret = del_waiting_dir_move(sctx, pm->ino); 2912 ASSERT(ret == 0); 2913 2914 ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); 2915 if (ret < 0) 2916 goto out; 2917 2918 ret = send_rename(sctx, from_path, to_path); 2919 if (ret < 0) 2920 goto out; 2921 2922 ret = send_utimes(sctx, pm->ino, pm->gen); 2923 if (ret < 0) 2924 goto out; 2925 2926 /* 2927 * After rename/move, need to update the utimes of both new parent(s) 2928 * and old parent(s). 
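The refs in update_refs were duplicated from the new and deleted ref lists when the move was deferred in add_pending_dir_move().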
2929 */ 2930 list_for_each_entry(cur, &pm->update_refs, list) { 2931 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 2932 if (ret < 0) 2933 goto out; 2934 } 2935 2936 out: 2937 fs_path_free(from_path); 2938 fs_path_free(to_path); 2939 sctx->send_progress = orig_progress; 2940 2941 return ret; 2942 } 2943 2944 static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m) 2945 { 2946 if (!list_empty(&m->list)) 2947 list_del(&m->list); 2948 if (!RB_EMPTY_NODE(&m->node)) 2949 rb_erase(&m->node, &sctx->pending_dir_moves); 2950 __free_recorded_refs(&m->update_refs); 2951 kfree(m); 2952 } 2953 2954 static void tail_append_pending_moves(struct pending_dir_move *moves, 2955 struct list_head *stack) 2956 { 2957 if (list_empty(&moves->list)) { 2958 list_add_tail(&moves->list, stack); 2959 } else { 2960 LIST_HEAD(list); 2961 list_splice_init(&moves->list, &list); 2962 list_add_tail(&moves->list, stack); 2963 list_splice_tail(&list, stack); 2964 } 2965 } 2966 2967 static int apply_children_dir_moves(struct send_ctx *sctx) 2968 { 2969 struct pending_dir_move *pm; 2970 struct list_head stack; 2971 u64 parent_ino = sctx->cur_ino; 2972 int ret = 0; 2973 2974 pm = get_pending_dir_moves(sctx, parent_ino); 2975 if (!pm) 2976 return 0; 2977 2978 INIT_LIST_HEAD(&stack); 2979 tail_append_pending_moves(pm, &stack); 2980 2981 while (!list_empty(&stack)) { 2982 pm = list_first_entry(&stack, struct pending_dir_move, list); 2983 parent_ino = pm->ino; 2984 ret = apply_dir_move(sctx, pm); 2985 free_pending_move(sctx, pm); 2986 if (ret) 2987 goto out; 2988 pm = get_pending_dir_moves(sctx, parent_ino); 2989 if (pm) 2990 tail_append_pending_moves(pm, &stack); 2991 } 2992 return 0; 2993 2994 out: 2995 while (!list_empty(&stack)) { 2996 pm = list_first_entry(&stack, struct pending_dir_move, list); 2997 free_pending_move(sctx, pm); 2998 } 2999 return ret; 3000 } 3001 3002 static int wait_for_parent_move(struct send_ctx *sctx, 3003 struct recorded_ref *parent_ref) 3004 { 3005 int ret; 3006 u64 ino = parent_ref->dir; 3007 u64 parent_ino_before, parent_ino_after; 3008 u64 new_gen, old_gen; 3009 struct fs_path *path_before = NULL; 3010 struct fs_path *path_after = NULL; 3011 int len1, len2; 3012 3013 if (parent_ref->dir <= sctx->cur_ino) 3014 return 0; 3015 3016 if (is_waiting_for_move(sctx, ino)) 3017 return 1; 3018 3019 ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, 3020 NULL, NULL, NULL, NULL); 3021 if (ret == -ENOENT) 3022 return 0; 3023 else if (ret < 0) 3024 return ret; 3025 3026 ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen, 3027 NULL, NULL, NULL, NULL); 3028 if (ret < 0) 3029 return ret; 3030 3031 if (new_gen != old_gen) 3032 return 0; 3033 3034 path_before = fs_path_alloc(); 3035 if (!path_before) 3036 return -ENOMEM; 3037 3038 ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, 3039 NULL, path_before); 3040 if (ret == -ENOENT) { 3041 ret = 0; 3042 goto out; 3043 } else if (ret < 0) { 3044 goto out; 3045 } 3046 3047 path_after = fs_path_alloc(); 3048 if (!path_after) { 3049 ret = -ENOMEM; 3050 goto out; 3051 } 3052 3053 ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, 3054 NULL, path_after); 3055 if (ret == -ENOENT) { 3056 ret = 0; 3057 goto out; 3058 } else if (ret < 0) { 3059 goto out; 3060 } 3061 3062 len1 = fs_path_len(path_before); 3063 len2 = fs_path_len(path_after); 3064 if ((parent_ino_before != parent_ino_after) && (len1 != len2 || 3065 memcmp(path_before->start, path_after->start, len1))) { 3066 ret = 1; 3067 goto out; 3068 } 3069 ret = 0; 
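/* The parent did not change, or the first ref path is identical, so there is no need to wait for it. */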
3070 3071 out: 3072 fs_path_free(path_before); 3073 fs_path_free(path_after); 3074 3075 return ret; 3076 } 3077 3078 /* 3079 * This does all the move/link/unlink/rmdir magic. 3080 */ 3081 static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) 3082 { 3083 int ret = 0; 3084 struct recorded_ref *cur; 3085 struct recorded_ref *cur2; 3086 struct list_head check_dirs; 3087 struct fs_path *valid_path = NULL; 3088 u64 ow_inode = 0; 3089 u64 ow_gen; 3090 int did_overwrite = 0; 3091 int is_orphan = 0; 3092 3093 verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); 3094 3095 /* 3096 * This should never happen as the root dir always has the same ref 3097 * which is always '..' 3098 */ 3099 BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID); 3100 INIT_LIST_HEAD(&check_dirs); 3101 3102 valid_path = fs_path_alloc(); 3103 if (!valid_path) { 3104 ret = -ENOMEM; 3105 goto out; 3106 } 3107 3108 /* 3109 * First, check if the first ref of the current inode was overwritten 3110 * before. If yes, we know that the current inode was already orphanized 3111 * and thus use the orphan name. If not, we can use get_cur_path to 3112 * get the path of the first ref as it would look while receiving at 3113 * this point in time. 3114 * New inodes are always orphans at the beginning, so force to use the 3115 * orphan name in this case. 3116 * The first ref is stored in valid_path and will be updated if it 3117 * gets moved around. 3118 */ 3119 if (!sctx->cur_inode_new) { 3120 ret = did_overwrite_first_ref(sctx, sctx->cur_ino, 3121 sctx->cur_inode_gen); 3122 if (ret < 0) 3123 goto out; 3124 if (ret) 3125 did_overwrite = 1; 3126 } 3127 if (sctx->cur_inode_new || did_overwrite) { 3128 ret = gen_unique_name(sctx, sctx->cur_ino, 3129 sctx->cur_inode_gen, valid_path); 3130 if (ret < 0) 3131 goto out; 3132 is_orphan = 1; 3133 } else { 3134 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, 3135 valid_path); 3136 if (ret < 0) 3137 goto out; 3138 } 3139 3140 list_for_each_entry(cur, &sctx->new_refs, list) { 3141 /* 3142 * We may have refs where the parent directory does not exist 3143 * yet. This happens if the parent directory's inum is higher 3144 * than the current inum. To handle this case, we create the 3145 * parent directory out of order. But we need to check if this 3146 * did already happen before due to other refs in the same dir. 3147 */ 3148 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); 3149 if (ret < 0) 3150 goto out; 3151 if (ret == inode_state_will_create) { 3152 ret = 0; 3153 /* 3154 * First check if any of the current inode's refs did 3155 * already create the dir. 3156 */ 3157 list_for_each_entry(cur2, &sctx->new_refs, list) { 3158 if (cur == cur2) 3159 break; 3160 if (cur2->dir == cur->dir) { 3161 ret = 1; 3162 break; 3163 } 3164 } 3165 3166 /* 3167 * If that did not happen, check if a previous inode 3168 * did already create the dir. 3169 */ 3170 if (!ret) 3171 ret = did_create_dir(sctx, cur->dir); 3172 if (ret < 0) 3173 goto out; 3174 if (!ret) { 3175 ret = send_create_inode(sctx, cur->dir); 3176 if (ret < 0) 3177 goto out; 3178 } 3179 } 3180 3181 /* 3182 * Check if this new ref would overwrite the first ref of 3183 * another unprocessed inode. If yes, orphanize the 3184 * overwritten inode. If we find an overwritten ref that is 3185 * not the first ref, simply unlink it.
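Orphanizing keeps the overwritten inode reachable under its unique orphan name until it is processed itself.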
3186 */ 3187 ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3188 cur->name, cur->name_len, 3189 &ow_inode, &ow_gen); 3190 if (ret < 0) 3191 goto out; 3192 if (ret) { 3193 ret = is_first_ref(sctx->parent_root, 3194 ow_inode, cur->dir, cur->name, 3195 cur->name_len); 3196 if (ret < 0) 3197 goto out; 3198 if (ret) { 3199 ret = orphanize_inode(sctx, ow_inode, ow_gen, 3200 cur->full_path); 3201 if (ret < 0) 3202 goto out; 3203 } else { 3204 ret = send_unlink(sctx, cur->full_path); 3205 if (ret < 0) 3206 goto out; 3207 } 3208 } 3209 3210 /* 3211 * link/move the ref to the new place. If we have an orphan 3212 * inode, move it and update valid_path. If not, link or move 3213 * it depending on the inode mode. 3214 */ 3215 if (is_orphan) { 3216 ret = send_rename(sctx, valid_path, cur->full_path); 3217 if (ret < 0) 3218 goto out; 3219 is_orphan = 0; 3220 ret = fs_path_copy(valid_path, cur->full_path); 3221 if (ret < 0) 3222 goto out; 3223 } else { 3224 if (S_ISDIR(sctx->cur_inode_mode)) { 3225 /* 3226 * Dirs can't be linked, so move it. For moved 3227 * dirs, we always have one new and one deleted 3228 * ref. The deleted ref is ignored later. 3229 */ 3230 if (wait_for_parent_move(sctx, cur)) { 3231 ret = add_pending_dir_move(sctx, 3232 cur->dir); 3233 *pending_move = 1; 3234 } else { 3235 ret = send_rename(sctx, valid_path, 3236 cur->full_path); 3237 if (!ret) 3238 ret = fs_path_copy(valid_path, 3239 cur->full_path); 3240 } 3241 if (ret < 0) 3242 goto out; 3243 } else { 3244 ret = send_link(sctx, cur->full_path, 3245 valid_path); 3246 if (ret < 0) 3247 goto out; 3248 } 3249 } 3250 ret = dup_ref(cur, &check_dirs); 3251 if (ret < 0) 3252 goto out; 3253 } 3254 3255 if (S_ISDIR(sctx->cur_inode_mode) && sctx->cur_inode_deleted) { 3256 /* 3257 * Check if we can already rmdir the directory. If not, 3258 * orphanize it. For every dir item inside that gets deleted 3259 * later, we do this check again and rmdir it then if possible. 3260 * See the use of check_dirs for more details. 3261 */ 3262 ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino); 3263 if (ret < 0) 3264 goto out; 3265 if (ret) { 3266 ret = send_rmdir(sctx, valid_path); 3267 if (ret < 0) 3268 goto out; 3269 } else if (!is_orphan) { 3270 ret = orphanize_inode(sctx, sctx->cur_ino, 3271 sctx->cur_inode_gen, valid_path); 3272 if (ret < 0) 3273 goto out; 3274 is_orphan = 1; 3275 } 3276 3277 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3278 ret = dup_ref(cur, &check_dirs); 3279 if (ret < 0) 3280 goto out; 3281 } 3282 } else if (S_ISDIR(sctx->cur_inode_mode) && 3283 !list_empty(&sctx->deleted_refs)) { 3284 /* 3285 * We have a moved dir. Add the old parent to check_dirs 3286 */ 3287 cur = list_entry(sctx->deleted_refs.next, struct recorded_ref, 3288 list); 3289 ret = dup_ref(cur, &check_dirs); 3290 if (ret < 0) 3291 goto out; 3292 } else if (!S_ISDIR(sctx->cur_inode_mode)) { 3293 /* 3294 * We have a non dir inode. Go through all deleted refs and 3295 * unlink them if they were not already overwritten by other 3296 * inodes. 3297 */ 3298 list_for_each_entry(cur, &sctx->deleted_refs, list) { 3299 ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen, 3300 sctx->cur_ino, sctx->cur_inode_gen, 3301 cur->name, cur->name_len); 3302 if (ret < 0) 3303 goto out; 3304 if (!ret) { 3305 ret = send_unlink(sctx, cur->full_path); 3306 if (ret < 0) 3307 goto out; 3308 } 3309 ret = dup_ref(cur, &check_dirs); 3310 if (ret < 0) 3311 goto out; 3312 } 3313 /* 3314 * If the inode is still orphan, unlink the orphan. 
This may 3315 * happen when a previous inode did overwrite the first ref 3316 * of this inode and no new refs were added for the current 3317 * inode. Unlinking does not mean that the inode is deleted in 3318 * all cases. There may still be links to this inode in other 3319 * places. 3320 */ 3321 if (is_orphan) { 3322 ret = send_unlink(sctx, valid_path); 3323 if (ret < 0) 3324 goto out; 3325 } 3326 } 3327 3328 /* 3329 * We did collect all parent dirs where cur_inode was once located. We 3330 * now go through all these dirs and check if they are pending for 3331 * deletion and if it's finally possible to perform the rmdir now. 3332 * We also update the inode stats of the parent dirs here. 3333 */ 3334 list_for_each_entry(cur, &check_dirs, list) { 3335 /* 3336 * In case we had refs into dirs that were not processed yet, 3337 * we don't need to do the utime and rmdir logic for these dirs. 3338 * The dir will be processed later. 3339 */ 3340 if (cur->dir > sctx->cur_ino) 3341 continue; 3342 3343 ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen); 3344 if (ret < 0) 3345 goto out; 3346 3347 if (ret == inode_state_did_create || 3348 ret == inode_state_no_change) { 3349 /* TODO delayed utimes */ 3350 ret = send_utimes(sctx, cur->dir, cur->dir_gen); 3351 if (ret < 0) 3352 goto out; 3353 } else if (ret == inode_state_did_delete) { 3354 ret = can_rmdir(sctx, cur->dir, sctx->cur_ino); 3355 if (ret < 0) 3356 goto out; 3357 if (ret) { 3358 ret = get_cur_path(sctx, cur->dir, 3359 cur->dir_gen, valid_path); 3360 if (ret < 0) 3361 goto out; 3362 ret = send_rmdir(sctx, valid_path); 3363 if (ret < 0) 3364 goto out; 3365 } 3366 } 3367 } 3368 3369 ret = 0; 3370 3371 out: 3372 __free_recorded_refs(&check_dirs); 3373 free_recorded_refs(sctx); 3374 fs_path_free(valid_path); 3375 return ret; 3376 } 3377 3378 static int __record_new_ref(int num, u64 dir, int index, 3379 struct fs_path *name, 3380 void *ctx) 3381 { 3382 int ret = 0; 3383 struct send_ctx *sctx = ctx; 3384 struct fs_path *p; 3385 u64 gen; 3386 3387 p = fs_path_alloc(); 3388 if (!p) 3389 return -ENOMEM; 3390 3391 ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, 3392 NULL, NULL); 3393 if (ret < 0) 3394 goto out; 3395 3396 ret = get_cur_path(sctx, dir, gen, p); 3397 if (ret < 0) 3398 goto out; 3399 ret = fs_path_add_path(p, name); 3400 if (ret < 0) 3401 goto out; 3402 3403 ret = record_ref(&sctx->new_refs, dir, gen, p); 3404 3405 out: 3406 if (ret) 3407 fs_path_free(p); 3408 return ret; 3409 } 3410 3411 static int __record_deleted_ref(int num, u64 dir, int index, 3412 struct fs_path *name, 3413 void *ctx) 3414 { 3415 int ret = 0; 3416 struct send_ctx *sctx = ctx; 3417 struct fs_path *p; 3418 u64 gen; 3419 3420 p = fs_path_alloc(); 3421 if (!p) 3422 return -ENOMEM; 3423 3424 ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, 3425 NULL, NULL); 3426 if (ret < 0) 3427 goto out; 3428 3429 ret = get_cur_path(sctx, dir, gen, p); 3430 if (ret < 0) 3431 goto out; 3432 ret = fs_path_add_path(p, name); 3433 if (ret < 0) 3434 goto out; 3435 3436 ret = record_ref(&sctx->deleted_refs, dir, gen, p); 3437 3438 out: 3439 if (ret) 3440 fs_path_free(p); 3441 return ret; 3442 } 3443 3444 static int record_new_ref(struct send_ctx *sctx) 3445 { 3446 int ret; 3447 3448 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 3449 sctx->cmp_key, 0, __record_new_ref, sctx); 3450 if (ret < 0) 3451 goto out; 3452 ret = 0; 3453 3454 out: 3455 return ret; 3456 } 3457 3458 static int record_deleted_ref(struct send_ctx *sctx) 3459 { 3460 int 
ret; 3461 3462 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 3463 sctx->cmp_key, 0, __record_deleted_ref, sctx); 3464 if (ret < 0) 3465 goto out; 3466 ret = 0; 3467 3468 out: 3469 return ret; 3470 } 3471 3472 struct find_ref_ctx { 3473 u64 dir; 3474 u64 dir_gen; 3475 struct btrfs_root *root; 3476 struct fs_path *name; 3477 int found_idx; 3478 }; 3479 3480 static int __find_iref(int num, u64 dir, int index, 3481 struct fs_path *name, 3482 void *ctx_) 3483 { 3484 struct find_ref_ctx *ctx = ctx_; 3485 u64 dir_gen; 3486 int ret; 3487 3488 if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) && 3489 strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { 3490 /* 3491 * To avoid doing extra lookups we'll only do this if everything 3492 * else matches. 3493 */ 3494 ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL, 3495 NULL, NULL, NULL); 3496 if (ret) 3497 return ret; 3498 if (dir_gen != ctx->dir_gen) 3499 return 0; 3500 ctx->found_idx = num; 3501 return 1; 3502 } 3503 return 0; 3504 } 3505 3506 static int find_iref(struct btrfs_root *root, 3507 struct btrfs_path *path, 3508 struct btrfs_key *key, 3509 u64 dir, u64 dir_gen, struct fs_path *name) 3510 { 3511 int ret; 3512 struct find_ref_ctx ctx; 3513 3514 ctx.dir = dir; 3515 ctx.name = name; 3516 ctx.dir_gen = dir_gen; 3517 ctx.found_idx = -1; 3518 ctx.root = root; 3519 3520 ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx); 3521 if (ret < 0) 3522 return ret; 3523 3524 if (ctx.found_idx == -1) 3525 return -ENOENT; 3526 3527 return ctx.found_idx; 3528 } 3529 3530 static int __record_changed_new_ref(int num, u64 dir, int index, 3531 struct fs_path *name, 3532 void *ctx) 3533 { 3534 u64 dir_gen; 3535 int ret; 3536 struct send_ctx *sctx = ctx; 3537 3538 ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL, 3539 NULL, NULL, NULL); 3540 if (ret) 3541 return ret; 3542 3543 ret = find_iref(sctx->parent_root, sctx->right_path, 3544 sctx->cmp_key, dir, dir_gen, name); 3545 if (ret == -ENOENT) 3546 ret = __record_new_ref(num, dir, index, name, sctx); 3547 else if (ret > 0) 3548 ret = 0; 3549 3550 return ret; 3551 } 3552 3553 static int __record_changed_deleted_ref(int num, u64 dir, int index, 3554 struct fs_path *name, 3555 void *ctx) 3556 { 3557 u64 dir_gen; 3558 int ret; 3559 struct send_ctx *sctx = ctx; 3560 3561 ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL, 3562 NULL, NULL, NULL); 3563 if (ret) 3564 return ret; 3565 3566 ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key, 3567 dir, dir_gen, name); 3568 if (ret == -ENOENT) 3569 ret = __record_deleted_ref(num, dir, index, name, sctx); 3570 else if (ret > 0) 3571 ret = 0; 3572 3573 return ret; 3574 } 3575 3576 static int record_changed_ref(struct send_ctx *sctx) 3577 { 3578 int ret = 0; 3579 3580 ret = iterate_inode_ref(sctx->send_root, sctx->left_path, 3581 sctx->cmp_key, 0, __record_changed_new_ref, sctx); 3582 if (ret < 0) 3583 goto out; 3584 ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, 3585 sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); 3586 if (ret < 0) 3587 goto out; 3588 ret = 0; 3589 3590 out: 3591 return ret; 3592 } 3593 3594 /* 3595 * Record and process all refs at once. Needed when an inode changes the 3596 * generation number, which means that it was deleted and recreated. 
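Depending on the compare_tree result, either all refs of the new inode or all refs of the deleted one are recorded, and process_recorded_refs() is then run once on the result.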
3597 */ 3598 static int process_all_refs(struct send_ctx *sctx, 3599 enum btrfs_compare_tree_result cmd) 3600 { 3601 int ret; 3602 struct btrfs_root *root; 3603 struct btrfs_path *path; 3604 struct btrfs_key key; 3605 struct btrfs_key found_key; 3606 struct extent_buffer *eb; 3607 int slot; 3608 iterate_inode_ref_t cb; 3609 int pending_move = 0; 3610 3611 path = alloc_path_for_send(); 3612 if (!path) 3613 return -ENOMEM; 3614 3615 if (cmd == BTRFS_COMPARE_TREE_NEW) { 3616 root = sctx->send_root; 3617 cb = __record_new_ref; 3618 } else if (cmd == BTRFS_COMPARE_TREE_DELETED) { 3619 root = sctx->parent_root; 3620 cb = __record_deleted_ref; 3621 } else { 3622 BUG(); 3623 } 3624 3625 key.objectid = sctx->cmp_key->objectid; 3626 key.type = BTRFS_INODE_REF_KEY; 3627 key.offset = 0; 3628 while (1) { 3629 ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); 3630 if (ret < 0) 3631 goto out; 3632 if (ret) 3633 break; 3634 3635 eb = path->nodes[0]; 3636 slot = path->slots[0]; 3637 btrfs_item_key_to_cpu(eb, &found_key, slot); 3638 3639 if (found_key.objectid != key.objectid || 3640 (found_key.type != BTRFS_INODE_REF_KEY && 3641 found_key.type != BTRFS_INODE_EXTREF_KEY)) 3642 break; 3643 3644 ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); 3645 btrfs_release_path(path); 3646 if (ret < 0) 3647 goto out; 3648 3649 key.offset = found_key.offset + 1; 3650 } 3651 btrfs_release_path(path); 3652 3653 ret = process_recorded_refs(sctx, &pending_move); 3654 /* Only applicable to an incremental send. */ 3655 ASSERT(pending_move == 0); 3656 3657 out: 3658 btrfs_free_path(path); 3659 return ret; 3660 } 3661 3662 static int send_set_xattr(struct send_ctx *sctx, 3663 struct fs_path *path, 3664 const char *name, int name_len, 3665 const char *data, int data_len) 3666 { 3667 int ret = 0; 3668 3669 ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR); 3670 if (ret < 0) 3671 goto out; 3672 3673 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 3674 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); 3675 TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len); 3676 3677 ret = send_cmd(sctx); 3678 3679 tlv_put_failure: 3680 out: 3681 return ret; 3682 } 3683 3684 static int send_remove_xattr(struct send_ctx *sctx, 3685 struct fs_path *path, 3686 const char *name, int name_len) 3687 { 3688 int ret = 0; 3689 3690 ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR); 3691 if (ret < 0) 3692 goto out; 3693 3694 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); 3695 TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); 3696 3697 ret = send_cmd(sctx); 3698 3699 tlv_put_failure: 3700 out: 3701 return ret; 3702 } 3703 3704 static int __process_new_xattr(int num, struct btrfs_key *di_key, 3705 const char *name, int name_len, 3706 const char *data, int data_len, 3707 u8 type, void *ctx) 3708 { 3709 int ret; 3710 struct send_ctx *sctx = ctx; 3711 struct fs_path *p; 3712 posix_acl_xattr_header dummy_acl; 3713 3714 p = fs_path_alloc(); 3715 if (!p) 3716 return -ENOMEM; 3717 3718 /* 3719 * This hack is needed because empty ACLs are stored as zero byte 3720 * data in xattrs. The problem is that receiving these zero byte 3721 * ACLs will fail later. To fix this, we send a dummy ACL list that 3722 * only contains the version number and no entries.
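The dummy header is only substituted when data_len is zero; non-empty ACLs are sent unchanged.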
3723 */ 3724 if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) || 3725 !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) { 3726 if (data_len == 0) { 3727 dummy_acl.a_version = 3728 cpu_to_le32(POSIX_ACL_XATTR_VERSION); 3729 data = (char *)&dummy_acl; 3730 data_len = sizeof(dummy_acl); 3731 } 3732 } 3733 3734 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 3735 if (ret < 0) 3736 goto out; 3737 3738 ret = send_set_xattr(sctx, p, name, name_len, data, data_len); 3739 3740 out: 3741 fs_path_free(p); 3742 return ret; 3743 } 3744 3745 static int __process_deleted_xattr(int num, struct btrfs_key *di_key, 3746 const char *name, int name_len, 3747 const char *data, int data_len, 3748 u8 type, void *ctx) 3749 { 3750 int ret; 3751 struct send_ctx *sctx = ctx; 3752 struct fs_path *p; 3753 3754 p = fs_path_alloc(); 3755 if (!p) 3756 return -ENOMEM; 3757 3758 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 3759 if (ret < 0) 3760 goto out; 3761 3762 ret = send_remove_xattr(sctx, p, name, name_len); 3763 3764 out: 3765 fs_path_free(p); 3766 return ret; 3767 } 3768 3769 static int process_new_xattr(struct send_ctx *sctx) 3770 { 3771 int ret = 0; 3772 3773 ret = iterate_dir_item(sctx->send_root, sctx->left_path, 3774 sctx->cmp_key, __process_new_xattr, sctx); 3775 3776 return ret; 3777 } 3778 3779 static int process_deleted_xattr(struct send_ctx *sctx) 3780 { 3781 int ret; 3782 3783 ret = iterate_dir_item(sctx->parent_root, sctx->right_path, 3784 sctx->cmp_key, __process_deleted_xattr, sctx); 3785 3786 return ret; 3787 } 3788 3789 struct find_xattr_ctx { 3790 const char *name; 3791 int name_len; 3792 int found_idx; 3793 char *found_data; 3794 int found_data_len; 3795 }; 3796 3797 static int __find_xattr(int num, struct btrfs_key *di_key, 3798 const char *name, int name_len, 3799 const char *data, int data_len, 3800 u8 type, void *vctx) 3801 { 3802 struct find_xattr_ctx *ctx = vctx; 3803 3804 if (name_len == ctx->name_len && 3805 strncmp(name, ctx->name, name_len) == 0) { 3806 ctx->found_idx = num; 3807 ctx->found_data_len = data_len; 3808 ctx->found_data = kmemdup(data, data_len, GFP_NOFS); 3809 if (!ctx->found_data) 3810 return -ENOMEM; 3811 return 1; 3812 } 3813 return 0; 3814 } 3815 3816 static int find_xattr(struct btrfs_root *root, 3817 struct btrfs_path *path, 3818 struct btrfs_key *key, 3819 const char *name, int name_len, 3820 char **data, int *data_len) 3821 { 3822 int ret; 3823 struct find_xattr_ctx ctx; 3824 3825 ctx.name = name; 3826 ctx.name_len = name_len; 3827 ctx.found_idx = -1; 3828 ctx.found_data = NULL; 3829 ctx.found_data_len = 0; 3830 3831 ret = iterate_dir_item(root, path, key, __find_xattr, &ctx); 3832 if (ret < 0) 3833 return ret; 3834 3835 if (ctx.found_idx == -1) 3836 return -ENOENT; 3837 if (data) { 3838 *data = ctx.found_data; 3839 *data_len = ctx.found_data_len; 3840 } else { 3841 kfree(ctx.found_data); 3842 } 3843 return ctx.found_idx; 3844 } 3845 3846 3847 static int __process_changed_new_xattr(int num, struct btrfs_key *di_key, 3848 const char *name, int name_len, 3849 const char *data, int data_len, 3850 u8 type, void *ctx) 3851 { 3852 int ret; 3853 struct send_ctx *sctx = ctx; 3854 char *found_data = NULL; 3855 int found_data_len = 0; 3856 3857 ret = find_xattr(sctx->parent_root, sctx->right_path, 3858 sctx->cmp_key, name, name_len, &found_data, 3859 &found_data_len); 3860 if (ret == -ENOENT) { 3861 ret = __process_new_xattr(num, di_key, name, name_len, data, 3862 data_len, type, ctx); 3863 } else if (ret >= 0) { 3864 if 
(data_len != found_data_len || 3865 memcmp(data, found_data, data_len)) { 3866 ret = __process_new_xattr(num, di_key, name, name_len, 3867 data, data_len, type, ctx); 3868 } else { 3869 ret = 0; 3870 } 3871 } 3872 3873 kfree(found_data); 3874 return ret; 3875 } 3876 3877 static int __process_changed_deleted_xattr(int num, struct btrfs_key *di_key, 3878 const char *name, int name_len, 3879 const char *data, int data_len, 3880 u8 type, void *ctx) 3881 { 3882 int ret; 3883 struct send_ctx *sctx = ctx; 3884 3885 ret = find_xattr(sctx->send_root, sctx->left_path, sctx->cmp_key, 3886 name, name_len, NULL, NULL); 3887 if (ret == -ENOENT) 3888 ret = __process_deleted_xattr(num, di_key, name, name_len, data, 3889 data_len, type, ctx); 3890 else if (ret >= 0) 3891 ret = 0; 3892 3893 return ret; 3894 } 3895 3896 static int process_changed_xattr(struct send_ctx *sctx) 3897 { 3898 int ret = 0; 3899 3900 ret = iterate_dir_item(sctx->send_root, sctx->left_path, 3901 sctx->cmp_key, __process_changed_new_xattr, sctx); 3902 if (ret < 0) 3903 goto out; 3904 ret = iterate_dir_item(sctx->parent_root, sctx->right_path, 3905 sctx->cmp_key, __process_changed_deleted_xattr, sctx); 3906 3907 out: 3908 return ret; 3909 } 3910 3911 static int process_all_new_xattrs(struct send_ctx *sctx) 3912 { 3913 int ret; 3914 struct btrfs_root *root; 3915 struct btrfs_path *path; 3916 struct btrfs_key key; 3917 struct btrfs_key found_key; 3918 struct extent_buffer *eb; 3919 int slot; 3920 3921 path = alloc_path_for_send(); 3922 if (!path) 3923 return -ENOMEM; 3924 3925 root = sctx->send_root; 3926 3927 key.objectid = sctx->cmp_key->objectid; 3928 key.type = BTRFS_XATTR_ITEM_KEY; 3929 key.offset = 0; 3930 while (1) { 3931 ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); 3932 if (ret < 0) 3933 goto out; 3934 if (ret) { 3935 ret = 0; 3936 goto out; 3937 } 3938 3939 eb = path->nodes[0]; 3940 slot = path->slots[0]; 3941 btrfs_item_key_to_cpu(eb, &found_key, slot); 3942 3943 if (found_key.objectid != key.objectid || 3944 found_key.type != key.type) { 3945 ret = 0; 3946 goto out; 3947 } 3948 3949 ret = iterate_dir_item(root, path, &found_key, 3950 __process_new_xattr, sctx); 3951 if (ret < 0) 3952 goto out; 3953 3954 btrfs_release_path(path); 3955 key.offset = found_key.offset + 1; 3956 } 3957 3958 out: 3959 btrfs_free_path(path); 3960 return ret; 3961 } 3962 3963 static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) 3964 { 3965 struct btrfs_root *root = sctx->send_root; 3966 struct btrfs_fs_info *fs_info = root->fs_info; 3967 struct inode *inode; 3968 struct page *page; 3969 char *addr; 3970 struct btrfs_key key; 3971 pgoff_t index = offset >> PAGE_CACHE_SHIFT; 3972 pgoff_t last_index; 3973 unsigned pg_offset = offset & ~PAGE_CACHE_MASK; 3974 ssize_t ret = 0; 3975 3976 key.objectid = sctx->cur_ino; 3977 key.type = BTRFS_INODE_ITEM_KEY; 3978 key.offset = 0; 3979 3980 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 3981 if (IS_ERR(inode)) 3982 return PTR_ERR(inode); 3983 3984 if (offset + len > i_size_read(inode)) { 3985 if (offset > i_size_read(inode)) 3986 len = 0; 3987 else 3988 len = i_size_read(inode) - offset; 3989 } 3990 if (len == 0) 3991 goto out; 3992 3993 last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; 3994 while (index <= last_index) { 3995 unsigned cur_len = min_t(unsigned, len, 3996 PAGE_CACHE_SIZE - pg_offset); 3997 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 3998 if (!page) { 3999 ret = -ENOMEM; 4000 break; 4001 } 4002 4003 if (!PageUptodate(page)) { 4004
btrfs_readpage(NULL, page); 4005 lock_page(page); 4006 if (!PageUptodate(page)) { 4007 unlock_page(page); 4008 page_cache_release(page); 4009 ret = -EIO; 4010 break; 4011 } 4012 } 4013 4014 addr = kmap(page); 4015 memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len); 4016 kunmap(page); 4017 unlock_page(page); 4018 page_cache_release(page); 4019 index++; 4020 pg_offset = 0; 4021 len -= cur_len; 4022 ret += cur_len; 4023 } 4024 out: 4025 iput(inode); 4026 return ret; 4027 } 4028 4029 /* 4030 * Read some bytes from the current inode/file and send a write command to 4031 * user space. 4032 */ 4033 static int send_write(struct send_ctx *sctx, u64 offset, u32 len) 4034 { 4035 int ret = 0; 4036 struct fs_path *p; 4037 ssize_t num_read = 0; 4038 4039 p = fs_path_alloc(); 4040 if (!p) 4041 return -ENOMEM; 4042 4043 verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); 4044 4045 num_read = fill_read_buf(sctx, offset, len); 4046 if (num_read <= 0) { 4047 if (num_read < 0) 4048 ret = num_read; 4049 goto out; 4050 } 4051 4052 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4053 if (ret < 0) 4054 goto out; 4055 4056 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4057 if (ret < 0) 4058 goto out; 4059 4060 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4061 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4062 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, num_read); 4063 4064 ret = send_cmd(sctx); 4065 4066 tlv_put_failure: 4067 out: 4068 fs_path_free(p); 4069 if (ret < 0) 4070 return ret; 4071 return num_read; 4072 } 4073 4074 /* 4075 * Send a clone command to user space. 4076 */ 4077 static int send_clone(struct send_ctx *sctx, 4078 u64 offset, u32 len, 4079 struct clone_root *clone_root) 4080 { 4081 int ret = 0; 4082 struct fs_path *p; 4083 u64 gen; 4084 4085 verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " 4086 "clone_inode=%llu, clone_offset=%llu\n", offset, len, 4087 clone_root->root->objectid, clone_root->ino, 4088 clone_root->offset); 4089 4090 p = fs_path_alloc(); 4091 if (!p) 4092 return -ENOMEM; 4093 4094 ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE); 4095 if (ret < 0) 4096 goto out; 4097 4098 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4099 if (ret < 0) 4100 goto out; 4101 4102 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4103 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); 4104 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4105 4106 if (clone_root->root == sctx->send_root) { 4107 ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, 4108 &gen, NULL, NULL, NULL, NULL); 4109 if (ret < 0) 4110 goto out; 4111 ret = get_cur_path(sctx, clone_root->ino, gen, p); 4112 } else { 4113 ret = get_inode_path(clone_root->root, clone_root->ino, p); 4114 } 4115 if (ret < 0) 4116 goto out; 4117 4118 TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, 4119 clone_root->root->root_item.uuid); 4120 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, 4121 le64_to_cpu(clone_root->root->root_item.ctransid)); 4122 TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); 4123 TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, 4124 clone_root->offset); 4125 4126 ret = send_cmd(sctx); 4127 4128 tlv_put_failure: 4129 out: 4130 fs_path_free(p); 4131 return ret; 4132 } 4133 4134 /* 4135 * Send an update extent command to user space. 
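It is used instead of write commands when BTRFS_SEND_FLAG_NO_FILE_DATA is set and transmits only the offset and length of the extent (see send_write_or_clone).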
4136 */ 4137 static int send_update_extent(struct send_ctx *sctx, 4138 u64 offset, u32 len) 4139 { 4140 int ret = 0; 4141 struct fs_path *p; 4142 4143 p = fs_path_alloc(); 4144 if (!p) 4145 return -ENOMEM; 4146 4147 ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT); 4148 if (ret < 0) 4149 goto out; 4150 4151 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4152 if (ret < 0) 4153 goto out; 4154 4155 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4156 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4157 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); 4158 4159 ret = send_cmd(sctx); 4160 4161 tlv_put_failure: 4162 out: 4163 fs_path_free(p); 4164 return ret; 4165 } 4166 4167 static int send_hole(struct send_ctx *sctx, u64 end) 4168 { 4169 struct fs_path *p = NULL; 4170 u64 offset = sctx->cur_inode_last_extent; 4171 u64 len; 4172 int ret = 0; 4173 4174 p = fs_path_alloc(); 4175 if (!p) 4176 return -ENOMEM; 4177 memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE); 4178 while (offset < end) { 4179 len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE); 4180 4181 ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); 4182 if (ret < 0) 4183 break; 4184 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); 4185 if (ret < 0) 4186 break; 4187 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); 4188 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); 4189 TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, len); 4190 ret = send_cmd(sctx); 4191 if (ret < 0) 4192 break; 4193 offset += len; 4194 } 4195 tlv_put_failure: 4196 fs_path_free(p); 4197 return ret; 4198 } 4199 4200 static int send_write_or_clone(struct send_ctx *sctx, 4201 struct btrfs_path *path, 4202 struct btrfs_key *key, 4203 struct clone_root *clone_root) 4204 { 4205 int ret = 0; 4206 struct btrfs_file_extent_item *ei; 4207 u64 offset = key->offset; 4208 u64 pos = 0; 4209 u64 len; 4210 u32 l; 4211 u8 type; 4212 u64 bs = sctx->send_root->fs_info->sb->s_blocksize; 4213 4214 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 4215 struct btrfs_file_extent_item); 4216 type = btrfs_file_extent_type(path->nodes[0], ei); 4217 if (type == BTRFS_FILE_EXTENT_INLINE) { 4218 len = btrfs_file_extent_inline_len(path->nodes[0], 4219 path->slots[0], ei); 4220 /* 4221 * it is possible the inline item won't cover the whole page, 4222 * but there may be items after this page. 
Make 4223 * sure to send the whole thing 4224 */ 4225 len = PAGE_CACHE_ALIGN(len); 4226 } else { 4227 len = btrfs_file_extent_num_bytes(path->nodes[0], ei); 4228 } 4229 4230 if (offset + len > sctx->cur_inode_size) 4231 len = sctx->cur_inode_size - offset; 4232 if (len == 0) { 4233 ret = 0; 4234 goto out; 4235 } 4236 4237 if (clone_root && IS_ALIGNED(offset + len, bs)) { 4238 ret = send_clone(sctx, offset, len, clone_root); 4239 } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) { 4240 ret = send_update_extent(sctx, offset, len); 4241 } else { 4242 while (pos < len) { 4243 l = len - pos; 4244 if (l > BTRFS_SEND_READ_SIZE) 4245 l = BTRFS_SEND_READ_SIZE; 4246 ret = send_write(sctx, pos + offset, l); 4247 if (ret < 0) 4248 goto out; 4249 if (!ret) 4250 break; 4251 pos += ret; 4252 } 4253 ret = 0; 4254 } 4255 out: 4256 return ret; 4257 } 4258 4259 static int is_extent_unchanged(struct send_ctx *sctx, 4260 struct btrfs_path *left_path, 4261 struct btrfs_key *ekey) 4262 { 4263 int ret = 0; 4264 struct btrfs_key key; 4265 struct btrfs_path *path = NULL; 4266 struct extent_buffer *eb; 4267 int slot; 4268 struct btrfs_key found_key; 4269 struct btrfs_file_extent_item *ei; 4270 u64 left_disknr; 4271 u64 right_disknr; 4272 u64 left_offset; 4273 u64 right_offset; 4274 u64 left_offset_fixed; 4275 u64 left_len; 4276 u64 right_len; 4277 u64 left_gen; 4278 u64 right_gen; 4279 u8 left_type; 4280 u8 right_type; 4281 4282 path = alloc_path_for_send(); 4283 if (!path) 4284 return -ENOMEM; 4285 4286 eb = left_path->nodes[0]; 4287 slot = left_path->slots[0]; 4288 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 4289 left_type = btrfs_file_extent_type(eb, ei); 4290 4291 if (left_type != BTRFS_FILE_EXTENT_REG) { 4292 ret = 0; 4293 goto out; 4294 } 4295 left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 4296 left_len = btrfs_file_extent_num_bytes(eb, ei); 4297 left_offset = btrfs_file_extent_offset(eb, ei); 4298 left_gen = btrfs_file_extent_generation(eb, ei); 4299 4300 /* 4301 * Following comments will refer to these graphics. L is the left 4302 * extents which we are checking at the moment. 1-8 are the right 4303 * extents that we iterate. 4304 * 4305 * |-----L-----| 4306 * |-1-|-2a-|-3-|-4-|-5-|-6-| 4307 * 4308 * |-----L-----| 4309 * |--1--|-2b-|...(same as above) 4310 * 4311 * Alternative situation. Happens on files where extents got split. 4312 * |-----L-----| 4313 * |-----------7-----------|-6-| 4314 * 4315 * Alternative situation. Happens on files which got larger. 4316 * |-----L-----| 4317 * |-8-| 4318 * Nothing follows after 8. 4319 */ 4320 4321 key.objectid = ekey->objectid; 4322 key.type = BTRFS_EXTENT_DATA_KEY; 4323 key.offset = ekey->offset; 4324 ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0); 4325 if (ret < 0) 4326 goto out; 4327 if (ret) { 4328 ret = 0; 4329 goto out; 4330 } 4331 4332 /* 4333 * Handle special case where the right side has no extents at all. 4334 */ 4335 eb = path->nodes[0]; 4336 slot = path->slots[0]; 4337 btrfs_item_key_to_cpu(eb, &found_key, slot); 4338 if (found_key.objectid != key.objectid || 4339 found_key.type != key.type) { 4340 /* If we're a hole then just pretend nothing changed */ 4341 ret = (left_disknr) ? 0 : 1; 4342 goto out; 4343 } 4344 4345 /* 4346 * We're now on 2a, 2b or 7. 
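That is, on the first right extent that overlaps the start of L.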
4347 */ 4348 key = found_key; 4349 while (key.offset < ekey->offset + left_len) { 4350 ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); 4351 right_type = btrfs_file_extent_type(eb, ei); 4352 if (right_type != BTRFS_FILE_EXTENT_REG) { 4353 ret = 0; 4354 goto out; 4355 } 4356 4357 right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); 4358 right_len = btrfs_file_extent_num_bytes(eb, ei); 4359 right_offset = btrfs_file_extent_offset(eb, ei); 4360 right_gen = btrfs_file_extent_generation(eb, ei); 4361 4362 /* 4363 * Are we at extent 8? If yes, we know the extent is changed. 4364 * This may only happen on the first iteration. 4365 */ 4366 if (found_key.offset + right_len <= ekey->offset) { 4367 /* If we're a hole just pretend nothing changed */ 4368 ret = (left_disknr) ? 0 : 1; 4369 goto out; 4370 } 4371 4372 left_offset_fixed = left_offset; 4373 if (key.offset < ekey->offset) { 4374 /* Fix the right offset for 2a and 7. */ 4375 right_offset += ekey->offset - key.offset; 4376 } else { 4377 /* Fix the left offset for all behind 2a and 2b */ 4378 left_offset_fixed += key.offset - ekey->offset; 4379 } 4380 4381 /* 4382 * Check if we have the same extent. 4383 */ 4384 if (left_disknr != right_disknr || 4385 left_offset_fixed != right_offset || 4386 left_gen != right_gen) { 4387 ret = 0; 4388 goto out; 4389 } 4390 4391 /* 4392 * Go to the next extent. 4393 */ 4394 ret = btrfs_next_item(sctx->parent_root, path); 4395 if (ret < 0) 4396 goto out; 4397 if (!ret) { 4398 eb = path->nodes[0]; 4399 slot = path->slots[0]; 4400 btrfs_item_key_to_cpu(eb, &found_key, slot); 4401 } 4402 if (ret || found_key.objectid != key.objectid || 4403 found_key.type != key.type) { 4404 key.offset += right_len; 4405 break; 4406 } 4407 if (found_key.offset != key.offset + right_len) { 4408 ret = 0; 4409 goto out; 4410 } 4411 key = found_key; 4412 } 4413 4414 /* 4415 * We're now behind the left extent (treat as unchanged) or at the end 4416 * of the right side (treat as changed). 
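key.offset was advanced past every matching right extent, so it reaches ekey->offset + left_len only if the whole left extent was covered by identical extents.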
4417 */ 4418 if (key.offset >= ekey->offset + left_len) 4419 ret = 1; 4420 else 4421 ret = 0; 4422 4423 4424 out: 4425 btrfs_free_path(path); 4426 return ret; 4427 } 4428 4429 static int get_last_extent(struct send_ctx *sctx, u64 offset) 4430 { 4431 struct btrfs_path *path; 4432 struct btrfs_root *root = sctx->send_root; 4433 struct btrfs_file_extent_item *fi; 4434 struct btrfs_key key; 4435 u64 extent_end; 4436 u8 type; 4437 int ret; 4438 4439 path = alloc_path_for_send(); 4440 if (!path) 4441 return -ENOMEM; 4442 4443 sctx->cur_inode_last_extent = 0; 4444 4445 key.objectid = sctx->cur_ino; 4446 key.type = BTRFS_EXTENT_DATA_KEY; 4447 key.offset = offset; 4448 ret = btrfs_search_slot_for_read(root, &key, path, 0, 1); 4449 if (ret < 0) 4450 goto out; 4451 ret = 0; 4452 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 4453 if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY) 4454 goto out; 4455 4456 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 4457 struct btrfs_file_extent_item); 4458 type = btrfs_file_extent_type(path->nodes[0], fi); 4459 if (type == BTRFS_FILE_EXTENT_INLINE) { 4460 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 4461 path->slots[0], fi); 4462 extent_end = ALIGN(key.offset + size, 4463 sctx->send_root->sectorsize); 4464 } else { 4465 extent_end = key.offset + 4466 btrfs_file_extent_num_bytes(path->nodes[0], fi); 4467 } 4468 sctx->cur_inode_last_extent = extent_end; 4469 out: 4470 btrfs_free_path(path); 4471 return ret; 4472 } 4473 4474 static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, 4475 struct btrfs_key *key) 4476 { 4477 struct btrfs_file_extent_item *fi; 4478 u64 extent_end; 4479 u8 type; 4480 int ret = 0; 4481 4482 if (sctx->cur_ino != key->objectid || !need_send_hole(sctx)) 4483 return 0; 4484 4485 if (sctx->cur_inode_last_extent == (u64)-1) { 4486 ret = get_last_extent(sctx, key->offset - 1); 4487 if (ret) 4488 return ret; 4489 } 4490 4491 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 4492 struct btrfs_file_extent_item); 4493 type = btrfs_file_extent_type(path->nodes[0], fi); 4494 if (type == BTRFS_FILE_EXTENT_INLINE) { 4495 u64 size = btrfs_file_extent_inline_len(path->nodes[0], 4496 path->slots[0], fi); 4497 extent_end = ALIGN(key->offset + size, 4498 sctx->send_root->sectorsize); 4499 } else { 4500 extent_end = key->offset + 4501 btrfs_file_extent_num_bytes(path->nodes[0], fi); 4502 } 4503 4504 if (path->slots[0] == 0 && 4505 sctx->cur_inode_last_extent < key->offset) { 4506 /* 4507 * We might have skipped entire leafs that contained only 4508 * file extent items for our current inode. These leafs have 4509 * a generation number smaller (older) than the one in the 4510 * current leaf and the leaf our last extent came from, and 4511 * are located between these 2 leafs. 
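Re-evaluate the last extent from the item preceding key->offset so the hole check below starts at the correct offset.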
		 */
		ret = get_last_extent(sctx, key->offset - 1);
		if (ret)
			return ret;
	}

	if (sctx->cur_inode_last_extent < key->offset)
		ret = send_hole(sctx, key->offset);
	sctx->cur_inode_last_extent = extent_end;
	return ret;
}

static int process_extent(struct send_ctx *sctx,
			  struct btrfs_path *path,
			  struct btrfs_key *key)
{
	struct clone_root *found_clone = NULL;
	int ret = 0;

	if (S_ISLNK(sctx->cur_inode_mode))
		return 0;

	if (sctx->parent_root && !sctx->cur_inode_new) {
		ret = is_extent_unchanged(sctx, path, key);
		if (ret < 0)
			goto out;
		if (ret) {
			ret = 0;
			goto out_hole;
		}
	} else {
		struct btrfs_file_extent_item *ei;
		u8 type;

		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_file_extent_item);
		type = btrfs_file_extent_type(path->nodes[0], ei);
		if (type == BTRFS_FILE_EXTENT_PREALLOC ||
		    type == BTRFS_FILE_EXTENT_REG) {
			/*
			 * The send spec does not have a prealloc command yet,
			 * so just leave a hole for prealloc'ed extents until
			 * we have enough commands queued up to justify rev'ing
			 * the send spec.
			 */
			if (type == BTRFS_FILE_EXTENT_PREALLOC) {
				ret = 0;
				goto out;
			}

			/* Have a hole, just skip it. */
			if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
				ret = 0;
				goto out;
			}
		}
	}

	ret = find_extent_clone(sctx, path, key->objectid, key->offset,
				sctx->cur_inode_size, &found_clone);
	if (ret != -ENOENT && ret < 0)
		goto out;

	ret = send_write_or_clone(sctx, path, key, found_clone);
	if (ret)
		goto out;
out_hole:
	ret = maybe_send_hole(sctx, path, key);
out:
	return ret;
}

static int process_all_extents(struct send_ctx *sctx)
{
	int ret;
	struct btrfs_root *root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
	int slot;

	root = sctx->send_root;
	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	key.objectid = sctx->cmp_key->objectid;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		eb = path->nodes[0];
		slot = path->slots[0];

		if (slot >= btrfs_header_nritems(eb)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = 0;
				break;
			}
			continue;
		}

		btrfs_item_key_to_cpu(eb, &found_key, slot);

		if (found_key.objectid != key.objectid ||
		    found_key.type != key.type) {
			ret = 0;
			goto out;
		}

		ret = process_extent(sctx, path, &found_key);
		if (ret < 0)
			goto out;

		path->slots[0]++;
	}

out:
	btrfs_free_path(path);
	return ret;
}

static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
					   int *pending_move,
					   int *refs_processed)
{
	int ret = 0;

	if (sctx->cur_ino == 0)
		goto out;
	if (!at_end && sctx->cur_ino == sctx->cmp_key->objectid &&
	    sctx->cmp_key->type <= BTRFS_INODE_EXTREF_KEY)
		goto out;
	if (list_empty(&sctx->new_refs) && list_empty(&sctx->deleted_refs))
		goto out;

	ret = process_recorded_refs(sctx, pending_move);
	if (ret < 0)
		goto out;

	*refs_processed = 1;
out:
	return ret;
}

static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
{
	int ret = 0;
	u64 left_mode;
	u64 left_uid;
	u64 left_gid;
	u64 right_mode;
	u64 right_uid;
	u64 right_gid;
	int need_chmod = 0;
	int need_chown = 0;
	int pending_move = 0;
	int refs_processed = 0;

	ret = process_recorded_refs_if_needed(sctx, at_end, &pending_move,
					      &refs_processed);
	if (ret < 0)
		goto out;

	/*
	 * We have processed the refs and thus need to advance send_progress.
	 * Now, calls to get_cur_xxx will take the updated refs of the current
	 * inode into account.
	 *
	 * On the other hand, if our current inode is a directory and couldn't
	 * be moved/renamed because its parent was renamed/moved too and it has
	 * a higher inode number, we can only move/rename our current inode
	 * after we moved/renamed its parent. Therefore in this case operate on
	 * the old path (pre move/rename) of our current inode, and the
	 * move/rename will be performed later.
	 */
	if (refs_processed && !pending_move)
		sctx->send_progress = sctx->cur_ino + 1;

	if (sctx->cur_ino == 0 || sctx->cur_inode_deleted)
		goto out;
	if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
		goto out;

	ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
			     &left_mode, &left_uid, &left_gid, NULL);
	if (ret < 0)
		goto out;

	if (!sctx->parent_root || sctx->cur_inode_new) {
		need_chown = 1;
		if (!S_ISLNK(sctx->cur_inode_mode))
			need_chmod = 1;
	} else {
		ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
				     NULL, NULL, &right_mode, &right_uid,
				     &right_gid, NULL);
		if (ret < 0)
			goto out;

		if (left_uid != right_uid || left_gid != right_gid)
			need_chown = 1;
		if (!S_ISLNK(sctx->cur_inode_mode) && left_mode != right_mode)
			need_chmod = 1;
	}

	if (S_ISREG(sctx->cur_inode_mode)) {
		if (need_send_hole(sctx)) {
			if (sctx->cur_inode_last_extent == (u64)-1) {
				ret = get_last_extent(sctx, (u64)-1);
				if (ret)
					goto out;
			}
			if (sctx->cur_inode_last_extent <
			    sctx->cur_inode_size) {
				ret = send_hole(sctx, sctx->cur_inode_size);
				if (ret)
					goto out;
			}
		}
		ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				    sctx->cur_inode_size);
		if (ret < 0)
			goto out;
	}

	if (need_chown) {
		ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				 left_uid, left_gid);
		if (ret < 0)
			goto out;
	}
	if (need_chmod) {
		ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen,
				 left_mode);
		if (ret < 0)
			goto out;
	}

	/*
	 * If other directory inodes depended on our current directory
	 * inode's move/rename, now do their move/rename operations.
	 */
	if (!is_waiting_for_move(sctx, sctx->cur_ino)) {
		ret = apply_children_dir_moves(sctx);
		if (ret)
			goto out;
	}

	/*
	 * The utimes command must be sent every time, no matter if anything
	 * actually changed between the two trees, because the commands sent
	 * above may have modified the inode on the receiving side.
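	 * (chown, chmod, truncate and writes all touch ctime/mtime on the
	 * receiving side, so utimes has to be the last command for an inode).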
	 */
	sctx->send_progress = sctx->cur_ino + 1;
	ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen);
	if (ret < 0)
		goto out;

out:
	return ret;
}

static int changed_inode(struct send_ctx *sctx,
			 enum btrfs_compare_tree_result result)
{
	int ret = 0;
	struct btrfs_key *key = sctx->cmp_key;
	struct btrfs_inode_item *left_ii = NULL;
	struct btrfs_inode_item *right_ii = NULL;
	u64 left_gen = 0;
	u64 right_gen = 0;

	sctx->cur_ino = key->objectid;
	sctx->cur_inode_new_gen = 0;
	sctx->cur_inode_last_extent = (u64)-1;

	/*
	 * Set send_progress to current inode. This will tell all get_cur_xxx
	 * functions that the current inode's refs are not updated yet. Later,
	 * when process_recorded_refs is finished, it is set to cur_ino + 1.
	 */
	sctx->send_progress = sctx->cur_ino;

	if (result == BTRFS_COMPARE_TREE_NEW ||
	    result == BTRFS_COMPARE_TREE_CHANGED) {
		left_ii = btrfs_item_ptr(sctx->left_path->nodes[0],
					 sctx->left_path->slots[0],
					 struct btrfs_inode_item);
		left_gen = btrfs_inode_generation(sctx->left_path->nodes[0],
						  left_ii);
	} else {
		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
					  sctx->right_path->slots[0],
					  struct btrfs_inode_item);
		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
						   right_ii);
	}
	if (result == BTRFS_COMPARE_TREE_CHANGED) {
		right_ii = btrfs_item_ptr(sctx->right_path->nodes[0],
					  sctx->right_path->slots[0],
					  struct btrfs_inode_item);

		right_gen = btrfs_inode_generation(sctx->right_path->nodes[0],
						   right_ii);

		/*
		 * The cur_ino = root dir case is special here. We can't treat
		 * the inode as deleted+reused because it would generate a
		 * stream that tries to delete/mkdir the root dir.
		 */
		if (left_gen != right_gen &&
		    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
			sctx->cur_inode_new_gen = 1;
	}

	if (result == BTRFS_COMPARE_TREE_NEW) {
		sctx->cur_inode_gen = left_gen;
		sctx->cur_inode_new = 1;
		sctx->cur_inode_deleted = 0;
		sctx->cur_inode_size = btrfs_inode_size(
				sctx->left_path->nodes[0], left_ii);
		sctx->cur_inode_mode = btrfs_inode_mode(
				sctx->left_path->nodes[0], left_ii);
		if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID)
			ret = send_create_inode_if_needed(sctx);
	} else if (result == BTRFS_COMPARE_TREE_DELETED) {
		sctx->cur_inode_gen = right_gen;
		sctx->cur_inode_new = 0;
		sctx->cur_inode_deleted = 1;
		sctx->cur_inode_size = btrfs_inode_size(
				sctx->right_path->nodes[0], right_ii);
		sctx->cur_inode_mode = btrfs_inode_mode(
				sctx->right_path->nodes[0], right_ii);
	} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
		/*
		 * We need to do some special handling in case the inode was
		 * reported as changed with a changed generation number. This
		 * means that the original inode was deleted and a new inode
		 * reused the same inum. So we have to treat the old inode as
		 * deleted and the new one as new.
		 */
		if (sctx->cur_inode_new_gen) {
			/*
			 * First, process the inode as if it was deleted.
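			 * (take generation, size and mode from the right tree,
			 * i.e. from the parent snapshot).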
			 */
			sctx->cur_inode_gen = right_gen;
			sctx->cur_inode_new = 0;
			sctx->cur_inode_deleted = 1;
			sctx->cur_inode_size = btrfs_inode_size(
					sctx->right_path->nodes[0], right_ii);
			sctx->cur_inode_mode = btrfs_inode_mode(
					sctx->right_path->nodes[0], right_ii);
			ret = process_all_refs(sctx,
					BTRFS_COMPARE_TREE_DELETED);
			if (ret < 0)
				goto out;

			/*
			 * Now process the inode as if it was new.
			 */
			sctx->cur_inode_gen = left_gen;
			sctx->cur_inode_new = 1;
			sctx->cur_inode_deleted = 0;
			sctx->cur_inode_size = btrfs_inode_size(
					sctx->left_path->nodes[0], left_ii);
			sctx->cur_inode_mode = btrfs_inode_mode(
					sctx->left_path->nodes[0], left_ii);
			ret = send_create_inode_if_needed(sctx);
			if (ret < 0)
				goto out;

			ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
			if (ret < 0)
				goto out;
			/*
			 * Advance send_progress now as we did not get into
			 * process_recorded_refs_if_needed in the new_gen case.
			 */
			sctx->send_progress = sctx->cur_ino + 1;

			/*
			 * Now process all extents and xattrs of the inode as if
			 * they were all new.
			 */
			ret = process_all_extents(sctx);
			if (ret < 0)
				goto out;
			ret = process_all_new_xattrs(sctx);
			if (ret < 0)
				goto out;
		} else {
			sctx->cur_inode_gen = left_gen;
			sctx->cur_inode_new = 0;
			sctx->cur_inode_new_gen = 0;
			sctx->cur_inode_deleted = 0;
			sctx->cur_inode_size = btrfs_inode_size(
					sctx->left_path->nodes[0], left_ii);
			sctx->cur_inode_mode = btrfs_inode_mode(
					sctx->left_path->nodes[0], left_ii);
		}
	}

out:
	return ret;
}

/*
 * We have to process new refs before deleted refs, but compare_trees gives us
 * the new and deleted refs mixed. To fix this, we record the new/deleted refs
 * first and later process them in process_recorded_refs.
 * For the cur_inode_new_gen case, we skip recording completely because
 * changed_inode already initiated the processing of refs. The reason for this
 * is that in this case compare_tree actually compares the refs of 2 different
 * inodes. To fix this, process_all_refs is used in changed_inode to handle all
 * refs of the right tree as deleted and all refs of the left tree as new.
 */
static int changed_ref(struct send_ctx *sctx,
		       enum btrfs_compare_tree_result result)
{
	int ret = 0;

	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);

	if (!sctx->cur_inode_new_gen &&
	    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
		if (result == BTRFS_COMPARE_TREE_NEW)
			ret = record_new_ref(sctx);
		else if (result == BTRFS_COMPARE_TREE_DELETED)
			ret = record_deleted_ref(sctx);
		else if (result == BTRFS_COMPARE_TREE_CHANGED)
			ret = record_changed_ref(sctx);
	}

	return ret;
}

/*
 * Process new/deleted/changed xattrs. We skip processing in the
 * cur_inode_new_gen case because changed_inode already initiated the
 * processing of xattrs.
 * The reason is the same as in changed_ref.
 */
static int changed_xattr(struct send_ctx *sctx,
			 enum btrfs_compare_tree_result result)
{
	int ret = 0;

	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);

	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
		if (result == BTRFS_COMPARE_TREE_NEW)
			ret = process_new_xattr(sctx);
		else if (result == BTRFS_COMPARE_TREE_DELETED)
			ret = process_deleted_xattr(sctx);
		else if (result == BTRFS_COMPARE_TREE_CHANGED)
			ret = process_changed_xattr(sctx);
	}

	return ret;
}

/*
 * Process new/deleted/changed extents. We skip processing in the
 * cur_inode_new_gen case because changed_inode already initiated the
 * processing of extents. The reason is the same as in changed_ref.
 */
static int changed_extent(struct send_ctx *sctx,
			  enum btrfs_compare_tree_result result)
{
	int ret = 0;

	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);

	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
		if (result != BTRFS_COMPARE_TREE_DELETED)
			ret = process_extent(sctx, sctx->left_path,
					     sctx->cmp_key);
	}

	return ret;
}

static int dir_changed(struct send_ctx *sctx, u64 dir)
{
	u64 orig_gen, new_gen;
	int ret;

	ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
			     NULL, NULL);
	if (ret)
		return ret;

	ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
			     NULL, NULL, NULL);
	if (ret)
		return ret;

	return (orig_gen != new_gen) ? 1 : 0;
}

static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
			struct btrfs_key *key)
{
	struct btrfs_inode_extref *extref;
	struct extent_buffer *leaf;
	u64 dirid = 0, last_dirid = 0;
	unsigned long ptr;
	u32 item_size;
	u32 cur_offset = 0;
	int ref_name_len;
	int ret = 0;

	/* Easy case, just check this one dirid */
	if (key->type == BTRFS_INODE_REF_KEY) {
		dirid = key->offset;

		ret = dir_changed(sctx, dirid);
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	while (cur_offset < item_size) {
		extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
		dirid = btrfs_inode_extref_parent(leaf, extref);
		ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
		cur_offset += ref_name_len + sizeof(*extref);
		if (dirid == last_dirid)
			continue;
		ret = dir_changed(sctx, dirid);
		if (ret)
			break;
		last_dirid = dirid;
	}
out:
	return ret;
}

/*
 * Updates compare-related fields in sctx and simply forwards to the actual
 * changed_xxx functions.
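 * Keys reported as BTRFS_COMPARE_TREE_SAME normally need no work; the
 * exceptions are inode refs, whose parent directory may have been recreated
 * (detected via compare_refs), and file extent items, which may still
 * require a hole to be sent (maybe_send_hole).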
 */
static int changed_cb(struct btrfs_root *left_root,
		      struct btrfs_root *right_root,
		      struct btrfs_path *left_path,
		      struct btrfs_path *right_path,
		      struct btrfs_key *key,
		      enum btrfs_compare_tree_result result,
		      void *ctx)
{
	int ret = 0;
	struct send_ctx *sctx = ctx;

	if (result == BTRFS_COMPARE_TREE_SAME) {
		if (key->type == BTRFS_INODE_REF_KEY ||
		    key->type == BTRFS_INODE_EXTREF_KEY) {
			ret = compare_refs(sctx, left_path, key);
			if (!ret)
				return 0;
			if (ret < 0)
				return ret;
		} else if (key->type == BTRFS_EXTENT_DATA_KEY) {
			return maybe_send_hole(sctx, left_path, key);
		} else {
			return 0;
		}
		result = BTRFS_COMPARE_TREE_CHANGED;
		ret = 0;
	}

	sctx->left_path = left_path;
	sctx->right_path = right_path;
	sctx->cmp_key = key;

	ret = finish_inode_if_needed(sctx, 0);
	if (ret < 0)
		goto out;

	/* Ignore non-FS objects */
	if (key->objectid == BTRFS_FREE_INO_OBJECTID ||
	    key->objectid == BTRFS_FREE_SPACE_OBJECTID)
		goto out;

	if (key->type == BTRFS_INODE_ITEM_KEY)
		ret = changed_inode(sctx, result);
	else if (key->type == BTRFS_INODE_REF_KEY ||
		 key->type == BTRFS_INODE_EXTREF_KEY)
		ret = changed_ref(sctx, result);
	else if (key->type == BTRFS_XATTR_ITEM_KEY)
		ret = changed_xattr(sctx, result);
	else if (key->type == BTRFS_EXTENT_DATA_KEY)
		ret = changed_extent(sctx, result);

out:
	return ret;
}

static int full_send_tree(struct send_ctx *sctx)
{
	int ret;
	struct btrfs_root *send_root = sctx->send_root;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	struct extent_buffer *eb;
	int slot;
	u64 start_ctransid;
	u64 ctransid;

	path = alloc_path_for_send();
	if (!path)
		return -ENOMEM;

	spin_lock(&send_root->root_item_lock);
	start_ctransid = btrfs_root_ctransid(&send_root->root_item);
	spin_unlock(&send_root->root_item_lock);

	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	/*
	 * Make sure the tree has not changed after re-joining. We detect this
	 * by comparing start_ctransid and ctransid. They should always match.
	 */
	spin_lock(&send_root->root_item_lock);
	ctransid = btrfs_root_ctransid(&send_root->root_item);
	spin_unlock(&send_root->root_item_lock);

	if (ctransid != start_ctransid) {
		WARN(1, KERN_WARNING "BTRFS: the root that you're trying to "
This is " 5153 "probably a bug.\n"); 5154 ret = -EIO; 5155 goto out; 5156 } 5157 5158 ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0); 5159 if (ret < 0) 5160 goto out; 5161 if (ret) 5162 goto out_finish; 5163 5164 while (1) { 5165 eb = path->nodes[0]; 5166 slot = path->slots[0]; 5167 btrfs_item_key_to_cpu(eb, &found_key, slot); 5168 5169 ret = changed_cb(send_root, NULL, path, NULL, 5170 &found_key, BTRFS_COMPARE_TREE_NEW, sctx); 5171 if (ret < 0) 5172 goto out; 5173 5174 key.objectid = found_key.objectid; 5175 key.type = found_key.type; 5176 key.offset = found_key.offset + 1; 5177 5178 ret = btrfs_next_item(send_root, path); 5179 if (ret < 0) 5180 goto out; 5181 if (ret) { 5182 ret = 0; 5183 break; 5184 } 5185 } 5186 5187 out_finish: 5188 ret = finish_inode_if_needed(sctx, 1); 5189 5190 out: 5191 btrfs_free_path(path); 5192 return ret; 5193 } 5194 5195 static int send_subvol(struct send_ctx *sctx) 5196 { 5197 int ret; 5198 5199 if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) { 5200 ret = send_header(sctx); 5201 if (ret < 0) 5202 goto out; 5203 } 5204 5205 ret = send_subvol_begin(sctx); 5206 if (ret < 0) 5207 goto out; 5208 5209 if (sctx->parent_root) { 5210 ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, 5211 changed_cb, sctx); 5212 if (ret < 0) 5213 goto out; 5214 ret = finish_inode_if_needed(sctx, 1); 5215 if (ret < 0) 5216 goto out; 5217 } else { 5218 ret = full_send_tree(sctx); 5219 if (ret < 0) 5220 goto out; 5221 } 5222 5223 out: 5224 free_recorded_refs(sctx); 5225 return ret; 5226 } 5227 5228 static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) 5229 { 5230 spin_lock(&root->root_item_lock); 5231 root->send_in_progress--; 5232 /* 5233 * Not much left to do, we don't know why it's unbalanced and 5234 * can't blindly reset it to 0. 5235 */ 5236 if (root->send_in_progress < 0) 5237 btrfs_err(root->fs_info, 5238 "send_in_progres unbalanced %d root %llu\n", 5239 root->send_in_progress, root->root_key.objectid); 5240 spin_unlock(&root->root_item_lock); 5241 } 5242 5243 long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) 5244 { 5245 int ret = 0; 5246 struct btrfs_root *send_root; 5247 struct btrfs_root *clone_root; 5248 struct btrfs_fs_info *fs_info; 5249 struct btrfs_ioctl_send_args *arg = NULL; 5250 struct btrfs_key key; 5251 struct send_ctx *sctx = NULL; 5252 u32 i; 5253 u64 *clone_sources_tmp = NULL; 5254 int clone_sources_to_rollback = 0; 5255 int sort_clone_roots = 0; 5256 int index; 5257 5258 if (!capable(CAP_SYS_ADMIN)) 5259 return -EPERM; 5260 5261 send_root = BTRFS_I(file_inode(mnt_file))->root; 5262 fs_info = send_root->fs_info; 5263 5264 /* 5265 * The subvolume must remain read-only during send, protect against 5266 * making it RW. 5267 */ 5268 spin_lock(&send_root->root_item_lock); 5269 send_root->send_in_progress++; 5270 spin_unlock(&send_root->root_item_lock); 5271 5272 /* 5273 * This is done when we lookup the root, it should already be complete 5274 * by the time we get here. 5275 */ 5276 WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); 5277 5278 /* 5279 * Userspace tools do the checks and warn the user if it's 5280 * not RO. 
	 */
	if (!btrfs_root_readonly(send_root)) {
		ret = -EPERM;
		goto out;
	}

	arg = memdup_user(arg_, sizeof(*arg));
	if (IS_ERR(arg)) {
		ret = PTR_ERR(arg);
		arg = NULL;
		goto out;
	}

	if (!access_ok(VERIFY_READ, arg->clone_sources,
		       sizeof(*arg->clone_sources) *
		       arg->clone_sources_count)) {
		ret = -EFAULT;
		goto out;
	}

	if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
		ret = -EINVAL;
		goto out;
	}

	sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
	if (!sctx) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&sctx->new_refs);
	INIT_LIST_HEAD(&sctx->deleted_refs);
	INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
	INIT_LIST_HEAD(&sctx->name_cache_list);

	sctx->flags = arg->flags;

	sctx->send_filp = fget(arg->send_fd);
	if (!sctx->send_filp) {
		ret = -EBADF;
		goto out;
	}

	sctx->send_root = send_root;
	sctx->clone_roots_cnt = arg->clone_sources_count;

	sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
	sctx->send_buf = vmalloc(sctx->send_max_size);
	if (!sctx->send_buf) {
		ret = -ENOMEM;
		goto out;
	}

	sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
	if (!sctx->read_buf) {
		ret = -ENOMEM;
		goto out;
	}

	sctx->pending_dir_moves = RB_ROOT;
	sctx->waiting_dir_moves = RB_ROOT;

	sctx->clone_roots = vzalloc(sizeof(struct clone_root) *
			(arg->clone_sources_count + 1));
	if (!sctx->clone_roots) {
		ret = -ENOMEM;
		goto out;
	}

	if (arg->clone_sources_count) {
		clone_sources_tmp = vmalloc(arg->clone_sources_count *
					    sizeof(*arg->clone_sources));
		if (!clone_sources_tmp) {
			ret = -ENOMEM;
			goto out;
		}

		ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
				     arg->clone_sources_count *
				     sizeof(*arg->clone_sources));
		if (ret) {
			ret = -EFAULT;
			goto out;
		}

		for (i = 0; i < arg->clone_sources_count; i++) {
			key.objectid = clone_sources_tmp[i];
			key.type = BTRFS_ROOT_ITEM_KEY;
			key.offset = (u64)-1;

			index = srcu_read_lock(&fs_info->subvol_srcu);

			clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
			if (IS_ERR(clone_root)) {
				srcu_read_unlock(&fs_info->subvol_srcu, index);
				ret = PTR_ERR(clone_root);
				goto out;
			}
			clone_sources_to_rollback = i + 1;
			spin_lock(&clone_root->root_item_lock);
			clone_root->send_in_progress++;
			if (!btrfs_root_readonly(clone_root)) {
				spin_unlock(&clone_root->root_item_lock);
				srcu_read_unlock(&fs_info->subvol_srcu, index);
				ret = -EPERM;
				goto out;
			}
			spin_unlock(&clone_root->root_item_lock);
			srcu_read_unlock(&fs_info->subvol_srcu, index);

			sctx->clone_roots[i].root = clone_root;
		}
		vfree(clone_sources_tmp);
		clone_sources_tmp = NULL;
	}

	if (arg->parent_root) {
		key.objectid = arg->parent_root;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;

		index = srcu_read_lock(&fs_info->subvol_srcu);

		sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
		if (IS_ERR(sctx->parent_root)) {
			srcu_read_unlock(&fs_info->subvol_srcu, index);
			ret = PTR_ERR(sctx->parent_root);
			goto out;
		}

		spin_lock(&sctx->parent_root->root_item_lock);
		sctx->parent_root->send_in_progress++;
		if (!btrfs_root_readonly(sctx->parent_root)) {
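			/* The parent snapshot is not read-only; refuse it. */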
			spin_unlock(&sctx->parent_root->root_item_lock);
			srcu_read_unlock(&fs_info->subvol_srcu, index);
			ret = -EPERM;
			goto out;
		}
		spin_unlock(&sctx->parent_root->root_item_lock);

		srcu_read_unlock(&fs_info->subvol_srcu, index);
	}

	/*
	 * Clones from send_root are allowed, but only if the clone source
	 * is behind the current send position. This is checked while searching
	 * for possible clone sources.
	 */
	sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;

	/* We do a bsearch later */
	sort(sctx->clone_roots, sctx->clone_roots_cnt,
	     sizeof(*sctx->clone_roots), __clone_root_cmp_sort,
	     NULL);
	sort_clone_roots = 1;

	ret = send_subvol(sctx);
	if (ret < 0)
		goto out;

	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
		ret = begin_cmd(sctx, BTRFS_SEND_C_END);
		if (ret < 0)
			goto out;
		ret = send_cmd(sctx);
		if (ret < 0)
			goto out;
	}

out:
	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->pending_dir_moves));
	while (sctx && !RB_EMPTY_ROOT(&sctx->pending_dir_moves)) {
		struct rb_node *n;
		struct pending_dir_move *pm;

		n = rb_first(&sctx->pending_dir_moves);
		pm = rb_entry(n, struct pending_dir_move, node);
		while (!list_empty(&pm->list)) {
			struct pending_dir_move *pm2;

			pm2 = list_first_entry(&pm->list,
					       struct pending_dir_move, list);
			free_pending_move(sctx, pm2);
		}
		free_pending_move(sctx, pm);
	}

	WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves));
	while (sctx && !RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) {
		struct rb_node *n;
		struct waiting_dir_move *dm;

		n = rb_first(&sctx->waiting_dir_moves);
		dm = rb_entry(n, struct waiting_dir_move, node);
		rb_erase(&dm->node, &sctx->waiting_dir_moves);
		kfree(dm);
	}

	if (sort_clone_roots) {
		for (i = 0; i < sctx->clone_roots_cnt; i++)
			btrfs_root_dec_send_in_progress(
					sctx->clone_roots[i].root);
	} else {
		for (i = 0; sctx && i < clone_sources_to_rollback; i++)
			btrfs_root_dec_send_in_progress(
					sctx->clone_roots[i].root);

		btrfs_root_dec_send_in_progress(send_root);
	}
	if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
		btrfs_root_dec_send_in_progress(sctx->parent_root);

	kfree(arg);
	vfree(clone_sources_tmp);

	if (sctx) {
		if (sctx->send_filp)
			fput(sctx->send_filp);

		vfree(sctx->clone_roots);
		vfree(sctx->send_buf);
		vfree(sctx->read_buf);

		name_cache_free(sctx);

		kfree(sctx);
	}

	return ret;
}