// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011 STRATO. All rights reserved.
 */

#include <linux/mm.h>
#include <linux/rbtree.h>
#include <trace/events/btrfs.h>
#include "ctree.h"
#include "disk-io.h"
#include "backref.h"
#include "ulist.h"
#include "transaction.h"
#include "delayed-ref.h"
#include "locking.h"
#include "misc.h"
#include "tree-mod-log.h"

/* Just an arbitrary number so we can be sure this happened */
#define BACKREF_FOUND_SHARED 6

struct extent_inode_elem {
	u64 inum;
	u64 offset;
	struct extent_inode_elem *next;
};

static int check_extent_in_eb(const struct btrfs_key *key,
			      const struct extent_buffer *eb,
			      const struct btrfs_file_extent_item *fi,
			      u64 extent_item_pos,
			      struct extent_inode_elem **eie,
			      bool ignore_offset)
{
	u64 offset = 0;
	struct extent_inode_elem *e;

	if (!ignore_offset &&
	    !btrfs_file_extent_compression(eb, fi) &&
	    !btrfs_file_extent_encryption(eb, fi) &&
	    !btrfs_file_extent_other_encoding(eb, fi)) {
		u64 data_offset;
		u64 data_len;

		data_offset = btrfs_file_extent_offset(eb, fi);
		data_len = btrfs_file_extent_num_bytes(eb, fi);

		if (extent_item_pos < data_offset ||
		    extent_item_pos >= data_offset + data_len)
			return 1;
		offset = extent_item_pos - data_offset;
	}

	e = kmalloc(sizeof(*e), GFP_NOFS);
	if (!e)
		return -ENOMEM;

	e->next = *eie;
	e->inum = key->objectid;
	e->offset = key->offset + offset;
	*eie = e;

	return 0;
}

static void free_inode_elem_list(struct extent_inode_elem *eie)
{
	struct extent_inode_elem *eie_next;

	for (; eie; eie = eie_next) {
		eie_next = eie->next;
		kfree(eie);
	}
}

static int find_extent_in_eb(const struct extent_buffer *eb,
			     u64 wanted_disk_byte, u64 extent_item_pos,
			     struct extent_inode_elem **eie,
			     bool ignore_offset)
{
	u64 disk_byte;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int slot;
	int nritems;
	int extent_type;
	int ret;

	/*
	 * from the shared data ref, we only have the leaf but we need
	 * the key. thus, we must look into all items and see that we
	 * find one (some) with a reference to our extent item.
	 */
	nritems = btrfs_header_nritems(eb);
	for (slot = 0; slot < nritems; ++slot) {
		btrfs_item_key_to_cpu(eb, &key, slot);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(eb, fi);
		if (extent_type == BTRFS_FILE_EXTENT_INLINE)
			continue;
		/* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
		disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
		if (disk_byte != wanted_disk_byte)
			continue;

		ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie, ignore_offset);
		if (ret < 0)
			return ret;
	}

	return 0;
}

struct preftree {
	struct rb_root_cached root;
	unsigned int count;
};

#define PREFTREE_INIT	{ .root = RB_ROOT_CACHED, .count = 0 }

struct preftrees {
	struct preftree direct;    /* BTRFS_SHARED_[DATA|BLOCK]_REF_KEY */
	struct preftree indirect;  /* BTRFS_[TREE_BLOCK|EXTENT_DATA]_REF_KEY */
	struct preftree indirect_missing_keys;
};

/*
 * Checks for a shared extent during backref search.
 *
 * The share_count tracks prelim_refs (direct and indirect) having a
 * ref->count >0:
 * - incremented when a ref->count transitions to >0
 * - decremented when a ref->count transitions to <1
 */
struct share_check {
	u64 root_objectid;
	u64 inum;
	int share_count;
	bool have_delayed_delete_refs;
};

static inline int extent_is_shared(struct share_check *sc)
{
	return (sc && sc->share_count > 1) ? BACKREF_FOUND_SHARED : 0;
}

static struct kmem_cache *btrfs_prelim_ref_cache;

int __init btrfs_prelim_ref_init(void)
{
	btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
					sizeof(struct prelim_ref),
					0,
					SLAB_MEM_SPREAD,
					NULL);
	if (!btrfs_prelim_ref_cache)
		return -ENOMEM;
	return 0;
}

void __cold btrfs_prelim_ref_exit(void)
{
	kmem_cache_destroy(btrfs_prelim_ref_cache);
}

static void free_pref(struct prelim_ref *ref)
{
	kmem_cache_free(btrfs_prelim_ref_cache, ref);
}

/*
 * Return 0 when both refs are for the same block (and can be merged).
 * A -1 return indicates ref1 is a 'lower' block than ref2, while 1
 * indicates a 'higher' block.
 */
static int prelim_ref_compare(struct prelim_ref *ref1,
			      struct prelim_ref *ref2)
{
	if (ref1->level < ref2->level)
		return -1;
	if (ref1->level > ref2->level)
		return 1;
	if (ref1->root_id < ref2->root_id)
		return -1;
	if (ref1->root_id > ref2->root_id)
		return 1;
	if (ref1->key_for_search.type < ref2->key_for_search.type)
		return -1;
	if (ref1->key_for_search.type > ref2->key_for_search.type)
		return 1;
	if (ref1->key_for_search.objectid < ref2->key_for_search.objectid)
		return -1;
	if (ref1->key_for_search.objectid > ref2->key_for_search.objectid)
		return 1;
	if (ref1->key_for_search.offset < ref2->key_for_search.offset)
		return -1;
	if (ref1->key_for_search.offset > ref2->key_for_search.offset)
		return 1;
	if (ref1->parent < ref2->parent)
		return -1;
	if (ref1->parent > ref2->parent)
		return 1;

	return 0;
}

static void update_share_count(struct share_check *sc, int oldcount,
			       int newcount)
{
	if ((!sc) || (oldcount == 0 && newcount < 1))
		return;

	if (oldcount > 0 && newcount < 1)
		sc->share_count--;
	else if (oldcount < 1 && newcount > 0)
		sc->share_count++;
}

/*
 * Add @newref to the @root rbtree, merging identical refs.
 *
 * Callers should assume that newref has been freed after calling.
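 * When an identical ref already exists, its count is increased by
 * newref->count and the two inode lists are chained together before
 * newref is freed; otherwise newref is linked into the tree.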
 */
static void prelim_ref_insert(const struct btrfs_fs_info *fs_info,
			      struct preftree *preftree,
			      struct prelim_ref *newref,
			      struct share_check *sc)
{
	struct rb_root_cached *root;
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct prelim_ref *ref;
	int result;
	bool leftmost = true;

	root = &preftree->root;
	p = &root->rb_root.rb_node;

	while (*p) {
		parent = *p;
		ref = rb_entry(parent, struct prelim_ref, rbnode);
		result = prelim_ref_compare(ref, newref);
		if (result < 0) {
			p = &(*p)->rb_left;
		} else if (result > 0) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			/* Identical refs, merge them and free @newref */
			struct extent_inode_elem *eie = ref->inode_list;

			while (eie && eie->next)
				eie = eie->next;

			if (!eie)
				ref->inode_list = newref->inode_list;
			else
				eie->next = newref->inode_list;
			trace_btrfs_prelim_ref_merge(fs_info, ref, newref,
						     preftree->count);
			/*
			 * A delayed ref can have newref->count < 0.
			 * The ref->count is updated to follow any
			 * BTRFS_[ADD|DROP]_DELAYED_REF actions.
			 */
			update_share_count(sc, ref->count,
					   ref->count + newref->count);
			ref->count += newref->count;
			free_pref(newref);
			return;
		}
	}

	update_share_count(sc, 0, newref->count);
	preftree->count++;
	trace_btrfs_prelim_ref_insert(fs_info, newref, NULL, preftree->count);
	rb_link_node(&newref->rbnode, parent, p);
	rb_insert_color_cached(&newref->rbnode, root, leftmost);
}

/*
 * Release the entire tree.  We don't care about internal consistency so
 * just free everything and then reset the tree root.
 */
static void prelim_release(struct preftree *preftree)
{
	struct prelim_ref *ref, *next_ref;

	rbtree_postorder_for_each_entry_safe(ref, next_ref,
					     &preftree->root.rb_root, rbnode) {
		free_inode_elem_list(ref->inode_list);
		free_pref(ref);
	}

	preftree->root = RB_ROOT_CACHED;
	preftree->count = 0;
}

/*
 * the rules for all callers of this function are:
 * - obtaining the parent is the goal
 * - if you add a key, you must know that it is a correct key
 * - if you cannot add the parent or a correct key, then we will look into the
 *   block later to set a correct key
 *
 * delayed refs
 * ============
 *        backref type | shared | indirect | shared | indirect
 * information         |   tree |     tree |   data |     data
 * --------------------+--------+----------+--------+----------
 *      parent logical |    y   |     -    |    -   |     -
 *      key to resolve |    -   |     y    |    y   |     y
 *  tree block logical |    -   |     -    |    -   |     -
 *  root for resolving |    y   |     y    |    y   |     y
 *
 * - column 1:       we've the parent -> done
 * - column 2, 3, 4: we use the key to find the parent
 *
 * on disk refs (inline or keyed)
 * ==============================
 *        backref type | shared | indirect | shared | indirect
 * information         |   tree |     tree |   data |     data
 * --------------------+--------+----------+--------+----------
 *      parent logical |    y   |     -    |    y   |     -
 *      key to resolve |    -   |     -    |    -   |     y
 *  tree block logical |    y   |     y    |    y   |     y
 *  root for resolving |    -   |     y    |    y   |     y
 *
 * - column 1, 3: we've the parent -> done
 * - column 2:    we take the first key from the block to find the parent
 *                (see add_missing_keys)
 * - column 4:    we use the key to find the parent
 *
 * additional information that's available but not required to find the parent
 * block might help in merging entries to gain some speed.
 */
static int add_prelim_ref(const struct btrfs_fs_info *fs_info,
			  struct preftree *preftree, u64 root_id,
			  const struct btrfs_key *key, int level, u64 parent,
			  u64 wanted_disk_byte, int count,
			  struct share_check *sc, gfp_t gfp_mask)
{
	struct prelim_ref *ref;

	if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
		return 0;

	ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
	if (!ref)
		return -ENOMEM;

	ref->root_id = root_id;
	if (key)
		ref->key_for_search = *key;
	else
		memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));

	ref->inode_list = NULL;
	ref->level = level;
	ref->count = count;
	ref->parent = parent;
	ref->wanted_disk_byte = wanted_disk_byte;
	prelim_ref_insert(fs_info, preftree, ref, sc);
	return extent_is_shared(sc);
}

/* direct refs use root == 0, key == NULL */
static int add_direct_ref(const struct btrfs_fs_info *fs_info,
			  struct preftrees *preftrees, int level, u64 parent,
			  u64 wanted_disk_byte, int count,
			  struct share_check *sc, gfp_t gfp_mask)
{
	return add_prelim_ref(fs_info, &preftrees->direct, 0, NULL, level,
			      parent, wanted_disk_byte, count, sc, gfp_mask);
}

/* indirect refs use parent == 0 */
static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
			    struct preftrees *preftrees, u64 root_id,
			    const struct btrfs_key *key, int level,
			    u64 wanted_disk_byte, int count,
			    struct share_check *sc, gfp_t gfp_mask)
{
	struct preftree *tree = &preftrees->indirect;

	if (!key)
		tree = &preftrees->indirect_missing_keys;
	return add_prelim_ref(fs_info, tree, root_id, key, level, 0,
			      wanted_disk_byte, count, sc, gfp_mask);
}

static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr)
{
	struct rb_node **p = &preftrees->direct.root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct prelim_ref *ref = NULL;
	struct prelim_ref target = {};
	int result;

	target.parent = bytenr;

	while (*p) {
		parent = *p;
		ref = rb_entry(parent, struct prelim_ref, rbnode);
		result = prelim_ref_compare(ref, &target);

		if (result < 0)
			p = &(*p)->rb_left;
		else if (result > 0)
			p = &(*p)->rb_right;
		else
			return 1;
	}
	return 0;
}

static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
			   struct ulist *parents,
			   struct preftrees *preftrees, struct prelim_ref *ref,
			   int level, u64 time_seq, const u64 *extent_item_pos,
			   bool ignore_offset)
{
	int ret = 0;
	int slot;
	struct extent_buffer *eb;
	struct btrfs_key key;
	struct btrfs_key *key_for_search = &ref->key_for_search;
	struct btrfs_file_extent_item *fi;
	struct extent_inode_elem *eie = NULL, *old = NULL;
	u64 disk_byte;
	u64 wanted_disk_byte = ref->wanted_disk_byte;
	u64 count = 0;
	u64 data_offset;

	if (level != 0) {
		eb = path->nodes[level];
		ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
		if (ret < 0)
			return ret;
		return 0;
	}

	/*
	 * 1. We normally enter this function with the path already pointing to
	 *    the first item to check. But sometimes, we may enter it with
	 *    slot == nritems.
	 * 2. We are searching for normal backref but bytenr of this leaf
	 *    matches shared data backref
	 * 3. The leaf owner is not equal to the root we are searching
	 *
	 * For these cases, go to the next leaf before we continue.
	 */
	eb = path->nodes[0];
	if (path->slots[0] >= btrfs_header_nritems(eb) ||
	    is_shared_data_backref(preftrees, eb->start) ||
	    ref->root_id != btrfs_header_owner(eb)) {
		if (time_seq == BTRFS_SEQ_LAST)
			ret = btrfs_next_leaf(root, path);
		else
			ret = btrfs_next_old_leaf(root, path, time_seq);
	}

	while (!ret && count < ref->count) {
		eb = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(eb, &key, slot);

		if (key.objectid != key_for_search->objectid ||
		    key.type != BTRFS_EXTENT_DATA_KEY)
			break;

		/*
		 * We are searching for normal backref but bytenr of this leaf
		 * matches shared data backref, OR
		 * the leaf owner is not equal to the root we are searching for
		 */
		if (slot == 0 &&
		    (is_shared_data_backref(preftrees, eb->start) ||
		     ref->root_id != btrfs_header_owner(eb))) {
			if (time_seq == BTRFS_SEQ_LAST)
				ret = btrfs_next_leaf(root, path);
			else
				ret = btrfs_next_old_leaf(root, path, time_seq);
			continue;
		}
		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
		disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
		data_offset = btrfs_file_extent_offset(eb, fi);

		if (disk_byte == wanted_disk_byte) {
			eie = NULL;
			old = NULL;
			if (ref->key_for_search.offset == key.offset - data_offset)
				count++;
			else
				goto next;
			if (extent_item_pos) {
				ret = check_extent_in_eb(&key, eb, fi,
						*extent_item_pos,
						&eie, ignore_offset);
				if (ret < 0)
					break;
			}
			if (ret > 0)
				goto next;
			ret = ulist_add_merge_ptr(parents, eb->start,
						  eie, (void **)&old, GFP_NOFS);
			if (ret < 0)
				break;
			if (!ret && extent_item_pos) {
				while (old->next)
					old = old->next;
				old->next = eie;
			}
			eie = NULL;
		}
next:
		if (time_seq == BTRFS_SEQ_LAST)
			ret = btrfs_next_item(root, path);
		else
			ret = btrfs_next_old_item(root, path, time_seq);
	}

	if (ret > 0)
		ret = 0;
	else if (ret < 0)
		free_inode_elem_list(eie);
	return ret;
}

/*
 * resolve an indirect backref in the form (root_id, key, level)
 * to a logical address
 */
static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
				struct btrfs_path *path, u64 time_seq,
				struct preftrees *preftrees,
				struct prelim_ref *ref, struct ulist *parents,
				const u64 *extent_item_pos, bool ignore_offset)
{
	struct btrfs_root *root;
	struct extent_buffer *eb;
	int ret = 0;
	int root_level;
	int level = ref->level;
	struct btrfs_key search_key = ref->key_for_search;

	/*
	 * If we're search_commit_root we could possibly be holding locks on
	 * other tree nodes.  This happens when qgroups does backref walks when
	 * adding new delayed refs. To deal with this we need to look in cache
	 * for the root, and if we don't find it then we need to search the
	 * tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage
	 * here.
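	 * In both cases the root reference we obtain is dropped with
	 * btrfs_put_root() at the 'out' label below.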
	 */
	if (path->search_commit_root)
		root = btrfs_get_fs_root_commit_root(fs_info, path, ref->root_id);
	else
		root = btrfs_get_fs_root(fs_info, ref->root_id, false);
	if (IS_ERR(root)) {
		ret = PTR_ERR(root);
		goto out_free;
	}

	if (!path->search_commit_root &&
	    test_bit(BTRFS_ROOT_DELETING, &root->state)) {
		ret = -ENOENT;
		goto out;
	}

	if (btrfs_is_testing(fs_info)) {
		ret = -ENOENT;
		goto out;
	}

	if (path->search_commit_root)
		root_level = btrfs_header_level(root->commit_root);
	else if (time_seq == BTRFS_SEQ_LAST)
		root_level = btrfs_header_level(root->node);
	else
		root_level = btrfs_old_root_level(root, time_seq);

	if (root_level + 1 == level)
		goto out;

	/*
	 * We can often find data backrefs with an offset that is too large
	 * (>= LLONG_MAX, maximum allowed file offset) due to underflows when
	 * subtracting a file's offset with the data offset of its
	 * corresponding extent data item. This can happen for example in the
	 * clone ioctl.
	 *
	 * So if we detect such case we set the search key's offset to zero to
	 * make sure we will find the matching file extent item at
	 * add_all_parents(), otherwise we will miss it because the offset
	 * taken from the backref is much larger than the offset of the file
	 * extent item. This can make us scan a very large number of file
	 * extent items, but at least it will not make us miss any.
	 *
	 * This is an ugly workaround for a behaviour that should have never
	 * existed, but it does and a fix for the clone ioctl would touch a lot
	 * of places, cause backwards incompatibility and would not fix the
	 * problem for extents cloned with older kernels.
	 */
	if (search_key.type == BTRFS_EXTENT_DATA_KEY &&
	    search_key.offset >= LLONG_MAX)
		search_key.offset = 0;
	path->lowest_level = level;
	if (time_seq == BTRFS_SEQ_LAST)
		ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
	else
		ret = btrfs_search_old_slot(root, &search_key, path, time_seq);

	btrfs_debug(fs_info,
		"search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)",
		 ref->root_id, level, ref->count, ret,
		 ref->key_for_search.objectid, ref->key_for_search.type,
		 ref->key_for_search.offset);
	if (ret < 0)
		goto out;

	eb = path->nodes[level];
	while (!eb) {
		if (WARN_ON(!level)) {
			ret = 1;
			goto out;
		}
		level--;
		eb = path->nodes[level];
	}

	ret = add_all_parents(root, path, parents, preftrees, ref, level,
			      time_seq, extent_item_pos, ignore_offset);
out:
	btrfs_put_root(root);
out_free:
	path->lowest_level = 0;
	btrfs_release_path(path);
	return ret;
}

static struct extent_inode_elem *
unode_aux_to_inode_list(struct ulist_node *node)
{
	if (!node)
		return NULL;
	return (struct extent_inode_elem *)(uintptr_t)node->aux;
}

static void free_leaf_list(struct ulist *ulist)
{
	struct ulist_node *node;
	struct ulist_iterator uiter;

	ULIST_ITER_INIT(&uiter);
	while ((node = ulist_next(ulist, &uiter)))
		free_inode_elem_list(unode_aux_to_inode_list(node));

	ulist_free(ulist);
}

/*
 * We maintain three separate rbtrees: one for direct refs, one for
 * indirect refs which have a key, and one for indirect refs which do not
 * have a key. Each tree does merge on insertion.
 *
 * Once all of the references are located, we iterate over the tree of
 * indirect refs with missing keys. An appropriate key is located and
 * the ref is moved onto the tree for indirect refs. After all missing
 * keys are thus located, we iterate over the indirect ref tree, resolve
 * each reference, and then insert the resolved reference onto the
 * direct tree (merging there too).
 *
 * New backrefs (i.e., for parent nodes) are added to the appropriate
 * rbtree as they are encountered. The new backrefs are subsequently
 * resolved as above.
 */
static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path, u64 time_seq,
				 struct preftrees *preftrees,
				 const u64 *extent_item_pos,
				 struct share_check *sc, bool ignore_offset)
{
	int err;
	int ret = 0;
	struct ulist *parents;
	struct ulist_node *node;
	struct ulist_iterator uiter;
	struct rb_node *rnode;

	parents = ulist_alloc(GFP_NOFS);
	if (!parents)
		return -ENOMEM;

	/*
	 * We could trade memory usage for performance here by iterating
	 * the tree, allocating new refs for each insertion, and then
	 * freeing the entire indirect tree when we're done.  In some test
	 * cases, the tree can grow quite large (~200k objects).
	 */
	while ((rnode = rb_first_cached(&preftrees->indirect.root))) {
		struct prelim_ref *ref;

		ref = rb_entry(rnode, struct prelim_ref, rbnode);
		if (WARN(ref->parent,
			 "BUG: direct ref found in indirect tree")) {
			ret = -EINVAL;
			goto out;
		}

		rb_erase_cached(&ref->rbnode, &preftrees->indirect.root);
		preftrees->indirect.count--;

		if (ref->count == 0) {
			free_pref(ref);
			continue;
		}

		if (sc && sc->root_objectid &&
		    ref->root_id != sc->root_objectid) {
			free_pref(ref);
			ret = BACKREF_FOUND_SHARED;
			goto out;
		}
		err = resolve_indirect_ref(fs_info, path, time_seq, preftrees,
					   ref, parents, extent_item_pos,
					   ignore_offset);
		/*
		 * We can only tolerate -ENOENT; any other error must be
		 * caught and returned directly.
		 */
		if (err == -ENOENT) {
			prelim_ref_insert(fs_info, &preftrees->direct, ref,
					  NULL);
			continue;
		} else if (err) {
			free_pref(ref);
			ret = err;
			goto out;
		}

		/* we put the first parent into the ref at hand */
		ULIST_ITER_INIT(&uiter);
		node = ulist_next(parents, &uiter);
		ref->parent = node ? node->val : 0;
		ref->inode_list = unode_aux_to_inode_list(node);

		/* Add a prelim_ref(s) for any other parent(s). */
		while ((node = ulist_next(parents, &uiter))) {
			struct prelim_ref *new_ref;

			new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
						   GFP_NOFS);
			if (!new_ref) {
				free_pref(ref);
				ret = -ENOMEM;
				goto out;
			}
			memcpy(new_ref, ref, sizeof(*ref));
			new_ref->parent = node->val;
			new_ref->inode_list = unode_aux_to_inode_list(node);
			prelim_ref_insert(fs_info, &preftrees->direct,
					  new_ref, NULL);
		}

		/*
		 * Now it's a direct ref, put it in the direct tree. We must
		 * do this last because the ref could be merged/freed here.
		 */
		prelim_ref_insert(fs_info, &preftrees->direct, ref, NULL);

		ulist_reinit(parents);
		cond_resched();
	}
out:
	/*
	 * We may have inode lists attached to refs in the parents ulist, so we
	 * must free them before freeing the ulist and its refs.
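	 * free_leaf_list() handles both: it frees each node's attached inode
	 * list and then the ulist itself.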
	 */
	free_leaf_list(parents);
	return ret;
}

/*
 * read tree blocks and add keys where required.
 */
static int add_missing_keys(struct btrfs_fs_info *fs_info,
			    struct preftrees *preftrees, bool lock)
{
	struct prelim_ref *ref;
	struct extent_buffer *eb;
	struct preftree *tree = &preftrees->indirect_missing_keys;
	struct rb_node *node;

	while ((node = rb_first_cached(&tree->root))) {
		ref = rb_entry(node, struct prelim_ref, rbnode);
		rb_erase_cached(node, &tree->root);

		BUG_ON(ref->parent);	/* should not be a direct ref */
		BUG_ON(ref->key_for_search.type);
		BUG_ON(!ref->wanted_disk_byte);

		eb = read_tree_block(fs_info, ref->wanted_disk_byte,
				     ref->root_id, 0, ref->level - 1, NULL);
		if (IS_ERR(eb)) {
			free_pref(ref);
			return PTR_ERR(eb);
		}
		if (!extent_buffer_uptodate(eb)) {
			free_pref(ref);
			free_extent_buffer(eb);
			return -EIO;
		}

		if (lock)
			btrfs_tree_read_lock(eb);
		if (btrfs_header_level(eb) == 0)
			btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
		else
			btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
		if (lock)
			btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
		prelim_ref_insert(fs_info, &preftrees->indirect, ref, NULL);
		cond_resched();
	}
	return 0;
}

/*
 * add all currently queued delayed refs from this head whose seq nr is
 * smaller than or equal to seq to the list
 */
static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
			    struct btrfs_delayed_ref_head *head, u64 seq,
			    struct preftrees *preftrees, struct share_check *sc)
{
	struct btrfs_delayed_ref_node *node;
	struct btrfs_key key;
	struct rb_node *n;
	int count;
	int ret = 0;

	spin_lock(&head->lock);
	for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
		node = rb_entry(n, struct btrfs_delayed_ref_node,
				ref_node);
		if (node->seq > seq)
			continue;

		switch (node->action) {
		case BTRFS_ADD_DELAYED_EXTENT:
		case BTRFS_UPDATE_DELAYED_HEAD:
			WARN_ON(1);
			continue;
		case BTRFS_ADD_DELAYED_REF:
			count = node->ref_mod;
			break;
		case BTRFS_DROP_DELAYED_REF:
			count = node->ref_mod * -1;
			break;
		default:
			BUG();
		}
		switch (node->type) {
		case BTRFS_TREE_BLOCK_REF_KEY: {
			/* NORMAL INDIRECT METADATA backref */
			struct btrfs_delayed_tree_ref *ref;
			struct btrfs_key *key_ptr = NULL;

			if (head->extent_op && head->extent_op->update_key) {
				btrfs_disk_key_to_cpu(&key, &head->extent_op->key);
				key_ptr = &key;
			}

			ref = btrfs_delayed_node_to_tree_ref(node);
			ret = add_indirect_ref(fs_info, preftrees, ref->root,
					       key_ptr, ref->level + 1,
					       node->bytenr, count, sc,
					       GFP_ATOMIC);
			break;
		}
		case BTRFS_SHARED_BLOCK_REF_KEY: {
			/* SHARED DIRECT METADATA backref */
			struct btrfs_delayed_tree_ref *ref;

			ref = btrfs_delayed_node_to_tree_ref(node);

			ret = add_direct_ref(fs_info, preftrees, ref->level + 1,
					     ref->parent, node->bytenr, count,
					     sc, GFP_ATOMIC);
			break;
		}
		case BTRFS_EXTENT_DATA_REF_KEY: {
			/* NORMAL INDIRECT DATA backref */
			struct btrfs_delayed_data_ref *ref;
			ref = btrfs_delayed_node_to_data_ref(node);

			key.objectid = ref->objectid;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = ref->offset;

			/*
			 * If we have a share check context and a reference for
			 * another inode, we can't exit immediately. This is
			 * because even if this is a BTRFS_ADD_DELAYED_REF
			 * reference we may find next a BTRFS_DROP_DELAYED_REF
			 * which cancels out this ADD reference.
			 *
			 * If this is a DROP reference and there was no previous
			 * ADD reference, then we need to signal that when we
			 * process references from the extent tree (through
			 * add_inline_refs() and add_keyed_refs()), we should
			 * not exit early if we find a reference for another
			 * inode, because one of the delayed DROP references
			 * may cancel that reference in the extent tree.
			 */
			if (sc && count < 0)
				sc->have_delayed_delete_refs = true;

			ret = add_indirect_ref(fs_info, preftrees, ref->root,
					       &key, 0, node->bytenr, count, sc,
					       GFP_ATOMIC);
			break;
		}
		case BTRFS_SHARED_DATA_REF_KEY: {
			/* SHARED DIRECT FULL backref */
			struct btrfs_delayed_data_ref *ref;

			ref = btrfs_delayed_node_to_data_ref(node);

			ret = add_direct_ref(fs_info, preftrees, 0, ref->parent,
					     node->bytenr, count, sc,
					     GFP_ATOMIC);
			break;
		}
		default:
			WARN_ON(1);
		}
		/*
		 * We must ignore BACKREF_FOUND_SHARED until all delayed
		 * refs have been checked.
		 */
		if (ret && (ret != BACKREF_FOUND_SHARED))
			break;
	}
	if (!ret)
		ret = extent_is_shared(sc);

	spin_unlock(&head->lock);
	return ret;
}

/*
 * add all inline backrefs for bytenr to the list
 *
 * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED.
 */
static int add_inline_refs(const struct btrfs_fs_info *fs_info,
			   struct btrfs_path *path, u64 bytenr,
			   int *info_level, struct preftrees *preftrees,
			   struct share_check *sc)
{
	int ret = 0;
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	unsigned long ptr;
	unsigned long end;
	struct btrfs_extent_item *ei;
	u64 flags;
	u64 item_size;

	/*
	 * enumerate all inline refs
	 */
	leaf = path->nodes[0];
	slot = path->slots[0];

	item_size = btrfs_item_size(leaf, slot);
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);
	btrfs_item_key_to_cpu(leaf, &found_key, slot);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)ptr;
		*info_level = btrfs_tree_block_level(leaf, info);
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	} else if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
		*info_level = found_key.offset;
	} else {
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
	}

	while (ptr < end) {
		struct btrfs_extent_inline_ref *iref;
		u64 offset;
		int type;

		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_get_extent_inline_ref_type(leaf, iref,
							BTRFS_REF_TYPE_ANY);
		if (type == BTRFS_REF_TYPE_INVALID)
			return -EUCLEAN;

		offset = btrfs_extent_inline_ref_offset(leaf, iref);

		switch (type) {
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_direct_ref(fs_info, preftrees,
					     *info_level + 1, offset,
					     bytenr, 1, NULL, GFP_NOFS);
			break;
		case BTRFS_SHARED_DATA_REF_KEY: {
			struct btrfs_shared_data_ref *sdref;
			int count;

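			/*
			 * The reference count is stored directly in the
			 * shared data ref item.
			 */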
			sdref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sdref);

			ret = add_direct_ref(fs_info, preftrees, 0, offset,
					     bytenr, count, sc, GFP_NOFS);
			break;
		}
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_indirect_ref(fs_info, preftrees, offset,
					       NULL, *info_level + 1,
					       bytenr, 1, NULL, GFP_NOFS);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY: {
			struct btrfs_extent_data_ref *dref;
			int count;
			u64 root;

			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			count = btrfs_extent_data_ref_count(leaf, dref);
			key.objectid = btrfs_extent_data_ref_objectid(leaf,
								      dref);
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = btrfs_extent_data_ref_offset(leaf, dref);

			if (sc && sc->inum && key.objectid != sc->inum &&
			    !sc->have_delayed_delete_refs) {
				ret = BACKREF_FOUND_SHARED;
				break;
			}

			root = btrfs_extent_data_ref_root(leaf, dref);

			ret = add_indirect_ref(fs_info, preftrees, root,
					       &key, 0, bytenr, count,
					       sc, GFP_NOFS);

			break;
		}
		default:
			WARN_ON(1);
		}
		if (ret)
			return ret;
		ptr += btrfs_extent_inline_ref_size(type);
	}

	return 0;
}

/*
 * add all non-inline backrefs for bytenr to the list
 *
 * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED.
 */
static int add_keyed_refs(struct btrfs_root *extent_root,
			  struct btrfs_path *path, u64 bytenr,
			  int info_level, struct preftrees *preftrees,
			  struct share_check *sc)
{
	struct btrfs_fs_info *fs_info = extent_root->fs_info;
	int ret;
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	while (1) {
		ret = btrfs_next_item(extent_root, path);
		if (ret < 0)
			break;
		if (ret) {
			ret = 0;
			break;
		}

		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, slot);

		if (key.objectid != bytenr)
			break;
		if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
			continue;
		if (key.type > BTRFS_SHARED_DATA_REF_KEY)
			break;

		switch (key.type) {
		case BTRFS_SHARED_BLOCK_REF_KEY:
			/* SHARED DIRECT METADATA backref */
			ret = add_direct_ref(fs_info, preftrees,
					     info_level + 1, key.offset,
					     bytenr, 1, NULL, GFP_NOFS);
			break;
		case BTRFS_SHARED_DATA_REF_KEY: {
			/* SHARED DIRECT FULL backref */
			struct btrfs_shared_data_ref *sdref;
			int count;

			sdref = btrfs_item_ptr(leaf, slot,
					       struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sdref);
			ret = add_direct_ref(fs_info, preftrees, 0,
					     key.offset, bytenr, count,
					     sc, GFP_NOFS);
			break;
		}
		case BTRFS_TREE_BLOCK_REF_KEY:
			/* NORMAL INDIRECT METADATA backref */
			ret = add_indirect_ref(fs_info, preftrees, key.offset,
					       NULL, info_level + 1, bytenr,
					       1, NULL, GFP_NOFS);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY: {
			/* NORMAL INDIRECT DATA backref */
			struct btrfs_extent_data_ref *dref;
			int count;
			u64 root;

			dref = btrfs_item_ptr(leaf, slot,
					      struct btrfs_extent_data_ref);
			count = btrfs_extent_data_ref_count(leaf, dref);
			key.objectid = btrfs_extent_data_ref_objectid(leaf,
								      dref);
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = btrfs_extent_data_ref_offset(leaf, dref);

			if (sc && sc->inum && key.objectid != sc->inum &&
			    !sc->have_delayed_delete_refs) {
				ret = BACKREF_FOUND_SHARED;
				break;
			}

			root = btrfs_extent_data_ref_root(leaf, dref);
			ret = add_indirect_ref(fs_info, preftrees, root,
					       &key, 0, bytenr, count,
					       sc, GFP_NOFS);
			break;
		}
		default:
			WARN_ON(1);
		}
		if (ret)
			return ret;

	}

	return ret;
}

/*
 * this adds all existing backrefs (inline backrefs, backrefs and delayed
 * refs) for the given bytenr to the refs list, merges duplicates and resolves
 * indirect refs to their parent bytenr.
 * When roots are found, they're added to the roots list
 *
 * If time_seq is set to BTRFS_SEQ_LAST, it will not search delayed_refs and
 * behaves much like the trans == NULL case; the only difference is that it
 * does not use the commit root.
 * The special case is for qgroup to search roots in commit_transaction().
 *
 * @sc - if !NULL, then immediately return BACKREF_FOUND_SHARED when a
 *       shared extent is detected.
 *
 * Otherwise this returns 0 for success and <0 for an error.
 *
 * If ignore_offset is set to false, only extent refs whose offsets match
 * extent_item_pos are returned.  If true, every extent ref is returned
 * and extent_item_pos is ignored.
 *
 * FIXME some caching might speed things up
 */
static int find_parent_nodes(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
			     u64 time_seq, struct ulist *refs,
			     struct ulist *roots, const u64 *extent_item_pos,
			     struct share_check *sc, bool ignore_offset)
{
	struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr);
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_delayed_ref_root *delayed_refs = NULL;
	struct btrfs_delayed_ref_head *head;
	int info_level = 0;
	int ret;
	struct prelim_ref *ref;
	struct rb_node *node;
	struct extent_inode_elem *eie = NULL;
	struct preftrees preftrees = {
		.direct = PREFTREE_INIT,
		.indirect = PREFTREE_INIT,
		.indirect_missing_keys = PREFTREE_INIT
	};

	key.objectid = bytenr;
	key.offset = (u64)-1;
	if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	if (!trans) {
		path->search_commit_root = 1;
		path->skip_locking = 1;
	}

	if (time_seq == BTRFS_SEQ_LAST)
		path->skip_locking = 1;

again:
	head = NULL;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret == 0) {
		/* This shouldn't happen, indicates a bug or fs corruption. */
		ASSERT(ret != 0);
		ret = -EUCLEAN;
		goto out;
	}

	if (trans && likely(trans->type != __TRANS_DUMMY) &&
	    time_seq != BTRFS_SEQ_LAST) {
		/*
		 * We have a specific time_seq we care about and trans which
		 * means we have the path lock, we need to grab the ref head and
		 * lock it so we have a consistent view of the refs at the given
		 * time.
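		 * Holding the head's mutex keeps its list of delayed refs
		 * stable while add_delayed_refs() walks it.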
		 */
		delayed_refs = &trans->transaction->delayed_refs;
		spin_lock(&delayed_refs->lock);
		head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
		if (head) {
			if (!mutex_trylock(&head->mutex)) {
				refcount_inc(&head->refs);
				spin_unlock(&delayed_refs->lock);

				btrfs_release_path(path);

				/*
				 * Mutex was contended, block until it's
				 * released and try again
				 */
				mutex_lock(&head->mutex);
				mutex_unlock(&head->mutex);
				btrfs_put_delayed_ref_head(head);
				goto again;
			}
			spin_unlock(&delayed_refs->lock);
			ret = add_delayed_refs(fs_info, head, time_seq,
					       &preftrees, sc);
			mutex_unlock(&head->mutex);
			if (ret)
				goto out;
		} else {
			spin_unlock(&delayed_refs->lock);
		}
	}

	if (path->slots[0]) {
		struct extent_buffer *leaf;
		int slot;

		path->slots[0]--;
		leaf = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid == bytenr &&
		    (key.type == BTRFS_EXTENT_ITEM_KEY ||
		     key.type == BTRFS_METADATA_ITEM_KEY)) {
			ret = add_inline_refs(fs_info, path, bytenr,
					      &info_level, &preftrees, sc);
			if (ret)
				goto out;
			ret = add_keyed_refs(root, path, bytenr, info_level,
					     &preftrees, sc);
			if (ret)
				goto out;
		}
	}

	btrfs_release_path(path);

	ret = add_missing_keys(fs_info, &preftrees, path->skip_locking == 0);
	if (ret)
		goto out;

	WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root));

	ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
				    extent_item_pos, sc, ignore_offset);
	if (ret)
		goto out;

	WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect.root.rb_root));

	/*
	 * This walks the tree of merged and resolved refs. Tree blocks are
	 * read in as needed. Unique entries are added to the ulist, and
	 * the list of found roots is updated.
	 *
	 * We release the entire tree in one go before returning.
	 */
	node = rb_first_cached(&preftrees.direct.root);
	while (node) {
		ref = rb_entry(node, struct prelim_ref, rbnode);
		node = rb_next(&ref->rbnode);
		/*
		 * ref->count < 0 can happen here if there are delayed
		 * refs with a node->action of BTRFS_DROP_DELAYED_REF.
		 * prelim_ref_insert() relies on this when merging
		 * identical refs to keep the overall count correct.
		 * prelim_ref_insert() will merge only those refs
		 * which compare identically.  Any refs having
		 * e.g. different offsets would not be merged,
		 * and would retain their original ref->count < 0.
		 */
		if (roots && ref->count && ref->root_id && ref->parent == 0) {
			if (sc && sc->root_objectid &&
			    ref->root_id != sc->root_objectid) {
				ret = BACKREF_FOUND_SHARED;
				goto out;
			}

			/* no parent == root of tree */
			ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
			if (ret < 0)
				goto out;
		}
		if (ref->count && ref->parent) {
			if (extent_item_pos && !ref->inode_list &&
			    ref->level == 0) {
				struct extent_buffer *eb;

				eb = read_tree_block(fs_info, ref->parent, 0,
						     0, ref->level, NULL);
				if (IS_ERR(eb)) {
					ret = PTR_ERR(eb);
					goto out;
				}
				if (!extent_buffer_uptodate(eb)) {
					free_extent_buffer(eb);
					ret = -EIO;
					goto out;
				}

				if (!path->skip_locking)
					btrfs_tree_read_lock(eb);
				ret = find_extent_in_eb(eb, bytenr,
					*extent_item_pos, &eie, ignore_offset);
				if (!path->skip_locking)
					btrfs_tree_read_unlock(eb);
				free_extent_buffer(eb);
				if (ret < 0)
					goto out;
				ref->inode_list = eie;
				/*
				 * We transferred the list ownership to the ref,
				 * so set to NULL to avoid a double free in case
				 * an error happens after this.
				 */
				eie = NULL;
			}
			ret = ulist_add_merge_ptr(refs, ref->parent,
						  ref->inode_list,
						  (void **)&eie, GFP_NOFS);
			if (ret < 0)
				goto out;
			if (!ret && extent_item_pos) {
				/*
				 * We've recorded that parent, so we must extend
				 * its inode list here.
				 *
				 * However if there was corruption we may not
				 * have found an eie, return an error in this
				 * case.
				 */
				ASSERT(eie);
				if (!eie) {
					ret = -EUCLEAN;
					goto out;
				}
				while (eie->next)
					eie = eie->next;
				eie->next = ref->inode_list;
			}
			eie = NULL;
			/*
			 * We have transferred the inode list ownership from
			 * this ref to the ref we added to the 'refs' ulist.
			 * So set this ref's inode list to NULL to avoid
			 * use-after-free when our caller uses it or double
			 * frees in case an error happens before we return.
			 */
			ref->inode_list = NULL;
		}
		cond_resched();
	}

out:
	btrfs_free_path(path);

	prelim_release(&preftrees.direct);
	prelim_release(&preftrees.indirect);
	prelim_release(&preftrees.indirect_missing_keys);

	if (ret < 0)
		free_inode_elem_list(eie);
	return ret;
}

/*
 * Finds all leafs with a reference to the specified combination of bytenr and
 * offset. key_list_head will point to a list of corresponding keys (caller must
 * free each list element). The leafs will be stored in the leafs ulist, which
 * must be freed with ulist_free.
 *
 * returns 0 on success, <0 on error
 */
int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 time_seq, struct ulist **leafs,
			 const u64 *extent_item_pos, bool ignore_offset)
{
	int ret;

	*leafs = ulist_alloc(GFP_NOFS);
	if (!*leafs)
		return -ENOMEM;

	ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
				*leafs, NULL, extent_item_pos, NULL, ignore_offset);
	if (ret < 0 && ret != -ENOENT) {
		free_leaf_list(*leafs);
		return ret;
	}

	return 0;
}

/*
 * walk all backrefs for a given extent to find all roots that reference this
 * extent. Walking a backref means finding all extents that reference this
 * extent and in turn walk the backrefs of those, too. Naturally this is a
 * recursive process, but here it is implemented in an iterative fashion: We
 * find all referencing extents for the extent in question and put them on a
 * list. In turn, we find all referencing extents for those, further appending
 * to the list. The way we iterate the list allows adding more elements after
 * the current while iterating. The process stops when we reach the end of the
 * list. Found roots are added to the roots list.
 *
 * returns 0 on success, < 0 on error.
 */
static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info, u64 bytenr,
				     u64 time_seq, struct ulist **roots,
				     bool ignore_offset)
{
	struct ulist *tmp;
	struct ulist_node *node = NULL;
	struct ulist_iterator uiter;
	int ret;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		return -ENOMEM;
	*roots = ulist_alloc(GFP_NOFS);
	if (!*roots) {
		ulist_free(tmp);
		return -ENOMEM;
	}

	ULIST_ITER_INIT(&uiter);
	while (1) {
		ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
					tmp, *roots, NULL, NULL, ignore_offset);
		if (ret < 0 && ret != -ENOENT) {
			ulist_free(tmp);
			ulist_free(*roots);
			*roots = NULL;
			return ret;
		}
		node = ulist_next(tmp, &uiter);
		if (!node)
			break;
		bytenr = node->val;
		cond_resched();
	}

	ulist_free(tmp);
	return 0;
}

int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 time_seq, struct ulist **roots,
			 bool skip_commit_root_sem)
{
	int ret;

	if (!trans && !skip_commit_root_sem)
		down_read(&fs_info->commit_root_sem);
	ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
					time_seq, roots, false);
	if (!trans && !skip_commit_root_sem)
		up_read(&fs_info->commit_root_sem);
	return ret;
}

/*
 * The caller has joined a transaction or is holding a read lock on the
 * fs_info->commit_root_sem semaphore, so no need to worry about the root's last
 * snapshot field changing while updating or checking the cache.
 */
static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
					struct btrfs_root *root,
					u64 bytenr, int level, bool *is_shared)
{
	struct btrfs_backref_shared_cache_entry *entry;

	if (!cache->use_cache)
		return false;

	if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
		return false;

	/*
	 * Level -1 is used for the data extent, which is not reliable to cache
	 * because its reference count can increase or decrease without us
	 * realizing. We cache results only for extent buffers that lead from
	 * the root node down to the leaf with the file extent item.
	 */
	ASSERT(level >= 0);

	entry = &cache->entries[level];

	/* Unused cache entry or being used for some other extent buffer. */
	if (entry->bytenr != bytenr)
		return false;

	/*
	 * We cached a false result, but the last snapshot generation of the
	 * root changed, so we now have a snapshot. Don't trust the result.
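	 * The new snapshot may be sharing this extent buffer even though the
	 * cached entry says it is not shared.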
	 */
	if (!entry->is_shared &&
	    entry->gen != btrfs_root_last_snapshot(&root->root_item))
		return false;

	/*
	 * If we cached a true result and the last generation used for dropping
	 * a root changed, we can not trust the result, because the dropped root
	 * could be a snapshot sharing this extent buffer.
	 */
	if (entry->is_shared &&
	    entry->gen != btrfs_get_last_root_drop_gen(root->fs_info))
		return false;

	*is_shared = entry->is_shared;
	/*
	 * If the node at this level is shared, then all nodes below are also
	 * shared. Currently some of the nodes below may be marked as not shared
	 * because we have just switched from one leaf to another, and switched
	 * also other nodes above the leaf and below the current level, so mark
	 * them as shared.
	 */
	if (*is_shared) {
		for (int i = 0; i < level; i++) {
			cache->entries[i].is_shared = true;
			cache->entries[i].gen = entry->gen;
		}
	}

	return true;
}

/*
 * The caller has joined a transaction or is holding a read lock on the
 * fs_info->commit_root_sem semaphore, so no need to worry about the root's last
 * snapshot field changing while updating or checking the cache.
 */
static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
				       struct btrfs_root *root,
				       u64 bytenr, int level, bool is_shared)
{
	struct btrfs_backref_shared_cache_entry *entry;
	u64 gen;

	if (!cache->use_cache)
		return;

	if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
		return;

	/*
	 * Level -1 is used for the data extent, which is not reliable to cache
	 * because its reference count can increase or decrease without us
	 * realizing. We cache results only for extent buffers that lead from
	 * the root node down to the leaf with the file extent item.
	 */
	ASSERT(level >= 0);

	if (is_shared)
		gen = btrfs_get_last_root_drop_gen(root->fs_info);
	else
		gen = btrfs_root_last_snapshot(&root->root_item);

	entry = &cache->entries[level];
	entry->bytenr = bytenr;
	entry->is_shared = is_shared;
	entry->gen = gen;

	/*
	 * If we found an extent buffer is shared, set the cache result for all
	 * extent buffers below it to true. As nodes in the path are COWed,
	 * their sharedness is moved to their children, and if a leaf is COWed,
	 * then the sharedness of a data extent becomes direct, the refcount of
	 * data extent is increased in the extent item at the extent tree.
	 */
	if (is_shared) {
		for (int i = 0; i < level; i++) {
			entry = &cache->entries[i];
			entry->is_shared = is_shared;
			entry->gen = gen;
		}
	}
}

/*
 * Check if a data extent is shared or not.
 *
 * @root:        The root the inode belongs to.
 * @inum:        Number of the inode whose extent we are checking.
 * @bytenr:      Logical bytenr of the extent we are checking.
 * @extent_gen:  Generation of the extent (file extent item) or 0 if it is
 *               not known.
 * @roots:       List of roots this extent is shared among.
 * @tmp:         Temporary list used for iteration.
 * @cache:       A backref lookup result cache.
 *
 * btrfs_is_data_extent_shared uses the backref walking code but will short
 * circuit as soon as it finds a root or inode that doesn't match the
 * one passed in. This provides a significant performance benefit for
 * callers (such as fiemap) which want to know whether the extent is
 * shared but do not need a ref count.
 *
 * This attempts to attach to the running transaction in order to account for
 * delayed refs, but continues on even when no running transaction exists.
 *
 * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
 */
int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
				u64 extent_gen,
				struct ulist *roots, struct ulist *tmp,
				struct btrfs_backref_shared_cache *cache)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_trans_handle *trans;
	struct ulist_iterator uiter;
	struct ulist_node *node;
	struct btrfs_seq_list elem = BTRFS_SEQ_LIST_INIT(elem);
	int ret = 0;
	struct share_check shared = {
		.root_objectid = root->root_key.objectid,
		.inum = inum,
		.share_count = 0,
		.have_delayed_delete_refs = false,
	};
	int level;

	ulist_init(roots);
	ulist_init(tmp);

	trans = btrfs_join_transaction_nostart(root);
	if (IS_ERR(trans)) {
		if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
			ret = PTR_ERR(trans);
			goto out;
		}
		trans = NULL;
		down_read(&fs_info->commit_root_sem);
	} else {
		btrfs_get_tree_mod_seq(fs_info, &elem);
	}

	/* -1 means we are in the bytenr of the data extent. */
	level = -1;
	ULIST_ITER_INIT(&uiter);
	cache->use_cache = true;
	while (1) {
		bool is_shared;
		bool cached;

		ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
					roots, NULL, &shared, false);
		if (ret == BACKREF_FOUND_SHARED) {
			/* this is the only condition under which we return 1 */
			ret = 1;
			if (level >= 0)
				store_backref_shared_cache(cache, root, bytenr,
							   level, true);
			break;
		}
		if (ret < 0 && ret != -ENOENT)
			break;
		ret = 0;
		/*
		 * If our data extent is not shared through reflinks and it was
		 * created in a generation after the last one used to create a
		 * snapshot of the inode's root, then it can not be shared
		 * indirectly through subtrees, as that can only happen with
		 * snapshots. In this case bail out, no need to check for the
		 * sharedness of extent buffers.
		 */
		if (level == -1 &&
		    extent_gen > btrfs_root_last_snapshot(&root->root_item))
			break;

		/*
		 * If our data extent was not directly shared (without multiple
		 * reference items), then it might have a single reference item
		 * with a count > 1 for the same offset, which means there are 2
		 * (or more) file extent items that point to the data extent -
		 * this happens when a file extent item needs to be split and
		 * then one item gets moved to another leaf due to a b+tree leaf
		 * split when inserting some item. In this case the file extent
		 * items may be located in different leaves and therefore some
		 * of the leaves may be referenced through shared subtrees while
		 * others are not. Since our extent buffer cache only works for
		 * a single path (by far the most common case and simpler to
		 * deal with), we can not use it if we have multiple leaves
		 * (which implies multiple paths).
		 */
		if (level == -1 && tmp->nnodes > 1)
			cache->use_cache = false;

		if (level >= 0)
			store_backref_shared_cache(cache, root, bytenr,
						   level, false);
		node = ulist_next(tmp, &uiter);
		if (!node)
			break;
		bytenr = node->val;
		level++;
		cached = lookup_backref_shared_cache(cache, root, bytenr, level,
						     &is_shared);
		if (cached) {
			ret = (is_shared ? 1 : 0);
			break;
		}
		shared.share_count = 0;
		shared.have_delayed_delete_refs = false;
		cond_resched();
	}

	if (trans) {
		btrfs_put_tree_mod_seq(fs_info, &elem);
		btrfs_end_transaction(trans);
	} else {
		up_read(&fs_info->commit_root_sem);
	}
out:
	ulist_release(roots);
	ulist_release(tmp);
	return ret;
}

int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
			  u64 start_off, struct btrfs_path *path,
			  struct btrfs_inode_extref **ret_extref,
			  u64 *found_off)
{
	int ret, slot;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_inode_extref *extref;
	const struct extent_buffer *leaf;
	unsigned long ptr;

	key.objectid = inode_objectid;
	key.type = BTRFS_INODE_EXTREF_KEY;
	key.offset = start_off;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;

	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			/*
			 * If the item at offset is not found,
			 * btrfs_search_slot will point us to the slot
			 * where it should be inserted. In our case
			 * that will be the slot directly before the
			 * next INODE_REF_KEY_V2 item. In the case
			 * that we're pointing to the last slot in a
			 * leaf, we must move one leaf over.
			 */
			ret = btrfs_next_leaf(root, path);
			if (ret) {
				if (ret >= 1)
					ret = -ENOENT;
				break;
			}
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		/*
		 * Check that we're still looking at an extended ref key for
		 * this particular objectid. If we have different
		 * objectid or type then there are no more to be found
		 * in the tree and we can exit.
		 */
		ret = -ENOENT;
		if (found_key.objectid != inode_objectid)
			break;
		if (found_key.type != BTRFS_INODE_EXTREF_KEY)
			break;

		ret = 0;
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		extref = (struct btrfs_inode_extref *)ptr;
		*ret_extref = extref;
		if (found_off)
			*found_off = found_key.offset;
		break;
	}

	return ret;
}

/*
 * this iterates to turn a name (from iref/extref) into a full filesystem path.
 * Elements of the path are separated by '/' and the path is guaranteed to be
 * 0-terminated. the path is only given within the current file system.
 * Therefore, it never starts with a '/'. the caller is responsible to provide
 * "size" bytes in "dest". the dest buffer will be filled backwards. finally,
 * the start point of the resulting string is returned. this pointer is within
 * dest, normally.
 * in case the path buffer would overflow, the pointer is decremented further
 * as if output was written to the buffer, though no more output is actually
 * generated. that way, the caller can determine how much space would be
 * required for the path to fit into the buffer. in that case, the returned
 * value will be smaller than dest. callers must check this!
 */
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
			u32 name_len, unsigned long name_off,
			struct extent_buffer *eb_in, u64 parent,
			char *dest, u32 size)
{
	int slot;
	u64 next_inum;
	int ret;
	s64 bytes_left = ((s64)size) - 1;
	struct extent_buffer *eb = eb_in;
	struct btrfs_key found_key;
	struct btrfs_inode_ref *iref;

	if (bytes_left >= 0)
		dest[bytes_left] = '\0';

	while (1) {
		bytes_left -= name_len;
		if (bytes_left >= 0)
			read_extent_buffer(eb, dest + bytes_left,
					   name_off, name_len);
		if (eb != eb_in) {
			if (!path->skip_locking)
				btrfs_tree_read_unlock(eb);
			free_extent_buffer(eb);
		}
		ret = btrfs_find_item(fs_root, path, parent, 0,
				      BTRFS_INODE_REF_KEY, &found_key);
		if (ret > 0)
			ret = -ENOENT;
		if (ret)
			break;

		next_inum = found_key.offset;

		/* regular exit ahead */
		if (parent == next_inum)
			break;

		slot = path->slots[0];
		eb = path->nodes[0];
		/* make sure we can use eb after releasing the path */
		if (eb != eb_in) {
			path->nodes[0] = NULL;
			path->locks[0] = 0;
		}
		btrfs_release_path(path);
		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);

		name_len = btrfs_inode_ref_name_len(eb, iref);
		name_off = (unsigned long)(iref + 1);

		parent = next_inum;
		--bytes_left;
		if (bytes_left >= 0)
			dest[bytes_left] = '/';
	}

	btrfs_release_path(path);

	if (ret)
		return ERR_PTR(ret);

	return dest + bytes_left;
}

/*
 * this makes the path point to (logical EXTENT_ITEM *)
 * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for
 * tree blocks and <0 on error.
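 * found_key is filled with the key of the extent item that was found.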
1951 */ 1952 int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, 1953 struct btrfs_path *path, struct btrfs_key *found_key, 1954 u64 *flags_ret) 1955 { 1956 struct btrfs_root *extent_root = btrfs_extent_root(fs_info, logical); 1957 int ret; 1958 u64 flags; 1959 u64 size = 0; 1960 u32 item_size; 1961 const struct extent_buffer *eb; 1962 struct btrfs_extent_item *ei; 1963 struct btrfs_key key; 1964 1965 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) 1966 key.type = BTRFS_METADATA_ITEM_KEY; 1967 else 1968 key.type = BTRFS_EXTENT_ITEM_KEY; 1969 key.objectid = logical; 1970 key.offset = (u64)-1; 1971 1972 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 1973 if (ret < 0) 1974 return ret; 1975 1976 ret = btrfs_previous_extent_item(extent_root, path, 0); 1977 if (ret) { 1978 if (ret > 0) 1979 ret = -ENOENT; 1980 return ret; 1981 } 1982 btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); 1983 if (found_key->type == BTRFS_METADATA_ITEM_KEY) 1984 size = fs_info->nodesize; 1985 else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) 1986 size = found_key->offset; 1987 1988 if (found_key->objectid > logical || 1989 found_key->objectid + size <= logical) { 1990 btrfs_debug(fs_info, 1991 "logical %llu is not within any extent", logical); 1992 return -ENOENT; 1993 } 1994 1995 eb = path->nodes[0]; 1996 item_size = btrfs_item_size(eb, path->slots[0]); 1997 BUG_ON(item_size < sizeof(*ei)); 1998 1999 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); 2000 flags = btrfs_extent_flags(eb, ei); 2001 2002 btrfs_debug(fs_info, 2003 "logical %llu is at position %llu within the extent (%llu EXTENT_ITEM %llu) flags %#llx size %u", 2004 logical, logical - found_key->objectid, found_key->objectid, 2005 found_key->offset, flags, item_size); 2006 2007 WARN_ON(!flags_ret); 2008 if (flags_ret) { 2009 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) 2010 *flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK; 2011 else if (flags & BTRFS_EXTENT_FLAG_DATA) 2012 *flags_ret = BTRFS_EXTENT_FLAG_DATA; 2013 else 2014 BUG(); 2015 return 0; 2016 } 2017 2018 return -EIO; 2019 } 2020 2021 /* 2022 * helper function to iterate extent inline refs. ptr must point to a 0 value 2023 * for the first call and may be modified. it is used to track state. 2024 * if more refs exist, 0 is returned and the next call to 2025 * get_extent_inline_ref must pass the modified ptr parameter to get the 2026 * next ref. after the last ref was processed, 1 is returned. 
2027 * returns <0 on error 2028 */ 2029 static int get_extent_inline_ref(unsigned long *ptr, 2030 const struct extent_buffer *eb, 2031 const struct btrfs_key *key, 2032 const struct btrfs_extent_item *ei, 2033 u32 item_size, 2034 struct btrfs_extent_inline_ref **out_eiref, 2035 int *out_type) 2036 { 2037 unsigned long end; 2038 u64 flags; 2039 struct btrfs_tree_block_info *info; 2040 2041 if (!*ptr) { 2042 /* first call */ 2043 flags = btrfs_extent_flags(eb, ei); 2044 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 2045 if (key->type == BTRFS_METADATA_ITEM_KEY) { 2046 /* a skinny metadata extent */ 2047 *out_eiref = 2048 (struct btrfs_extent_inline_ref *)(ei + 1); 2049 } else { 2050 WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY); 2051 info = (struct btrfs_tree_block_info *)(ei + 1); 2052 *out_eiref = 2053 (struct btrfs_extent_inline_ref *)(info + 1); 2054 } 2055 } else { 2056 *out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1); 2057 } 2058 *ptr = (unsigned long)*out_eiref; 2059 if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size) 2060 return -ENOENT; 2061 } 2062 2063 end = (unsigned long)ei + item_size; 2064 *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr); 2065 *out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref, 2066 BTRFS_REF_TYPE_ANY); 2067 if (*out_type == BTRFS_REF_TYPE_INVALID) 2068 return -EUCLEAN; 2069 2070 *ptr += btrfs_extent_inline_ref_size(*out_type); 2071 WARN_ON(*ptr > end); 2072 if (*ptr == end) 2073 return 1; /* last */ 2074 2075 return 0; 2076 } 2077 2078 /* 2079 * reads the tree block backref for an extent. tree level and root are returned 2080 * through out_level and out_root. ptr must point to a 0 value for the first 2081 * call and may be modified (see get_extent_inline_ref comment). 2082 * returns 0 if data was provided, 1 if there was no more data to provide or 2083 * <0 on error. 
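 *
 * Illustrative loop over all tree backrefs of one extent item (a sketch
 * only; it assumes @eb, @key, @ei and @item_size already describe the
 * extent item, e.g. as located by extent_from_logical() above):
 *
 *	unsigned long ptr = 0;
 *	u64 root;
 *	u8 level;
 *
 *	do {
 *		ret = tree_backref_for_extent(&ptr, eb, &key, ei, item_size,
 *					      &root, &level);
 *		if (ret < 0)
 *			return ret;
 *		if (ret == 0)
 *			pr_debug("backref from root %llu, level %d\n",
 *				 root, level);
 *	} while (ret == 0);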
2084 */ 2085 int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, 2086 struct btrfs_key *key, struct btrfs_extent_item *ei, 2087 u32 item_size, u64 *out_root, u8 *out_level) 2088 { 2089 int ret; 2090 int type; 2091 struct btrfs_extent_inline_ref *eiref; 2092 2093 if (*ptr == (unsigned long)-1) 2094 return 1; 2095 2096 while (1) { 2097 ret = get_extent_inline_ref(ptr, eb, key, ei, item_size, 2098 &eiref, &type); 2099 if (ret < 0) 2100 return ret; 2101 2102 if (type == BTRFS_TREE_BLOCK_REF_KEY || 2103 type == BTRFS_SHARED_BLOCK_REF_KEY) 2104 break; 2105 2106 if (ret == 1) 2107 return 1; 2108 } 2109 2110 /* we can treat both ref types equally here */ 2111 *out_root = btrfs_extent_inline_ref_offset(eb, eiref); 2112 2113 if (key->type == BTRFS_EXTENT_ITEM_KEY) { 2114 struct btrfs_tree_block_info *info; 2115 2116 info = (struct btrfs_tree_block_info *)(ei + 1); 2117 *out_level = btrfs_tree_block_level(eb, info); 2118 } else { 2119 ASSERT(key->type == BTRFS_METADATA_ITEM_KEY); 2120 *out_level = (u8)key->offset; 2121 } 2122 2123 if (ret == 1) 2124 *ptr = (unsigned long)-1; 2125 2126 return 0; 2127 } 2128 2129 static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, 2130 struct extent_inode_elem *inode_list, 2131 u64 root, u64 extent_item_objectid, 2132 iterate_extent_inodes_t *iterate, void *ctx) 2133 { 2134 struct extent_inode_elem *eie; 2135 int ret = 0; 2136 2137 for (eie = inode_list; eie; eie = eie->next) { 2138 btrfs_debug(fs_info, 2139 "ref for %llu resolved, key (%llu EXTEND_DATA %llu), root %llu", 2140 extent_item_objectid, eie->inum, 2141 eie->offset, root); 2142 ret = iterate(eie->inum, eie->offset, root, ctx); 2143 if (ret) { 2144 btrfs_debug(fs_info, 2145 "stopping iteration for %llu due to ret=%d", 2146 extent_item_objectid, ret); 2147 break; 2148 } 2149 } 2150 2151 return ret; 2152 } 2153 2154 /* 2155 * calls iterate() for every inode that references the extent identified by 2156 * the given parameters. 2157 * when the iterator function returns a non-zero value, iteration stops. 
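 *
 * Hedged usage sketch; the callback name and extent_bytenr below are made
 * up for illustration (the in-tree user is build_ino_list() further down),
 * and the callback must match iterate_extent_inodes_t:
 *
 *	static int count_inode_refs(u64 inum, u64 offset, u64 root, void *ctx)
 *	{
 *		(*(u64 *)ctx)++;
 *		return 0;	(a non-zero return stops the iteration)
 *	}
 *
 *	u64 nrefs = 0;
 *
 *	ret = iterate_extent_inodes(fs_info, extent_bytenr, 0, 1,
 *				    count_inode_refs, &nrefs, false);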
2158 */ 2159 int iterate_extent_inodes(struct btrfs_fs_info *fs_info, 2160 u64 extent_item_objectid, u64 extent_item_pos, 2161 int search_commit_root, 2162 iterate_extent_inodes_t *iterate, void *ctx, 2163 bool ignore_offset) 2164 { 2165 int ret; 2166 struct btrfs_trans_handle *trans = NULL; 2167 struct ulist *refs = NULL; 2168 struct ulist *roots = NULL; 2169 struct ulist_node *ref_node = NULL; 2170 struct ulist_node *root_node = NULL; 2171 struct btrfs_seq_list seq_elem = BTRFS_SEQ_LIST_INIT(seq_elem); 2172 struct ulist_iterator ref_uiter; 2173 struct ulist_iterator root_uiter; 2174 2175 btrfs_debug(fs_info, "resolving all inodes for extent %llu", 2176 extent_item_objectid); 2177 2178 if (!search_commit_root) { 2179 trans = btrfs_attach_transaction(fs_info->tree_root); 2180 if (IS_ERR(trans)) { 2181 if (PTR_ERR(trans) != -ENOENT && 2182 PTR_ERR(trans) != -EROFS) 2183 return PTR_ERR(trans); 2184 trans = NULL; 2185 } 2186 } 2187 2188 if (trans) 2189 btrfs_get_tree_mod_seq(fs_info, &seq_elem); 2190 else 2191 down_read(&fs_info->commit_root_sem); 2192 2193 ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, 2194 seq_elem.seq, &refs, 2195 &extent_item_pos, ignore_offset); 2196 if (ret) 2197 goto out; 2198 2199 ULIST_ITER_INIT(&ref_uiter); 2200 while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { 2201 ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val, 2202 seq_elem.seq, &roots, 2203 ignore_offset); 2204 if (ret) 2205 break; 2206 ULIST_ITER_INIT(&root_uiter); 2207 while (!ret && (root_node = ulist_next(roots, &root_uiter))) { 2208 btrfs_debug(fs_info, 2209 "root %llu references leaf %llu, data list %#llx", 2210 root_node->val, ref_node->val, 2211 ref_node->aux); 2212 ret = iterate_leaf_refs(fs_info, 2213 (struct extent_inode_elem *) 2214 (uintptr_t)ref_node->aux, 2215 root_node->val, 2216 extent_item_objectid, 2217 iterate, ctx); 2218 } 2219 ulist_free(roots); 2220 } 2221 2222 free_leaf_list(refs); 2223 out: 2224 if (trans) { 2225 btrfs_put_tree_mod_seq(fs_info, &seq_elem); 2226 btrfs_end_transaction(trans); 2227 } else { 2228 up_read(&fs_info->commit_root_sem); 2229 } 2230 2231 return ret; 2232 } 2233 2234 static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) 2235 { 2236 struct btrfs_data_container *inodes = ctx; 2237 const size_t c = 3 * sizeof(u64); 2238 2239 if (inodes->bytes_left >= c) { 2240 inodes->bytes_left -= c; 2241 inodes->val[inodes->elem_cnt] = inum; 2242 inodes->val[inodes->elem_cnt + 1] = offset; 2243 inodes->val[inodes->elem_cnt + 2] = root; 2244 inodes->elem_cnt += 3; 2245 } else { 2246 inodes->bytes_missing += c - inodes->bytes_left; 2247 inodes->bytes_left = 0; 2248 inodes->elem_missed += 3; 2249 } 2250 2251 return 0; 2252 } 2253 2254 int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, 2255 struct btrfs_path *path, 2256 void *ctx, bool ignore_offset) 2257 { 2258 int ret; 2259 u64 extent_item_pos; 2260 u64 flags = 0; 2261 struct btrfs_key found_key; 2262 int search_commit_root = path->search_commit_root; 2263 2264 ret = extent_from_logical(fs_info, logical, path, &found_key, &flags); 2265 btrfs_release_path(path); 2266 if (ret < 0) 2267 return ret; 2268 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) 2269 return -EINVAL; 2270 2271 extent_item_pos = logical - found_key.objectid; 2272 ret = iterate_extent_inodes(fs_info, found_key.objectid, 2273 extent_item_pos, search_commit_root, 2274 build_ino_list, ctx, ignore_offset); 2275 2276 return ret; 2277 } 2278 2279 static int inode_to_path(u64 inum, u32 name_len, 
unsigned long name_off, 2280 struct extent_buffer *eb, struct inode_fs_paths *ipath); 2281 2282 static int iterate_inode_refs(u64 inum, struct inode_fs_paths *ipath) 2283 { 2284 int ret = 0; 2285 int slot; 2286 u32 cur; 2287 u32 len; 2288 u32 name_len; 2289 u64 parent = 0; 2290 int found = 0; 2291 struct btrfs_root *fs_root = ipath->fs_root; 2292 struct btrfs_path *path = ipath->btrfs_path; 2293 struct extent_buffer *eb; 2294 struct btrfs_inode_ref *iref; 2295 struct btrfs_key found_key; 2296 2297 while (!ret) { 2298 ret = btrfs_find_item(fs_root, path, inum, 2299 parent ? parent + 1 : 0, BTRFS_INODE_REF_KEY, 2300 &found_key); 2301 2302 if (ret < 0) 2303 break; 2304 if (ret) { 2305 ret = found ? 0 : -ENOENT; 2306 break; 2307 } 2308 ++found; 2309 2310 parent = found_key.offset; 2311 slot = path->slots[0]; 2312 eb = btrfs_clone_extent_buffer(path->nodes[0]); 2313 if (!eb) { 2314 ret = -ENOMEM; 2315 break; 2316 } 2317 btrfs_release_path(path); 2318 2319 iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); 2320 2321 for (cur = 0; cur < btrfs_item_size(eb, slot); cur += len) { 2322 name_len = btrfs_inode_ref_name_len(eb, iref); 2323 /* path must be released before calling iterate()! */ 2324 btrfs_debug(fs_root->fs_info, 2325 "following ref at offset %u for inode %llu in tree %llu", 2326 cur, found_key.objectid, 2327 fs_root->root_key.objectid); 2328 ret = inode_to_path(parent, name_len, 2329 (unsigned long)(iref + 1), eb, ipath); 2330 if (ret) 2331 break; 2332 len = sizeof(*iref) + name_len; 2333 iref = (struct btrfs_inode_ref *)((char *)iref + len); 2334 } 2335 free_extent_buffer(eb); 2336 } 2337 2338 btrfs_release_path(path); 2339 2340 return ret; 2341 } 2342 2343 static int iterate_inode_extrefs(u64 inum, struct inode_fs_paths *ipath) 2344 { 2345 int ret; 2346 int slot; 2347 u64 offset = 0; 2348 u64 parent; 2349 int found = 0; 2350 struct btrfs_root *fs_root = ipath->fs_root; 2351 struct btrfs_path *path = ipath->btrfs_path; 2352 struct extent_buffer *eb; 2353 struct btrfs_inode_extref *extref; 2354 u32 item_size; 2355 u32 cur_offset; 2356 unsigned long ptr; 2357 2358 while (1) { 2359 ret = btrfs_find_one_extref(fs_root, inum, offset, path, &extref, 2360 &offset); 2361 if (ret < 0) 2362 break; 2363 if (ret) { 2364 ret = found ? 
0 : -ENOENT; 2365 break; 2366 } 2367 ++found; 2368 2369 slot = path->slots[0]; 2370 eb = btrfs_clone_extent_buffer(path->nodes[0]); 2371 if (!eb) { 2372 ret = -ENOMEM; 2373 break; 2374 } 2375 btrfs_release_path(path); 2376 2377 item_size = btrfs_item_size(eb, slot); 2378 ptr = btrfs_item_ptr_offset(eb, slot); 2379 cur_offset = 0; 2380 2381 while (cur_offset < item_size) { 2382 u32 name_len; 2383 2384 extref = (struct btrfs_inode_extref *)(ptr + cur_offset); 2385 parent = btrfs_inode_extref_parent(eb, extref); 2386 name_len = btrfs_inode_extref_name_len(eb, extref); 2387 ret = inode_to_path(parent, name_len, 2388 (unsigned long)&extref->name, eb, ipath); 2389 if (ret) 2390 break; 2391 2392 cur_offset += btrfs_inode_extref_name_len(eb, extref); 2393 cur_offset += sizeof(*extref); 2394 } 2395 free_extent_buffer(eb); 2396 2397 offset++; 2398 } 2399 2400 btrfs_release_path(path); 2401 2402 return ret; 2403 } 2404 2405 /* 2406 * returns 0 if the path could be dumped (probably truncated) 2407 * returns <0 in case of an error 2408 */ 2409 static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off, 2410 struct extent_buffer *eb, struct inode_fs_paths *ipath) 2411 { 2412 char *fspath; 2413 char *fspath_min; 2414 int i = ipath->fspath->elem_cnt; 2415 const int s_ptr = sizeof(char *); 2416 u32 bytes_left; 2417 2418 bytes_left = ipath->fspath->bytes_left > s_ptr ? 2419 ipath->fspath->bytes_left - s_ptr : 0; 2420 2421 fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr; 2422 fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len, 2423 name_off, eb, inum, fspath_min, bytes_left); 2424 if (IS_ERR(fspath)) 2425 return PTR_ERR(fspath); 2426 2427 if (fspath > fspath_min) { 2428 ipath->fspath->val[i] = (u64)(unsigned long)fspath; 2429 ++ipath->fspath->elem_cnt; 2430 ipath->fspath->bytes_left = fspath - fspath_min; 2431 } else { 2432 ++ipath->fspath->elem_missed; 2433 ipath->fspath->bytes_missing += fspath_min - fspath; 2434 ipath->fspath->bytes_left = 0; 2435 } 2436 2437 return 0; 2438 } 2439 2440 /* 2441 * this dumps all file system paths to the inode into the ipath struct, provided 2442 * is has been created large enough. each path is zero-terminated and accessed 2443 * from ipath->fspath->val[i]. 2444 * when it returns, there are ipath->fspath->elem_cnt number of paths available 2445 * in ipath->fspath->val[]. when the allocated space wasn't sufficient, the 2446 * number of missed paths is recorded in ipath->fspath->elem_missed, otherwise, 2447 * it's zero. ipath->fspath->bytes_missing holds the number of bytes that would 2448 * have been needed to return all paths. 
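 *
 * Hedged usage sketch (modeled on the INO_PATHS ioctl; the 4096-byte size
 * is arbitrary and the IS_ERR()/error handling is trimmed):
 *
 *	struct btrfs_path *path = btrfs_alloc_path();
 *	struct inode_fs_paths *ipath = init_ipath(4096, fs_root, path);
 *	int i;
 *
 *	ret = paths_from_inode(inum, ipath);
 *	for (i = 0; i < ipath->fspath->elem_cnt; i++)
 *		pr_debug("path %d: %s\n", i,
 *			 (char *)(unsigned long)ipath->fspath->val[i]);
 *	free_ipath(ipath);
 *	btrfs_free_path(path);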
2449 */ 2450 int paths_from_inode(u64 inum, struct inode_fs_paths *ipath) 2451 { 2452 int ret; 2453 int found_refs = 0; 2454 2455 ret = iterate_inode_refs(inum, ipath); 2456 if (!ret) 2457 ++found_refs; 2458 else if (ret != -ENOENT) 2459 return ret; 2460 2461 ret = iterate_inode_extrefs(inum, ipath); 2462 if (ret == -ENOENT && found_refs) 2463 return 0; 2464 2465 return ret; 2466 } 2467 2468 struct btrfs_data_container *init_data_container(u32 total_bytes) 2469 { 2470 struct btrfs_data_container *data; 2471 size_t alloc_bytes; 2472 2473 alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); 2474 data = kvmalloc(alloc_bytes, GFP_KERNEL); 2475 if (!data) 2476 return ERR_PTR(-ENOMEM); 2477 2478 if (total_bytes >= sizeof(*data)) { 2479 data->bytes_left = total_bytes - sizeof(*data); 2480 data->bytes_missing = 0; 2481 } else { 2482 data->bytes_missing = sizeof(*data) - total_bytes; 2483 data->bytes_left = 0; 2484 } 2485 2486 data->elem_cnt = 0; 2487 data->elem_missed = 0; 2488 2489 return data; 2490 } 2491 2492 /* 2493 * allocates space to return multiple file system paths for an inode. 2494 * total_bytes to allocate are passed, note that space usable for actual path 2495 * information will be total_bytes - sizeof(struct inode_fs_paths). 2496 * the returned pointer must be freed with free_ipath() in the end. 2497 */ 2498 struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, 2499 struct btrfs_path *path) 2500 { 2501 struct inode_fs_paths *ifp; 2502 struct btrfs_data_container *fspath; 2503 2504 fspath = init_data_container(total_bytes); 2505 if (IS_ERR(fspath)) 2506 return ERR_CAST(fspath); 2507 2508 ifp = kmalloc(sizeof(*ifp), GFP_KERNEL); 2509 if (!ifp) { 2510 kvfree(fspath); 2511 return ERR_PTR(-ENOMEM); 2512 } 2513 2514 ifp->btrfs_path = path; 2515 ifp->fspath = fspath; 2516 ifp->fs_root = fs_root; 2517 2518 return ifp; 2519 } 2520 2521 void free_ipath(struct inode_fs_paths *ipath) 2522 { 2523 if (!ipath) 2524 return; 2525 kvfree(ipath->fspath); 2526 kfree(ipath); 2527 } 2528 2529 struct btrfs_backref_iter *btrfs_backref_iter_alloc( 2530 struct btrfs_fs_info *fs_info, gfp_t gfp_flag) 2531 { 2532 struct btrfs_backref_iter *ret; 2533 2534 ret = kzalloc(sizeof(*ret), gfp_flag); 2535 if (!ret) 2536 return NULL; 2537 2538 ret->path = btrfs_alloc_path(); 2539 if (!ret->path) { 2540 kfree(ret); 2541 return NULL; 2542 } 2543 2544 /* Current backref iterator only supports iteration in commit root */ 2545 ret->path->search_commit_root = 1; 2546 ret->path->skip_locking = 1; 2547 ret->fs_info = fs_info; 2548 2549 return ret; 2550 } 2551 2552 int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr) 2553 { 2554 struct btrfs_fs_info *fs_info = iter->fs_info; 2555 struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr); 2556 struct btrfs_path *path = iter->path; 2557 struct btrfs_extent_item *ei; 2558 struct btrfs_key key; 2559 int ret; 2560 2561 key.objectid = bytenr; 2562 key.type = BTRFS_METADATA_ITEM_KEY; 2563 key.offset = (u64)-1; 2564 iter->bytenr = bytenr; 2565 2566 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2567 if (ret < 0) 2568 return ret; 2569 if (ret == 0) { 2570 ret = -EUCLEAN; 2571 goto release; 2572 } 2573 if (path->slots[0] == 0) { 2574 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); 2575 ret = -EUCLEAN; 2576 goto release; 2577 } 2578 path->slots[0]--; 2579 2580 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 2581 if ((key.type != BTRFS_EXTENT_ITEM_KEY && 2582 key.type != BTRFS_METADATA_ITEM_KEY) || key.objectid 
!= bytenr) { 2583 ret = -ENOENT; 2584 goto release; 2585 } 2586 memcpy(&iter->cur_key, &key, sizeof(key)); 2587 iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0], 2588 path->slots[0]); 2589 iter->end_ptr = (u32)(iter->item_ptr + 2590 btrfs_item_size(path->nodes[0], path->slots[0])); 2591 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 2592 struct btrfs_extent_item); 2593 2594 /* 2595 * Only support iteration on tree backref yet. 2596 * 2597 * This is an extra precaution for non skinny-metadata, where 2598 * EXTENT_ITEM is also used for tree blocks, that we can only use 2599 * extent flags to determine if it's a tree block. 2600 */ 2601 if (btrfs_extent_flags(path->nodes[0], ei) & BTRFS_EXTENT_FLAG_DATA) { 2602 ret = -ENOTSUPP; 2603 goto release; 2604 } 2605 iter->cur_ptr = (u32)(iter->item_ptr + sizeof(*ei)); 2606 2607 /* If there is no inline backref, go search for keyed backref */ 2608 if (iter->cur_ptr >= iter->end_ptr) { 2609 ret = btrfs_next_item(extent_root, path); 2610 2611 /* No inline nor keyed ref */ 2612 if (ret > 0) { 2613 ret = -ENOENT; 2614 goto release; 2615 } 2616 if (ret < 0) 2617 goto release; 2618 2619 btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key, 2620 path->slots[0]); 2621 if (iter->cur_key.objectid != bytenr || 2622 (iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY && 2623 iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY)) { 2624 ret = -ENOENT; 2625 goto release; 2626 } 2627 iter->cur_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0], 2628 path->slots[0]); 2629 iter->item_ptr = iter->cur_ptr; 2630 iter->end_ptr = (u32)(iter->item_ptr + btrfs_item_size( 2631 path->nodes[0], path->slots[0])); 2632 } 2633 2634 return 0; 2635 release: 2636 btrfs_backref_iter_release(iter); 2637 return ret; 2638 } 2639 2640 /* 2641 * Go to the next backref item of current bytenr, can be either inlined or 2642 * keyed. 2643 * 2644 * Caller needs to check whether it's inline ref or not by iter->cur_key. 2645 * 2646 * Return 0 if we get next backref without problem. 2647 * Return >0 if there is no extra backref for this bytenr. 2648 * Return <0 if there is something wrong happened. 
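 *
 * Typical iteration, mirroring btrfs_backref_add_tree_node() below (sketch
 * only; @iter comes from btrfs_backref_iter_alloc() and error handling is
 * abbreviated):
 *
 *	ret = btrfs_backref_iter_start(iter, bytenr);
 *	if (ret < 0)
 *		return ret;
 *	for (; ret == 0; ret = btrfs_backref_iter_next(iter)) {
 *		(look at iter->cur_key, and at iter->cur_ptr for inline refs)
 *	}
 *	btrfs_backref_iter_release(iter);
 *	if (ret < 0)
 *		return ret;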
2649 */ 2650 int btrfs_backref_iter_next(struct btrfs_backref_iter *iter) 2651 { 2652 struct extent_buffer *eb = btrfs_backref_get_eb(iter); 2653 struct btrfs_root *extent_root; 2654 struct btrfs_path *path = iter->path; 2655 struct btrfs_extent_inline_ref *iref; 2656 int ret; 2657 u32 size; 2658 2659 if (btrfs_backref_iter_is_inline_ref(iter)) { 2660 /* We're still inside the inline refs */ 2661 ASSERT(iter->cur_ptr < iter->end_ptr); 2662 2663 if (btrfs_backref_has_tree_block_info(iter)) { 2664 /* First tree block info */ 2665 size = sizeof(struct btrfs_tree_block_info); 2666 } else { 2667 /* Use inline ref type to determine the size */ 2668 int type; 2669 2670 iref = (struct btrfs_extent_inline_ref *) 2671 ((unsigned long)iter->cur_ptr); 2672 type = btrfs_extent_inline_ref_type(eb, iref); 2673 2674 size = btrfs_extent_inline_ref_size(type); 2675 } 2676 iter->cur_ptr += size; 2677 if (iter->cur_ptr < iter->end_ptr) 2678 return 0; 2679 2680 /* All inline items iterated, fall through */ 2681 } 2682 2683 /* We're at keyed items, there is no inline item, go to the next one */ 2684 extent_root = btrfs_extent_root(iter->fs_info, iter->bytenr); 2685 ret = btrfs_next_item(extent_root, iter->path); 2686 if (ret) 2687 return ret; 2688 2689 btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key, path->slots[0]); 2690 if (iter->cur_key.objectid != iter->bytenr || 2691 (iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY && 2692 iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY)) 2693 return 1; 2694 iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0], 2695 path->slots[0]); 2696 iter->cur_ptr = iter->item_ptr; 2697 iter->end_ptr = iter->item_ptr + (u32)btrfs_item_size(path->nodes[0], 2698 path->slots[0]); 2699 return 0; 2700 } 2701 2702 void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info, 2703 struct btrfs_backref_cache *cache, int is_reloc) 2704 { 2705 int i; 2706 2707 cache->rb_root = RB_ROOT; 2708 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 2709 INIT_LIST_HEAD(&cache->pending[i]); 2710 INIT_LIST_HEAD(&cache->changed); 2711 INIT_LIST_HEAD(&cache->detached); 2712 INIT_LIST_HEAD(&cache->leaves); 2713 INIT_LIST_HEAD(&cache->pending_edge); 2714 INIT_LIST_HEAD(&cache->useless_node); 2715 cache->fs_info = fs_info; 2716 cache->is_reloc = is_reloc; 2717 } 2718 2719 struct btrfs_backref_node *btrfs_backref_alloc_node( 2720 struct btrfs_backref_cache *cache, u64 bytenr, int level) 2721 { 2722 struct btrfs_backref_node *node; 2723 2724 ASSERT(level >= 0 && level < BTRFS_MAX_LEVEL); 2725 node = kzalloc(sizeof(*node), GFP_NOFS); 2726 if (!node) 2727 return node; 2728 2729 INIT_LIST_HEAD(&node->list); 2730 INIT_LIST_HEAD(&node->upper); 2731 INIT_LIST_HEAD(&node->lower); 2732 RB_CLEAR_NODE(&node->rb_node); 2733 cache->nr_nodes++; 2734 node->level = level; 2735 node->bytenr = bytenr; 2736 2737 return node; 2738 } 2739 2740 struct btrfs_backref_edge *btrfs_backref_alloc_edge( 2741 struct btrfs_backref_cache *cache) 2742 { 2743 struct btrfs_backref_edge *edge; 2744 2745 edge = kzalloc(sizeof(*edge), GFP_NOFS); 2746 if (edge) 2747 cache->nr_edges++; 2748 return edge; 2749 } 2750 2751 /* 2752 * Drop the backref node from cache, also cleaning up all its 2753 * upper edges and any uncached nodes in the path. 2754 * 2755 * This cleanup happens bottom up, thus the node should either 2756 * be the lowest node in the cache or a detached node. 
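 *
 * Illustrative call (the in-tree callers are in btrfs_backref_release_cache()
 * just below); only a detached node or a lowest node may be passed in:
 *
 *	node = list_entry(cache->detached.next,
 *			  struct btrfs_backref_node, list);
 *	btrfs_backref_cleanup_node(cache, node);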
2757 */ 2758 void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache, 2759 struct btrfs_backref_node *node) 2760 { 2761 struct btrfs_backref_node *upper; 2762 struct btrfs_backref_edge *edge; 2763 2764 if (!node) 2765 return; 2766 2767 BUG_ON(!node->lowest && !node->detached); 2768 while (!list_empty(&node->upper)) { 2769 edge = list_entry(node->upper.next, struct btrfs_backref_edge, 2770 list[LOWER]); 2771 upper = edge->node[UPPER]; 2772 list_del(&edge->list[LOWER]); 2773 list_del(&edge->list[UPPER]); 2774 btrfs_backref_free_edge(cache, edge); 2775 2776 /* 2777 * Add the node to leaf node list if no other child block 2778 * cached. 2779 */ 2780 if (list_empty(&upper->lower)) { 2781 list_add_tail(&upper->lower, &cache->leaves); 2782 upper->lowest = 1; 2783 } 2784 } 2785 2786 btrfs_backref_drop_node(cache, node); 2787 } 2788 2789 /* 2790 * Release all nodes/edges from current cache 2791 */ 2792 void btrfs_backref_release_cache(struct btrfs_backref_cache *cache) 2793 { 2794 struct btrfs_backref_node *node; 2795 int i; 2796 2797 while (!list_empty(&cache->detached)) { 2798 node = list_entry(cache->detached.next, 2799 struct btrfs_backref_node, list); 2800 btrfs_backref_cleanup_node(cache, node); 2801 } 2802 2803 while (!list_empty(&cache->leaves)) { 2804 node = list_entry(cache->leaves.next, 2805 struct btrfs_backref_node, lower); 2806 btrfs_backref_cleanup_node(cache, node); 2807 } 2808 2809 cache->last_trans = 0; 2810 2811 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 2812 ASSERT(list_empty(&cache->pending[i])); 2813 ASSERT(list_empty(&cache->pending_edge)); 2814 ASSERT(list_empty(&cache->useless_node)); 2815 ASSERT(list_empty(&cache->changed)); 2816 ASSERT(list_empty(&cache->detached)); 2817 ASSERT(RB_EMPTY_ROOT(&cache->rb_root)); 2818 ASSERT(!cache->nr_nodes); 2819 ASSERT(!cache->nr_edges); 2820 } 2821 2822 /* 2823 * Handle direct tree backref 2824 * 2825 * Direct tree backref means, the backref item shows its parent bytenr 2826 * directly. This is for SHARED_BLOCK_REF backref (keyed or inlined). 2827 * 2828 * @ref_key: The converted backref key. 2829 * For keyed backref, it's the item key. 2830 * For inlined backref, objectid is the bytenr, 2831 * type is btrfs_inline_ref_type, offset is 2832 * btrfs_inline_ref_offset. 2833 */ 2834 static int handle_direct_tree_backref(struct btrfs_backref_cache *cache, 2835 struct btrfs_key *ref_key, 2836 struct btrfs_backref_node *cur) 2837 { 2838 struct btrfs_backref_edge *edge; 2839 struct btrfs_backref_node *upper; 2840 struct rb_node *rb_node; 2841 2842 ASSERT(ref_key->type == BTRFS_SHARED_BLOCK_REF_KEY); 2843 2844 /* Only reloc root uses backref pointing to itself */ 2845 if (ref_key->objectid == ref_key->offset) { 2846 struct btrfs_root *root; 2847 2848 cur->is_reloc_root = 1; 2849 /* Only reloc backref cache cares about a specific root */ 2850 if (cache->is_reloc) { 2851 root = find_reloc_root(cache->fs_info, cur->bytenr); 2852 if (!root) 2853 return -ENOENT; 2854 cur->root = root; 2855 } else { 2856 /* 2857 * For generic purpose backref cache, reloc root node 2858 * is useless. 
2859 */ 2860 list_add(&cur->list, &cache->useless_node); 2861 } 2862 return 0; 2863 } 2864 2865 edge = btrfs_backref_alloc_edge(cache); 2866 if (!edge) 2867 return -ENOMEM; 2868 2869 rb_node = rb_simple_search(&cache->rb_root, ref_key->offset); 2870 if (!rb_node) { 2871 /* Parent node not yet cached */ 2872 upper = btrfs_backref_alloc_node(cache, ref_key->offset, 2873 cur->level + 1); 2874 if (!upper) { 2875 btrfs_backref_free_edge(cache, edge); 2876 return -ENOMEM; 2877 } 2878 2879 /* 2880 * Backrefs for the upper level block isn't cached, add the 2881 * block to pending list 2882 */ 2883 list_add_tail(&edge->list[UPPER], &cache->pending_edge); 2884 } else { 2885 /* Parent node already cached */ 2886 upper = rb_entry(rb_node, struct btrfs_backref_node, rb_node); 2887 ASSERT(upper->checked); 2888 INIT_LIST_HEAD(&edge->list[UPPER]); 2889 } 2890 btrfs_backref_link_edge(edge, cur, upper, LINK_LOWER); 2891 return 0; 2892 } 2893 2894 /* 2895 * Handle indirect tree backref 2896 * 2897 * Indirect tree backref means, we only know which tree the node belongs to. 2898 * We still need to do a tree search to find out the parents. This is for 2899 * TREE_BLOCK_REF backref (keyed or inlined). 2900 * 2901 * @ref_key: The same as @ref_key in handle_direct_tree_backref() 2902 * @tree_key: The first key of this tree block. 2903 * @path: A clean (released) path, to avoid allocating path every time 2904 * the function get called. 2905 */ 2906 static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache, 2907 struct btrfs_path *path, 2908 struct btrfs_key *ref_key, 2909 struct btrfs_key *tree_key, 2910 struct btrfs_backref_node *cur) 2911 { 2912 struct btrfs_fs_info *fs_info = cache->fs_info; 2913 struct btrfs_backref_node *upper; 2914 struct btrfs_backref_node *lower; 2915 struct btrfs_backref_edge *edge; 2916 struct extent_buffer *eb; 2917 struct btrfs_root *root; 2918 struct rb_node *rb_node; 2919 int level; 2920 bool need_check = true; 2921 int ret; 2922 2923 root = btrfs_get_fs_root(fs_info, ref_key->offset, false); 2924 if (IS_ERR(root)) 2925 return PTR_ERR(root); 2926 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) 2927 cur->cowonly = 1; 2928 2929 if (btrfs_root_level(&root->root_item) == cur->level) { 2930 /* Tree root */ 2931 ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr); 2932 /* 2933 * For reloc backref cache, we may ignore reloc root. But for 2934 * general purpose backref cache, we can't rely on 2935 * btrfs_should_ignore_reloc_root() as it may conflict with 2936 * current running relocation and lead to missing root. 2937 * 2938 * For general purpose backref cache, reloc root detection is 2939 * completely relying on direct backref (key->offset is parent 2940 * bytenr), thus only do such check for reloc cache. 
2941 */ 2942 if (btrfs_should_ignore_reloc_root(root) && cache->is_reloc) { 2943 btrfs_put_root(root); 2944 list_add(&cur->list, &cache->useless_node); 2945 } else { 2946 cur->root = root; 2947 } 2948 return 0; 2949 } 2950 2951 level = cur->level + 1; 2952 2953 /* Search the tree to find parent blocks referring to the block */ 2954 path->search_commit_root = 1; 2955 path->skip_locking = 1; 2956 path->lowest_level = level; 2957 ret = btrfs_search_slot(NULL, root, tree_key, path, 0, 0); 2958 path->lowest_level = 0; 2959 if (ret < 0) { 2960 btrfs_put_root(root); 2961 return ret; 2962 } 2963 if (ret > 0 && path->slots[level] > 0) 2964 path->slots[level]--; 2965 2966 eb = path->nodes[level]; 2967 if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) { 2968 btrfs_err(fs_info, 2969 "couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)", 2970 cur->bytenr, level - 1, root->root_key.objectid, 2971 tree_key->objectid, tree_key->type, tree_key->offset); 2972 btrfs_put_root(root); 2973 ret = -ENOENT; 2974 goto out; 2975 } 2976 lower = cur; 2977 2978 /* Add all nodes and edges in the path */ 2979 for (; level < BTRFS_MAX_LEVEL; level++) { 2980 if (!path->nodes[level]) { 2981 ASSERT(btrfs_root_bytenr(&root->root_item) == 2982 lower->bytenr); 2983 /* Same as previous should_ignore_reloc_root() call */ 2984 if (btrfs_should_ignore_reloc_root(root) && 2985 cache->is_reloc) { 2986 btrfs_put_root(root); 2987 list_add(&lower->list, &cache->useless_node); 2988 } else { 2989 lower->root = root; 2990 } 2991 break; 2992 } 2993 2994 edge = btrfs_backref_alloc_edge(cache); 2995 if (!edge) { 2996 btrfs_put_root(root); 2997 ret = -ENOMEM; 2998 goto out; 2999 } 3000 3001 eb = path->nodes[level]; 3002 rb_node = rb_simple_search(&cache->rb_root, eb->start); 3003 if (!rb_node) { 3004 upper = btrfs_backref_alloc_node(cache, eb->start, 3005 lower->level + 1); 3006 if (!upper) { 3007 btrfs_put_root(root); 3008 btrfs_backref_free_edge(cache, edge); 3009 ret = -ENOMEM; 3010 goto out; 3011 } 3012 upper->owner = btrfs_header_owner(eb); 3013 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) 3014 upper->cowonly = 1; 3015 3016 /* 3017 * If we know the block isn't shared we can avoid 3018 * checking its backrefs. 3019 */ 3020 if (btrfs_block_can_be_shared(root, eb)) 3021 upper->checked = 0; 3022 else 3023 upper->checked = 1; 3024 3025 /* 3026 * Add the block to pending list if we need to check its 3027 * backrefs, we only do this once while walking up a 3028 * tree as we will catch anything else later on. 3029 */ 3030 if (!upper->checked && need_check) { 3031 need_check = false; 3032 list_add_tail(&edge->list[UPPER], 3033 &cache->pending_edge); 3034 } else { 3035 if (upper->checked) 3036 need_check = true; 3037 INIT_LIST_HEAD(&edge->list[UPPER]); 3038 } 3039 } else { 3040 upper = rb_entry(rb_node, struct btrfs_backref_node, 3041 rb_node); 3042 ASSERT(upper->checked); 3043 INIT_LIST_HEAD(&edge->list[UPPER]); 3044 if (!upper->owner) 3045 upper->owner = btrfs_header_owner(eb); 3046 } 3047 btrfs_backref_link_edge(edge, lower, upper, LINK_LOWER); 3048 3049 if (rb_node) { 3050 btrfs_put_root(root); 3051 break; 3052 } 3053 lower = upper; 3054 upper = NULL; 3055 } 3056 out: 3057 btrfs_release_path(path); 3058 return ret; 3059 } 3060 3061 /* 3062 * Add backref node @cur into @cache. 3063 * 3064 * NOTE: Even if the function returned 0, @cur is not yet cached as its upper 3065 * links aren't yet bi-directional. Needs to finish such links. 3066 * Use btrfs_backref_finish_upper_links() to finish such linkage. 
3067 * 3068 * @path: Released path for indirect tree backref lookup 3069 * @iter: Released backref iter for extent tree search 3070 * @node_key: The first key of the tree block 3071 */ 3072 int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache, 3073 struct btrfs_path *path, 3074 struct btrfs_backref_iter *iter, 3075 struct btrfs_key *node_key, 3076 struct btrfs_backref_node *cur) 3077 { 3078 struct btrfs_fs_info *fs_info = cache->fs_info; 3079 struct btrfs_backref_edge *edge; 3080 struct btrfs_backref_node *exist; 3081 int ret; 3082 3083 ret = btrfs_backref_iter_start(iter, cur->bytenr); 3084 if (ret < 0) 3085 return ret; 3086 /* 3087 * We skip the first btrfs_tree_block_info, as we don't use the key 3088 * stored in it, but fetch it from the tree block 3089 */ 3090 if (btrfs_backref_has_tree_block_info(iter)) { 3091 ret = btrfs_backref_iter_next(iter); 3092 if (ret < 0) 3093 goto out; 3094 /* No extra backref? This means the tree block is corrupted */ 3095 if (ret > 0) { 3096 ret = -EUCLEAN; 3097 goto out; 3098 } 3099 } 3100 WARN_ON(cur->checked); 3101 if (!list_empty(&cur->upper)) { 3102 /* 3103 * The backref was added previously when processing backref of 3104 * type BTRFS_TREE_BLOCK_REF_KEY 3105 */ 3106 ASSERT(list_is_singular(&cur->upper)); 3107 edge = list_entry(cur->upper.next, struct btrfs_backref_edge, 3108 list[LOWER]); 3109 ASSERT(list_empty(&edge->list[UPPER])); 3110 exist = edge->node[UPPER]; 3111 /* 3112 * Add the upper level block to pending list if we need check 3113 * its backrefs 3114 */ 3115 if (!exist->checked) 3116 list_add_tail(&edge->list[UPPER], &cache->pending_edge); 3117 } else { 3118 exist = NULL; 3119 } 3120 3121 for (; ret == 0; ret = btrfs_backref_iter_next(iter)) { 3122 struct extent_buffer *eb; 3123 struct btrfs_key key; 3124 int type; 3125 3126 cond_resched(); 3127 eb = btrfs_backref_get_eb(iter); 3128 3129 key.objectid = iter->bytenr; 3130 if (btrfs_backref_iter_is_inline_ref(iter)) { 3131 struct btrfs_extent_inline_ref *iref; 3132 3133 /* Update key for inline backref */ 3134 iref = (struct btrfs_extent_inline_ref *) 3135 ((unsigned long)iter->cur_ptr); 3136 type = btrfs_get_extent_inline_ref_type(eb, iref, 3137 BTRFS_REF_TYPE_BLOCK); 3138 if (type == BTRFS_REF_TYPE_INVALID) { 3139 ret = -EUCLEAN; 3140 goto out; 3141 } 3142 key.type = type; 3143 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 3144 } else { 3145 key.type = iter->cur_key.type; 3146 key.offset = iter->cur_key.offset; 3147 } 3148 3149 /* 3150 * Parent node found and matches current inline ref, no need to 3151 * rebuild this node for this inline ref 3152 */ 3153 if (exist && 3154 ((key.type == BTRFS_TREE_BLOCK_REF_KEY && 3155 exist->owner == key.offset) || 3156 (key.type == BTRFS_SHARED_BLOCK_REF_KEY && 3157 exist->bytenr == key.offset))) { 3158 exist = NULL; 3159 continue; 3160 } 3161 3162 /* SHARED_BLOCK_REF means key.offset is the parent bytenr */ 3163 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { 3164 ret = handle_direct_tree_backref(cache, &key, cur); 3165 if (ret < 0) 3166 goto out; 3167 continue; 3168 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) { 3169 ret = -EINVAL; 3170 btrfs_print_v0_err(fs_info); 3171 btrfs_handle_fs_error(fs_info, ret, NULL); 3172 goto out; 3173 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { 3174 continue; 3175 } 3176 3177 /* 3178 * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset 3179 * means the root objectid. We need to search the tree to get 3180 * its parent bytenr. 
3181 */ 3182 ret = handle_indirect_tree_backref(cache, path, &key, node_key, 3183 cur); 3184 if (ret < 0) 3185 goto out; 3186 } 3187 ret = 0; 3188 cur->checked = 1; 3189 WARN_ON(exist); 3190 out: 3191 btrfs_backref_iter_release(iter); 3192 return ret; 3193 } 3194 3195 /* 3196 * Finish the upwards linkage created by btrfs_backref_add_tree_node() 3197 */ 3198 int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache, 3199 struct btrfs_backref_node *start) 3200 { 3201 struct list_head *useless_node = &cache->useless_node; 3202 struct btrfs_backref_edge *edge; 3203 struct rb_node *rb_node; 3204 LIST_HEAD(pending_edge); 3205 3206 ASSERT(start->checked); 3207 3208 /* Insert this node to cache if it's not COW-only */ 3209 if (!start->cowonly) { 3210 rb_node = rb_simple_insert(&cache->rb_root, start->bytenr, 3211 &start->rb_node); 3212 if (rb_node) 3213 btrfs_backref_panic(cache->fs_info, start->bytenr, 3214 -EEXIST); 3215 list_add_tail(&start->lower, &cache->leaves); 3216 } 3217 3218 /* 3219 * Use breadth first search to iterate all related edges. 3220 * 3221 * The starting points are all the edges of this node 3222 */ 3223 list_for_each_entry(edge, &start->upper, list[LOWER]) 3224 list_add_tail(&edge->list[UPPER], &pending_edge); 3225 3226 while (!list_empty(&pending_edge)) { 3227 struct btrfs_backref_node *upper; 3228 struct btrfs_backref_node *lower; 3229 3230 edge = list_first_entry(&pending_edge, 3231 struct btrfs_backref_edge, list[UPPER]); 3232 list_del_init(&edge->list[UPPER]); 3233 upper = edge->node[UPPER]; 3234 lower = edge->node[LOWER]; 3235 3236 /* Parent is detached, no need to keep any edges */ 3237 if (upper->detached) { 3238 list_del(&edge->list[LOWER]); 3239 btrfs_backref_free_edge(cache, edge); 3240 3241 /* Lower node is orphan, queue for cleanup */ 3242 if (list_empty(&lower->upper)) 3243 list_add(&lower->list, useless_node); 3244 continue; 3245 } 3246 3247 /* 3248 * All new nodes added in current build_backref_tree() haven't 3249 * been linked to the cache rb tree. 3250 * So if we have upper->rb_node populated, this means a cache 3251 * hit. We only need to link the edge, as @upper and all its 3252 * parents have already been linked. 
3253 */ 3254 if (!RB_EMPTY_NODE(&upper->rb_node)) { 3255 if (upper->lowest) { 3256 list_del_init(&upper->lower); 3257 upper->lowest = 0; 3258 } 3259 3260 list_add_tail(&edge->list[UPPER], &upper->lower); 3261 continue; 3262 } 3263 3264 /* Sanity check, we shouldn't have any unchecked nodes */ 3265 if (!upper->checked) { 3266 ASSERT(0); 3267 return -EUCLEAN; 3268 } 3269 3270 /* Sanity check, COW-only node has non-COW-only parent */ 3271 if (start->cowonly != upper->cowonly) { 3272 ASSERT(0); 3273 return -EUCLEAN; 3274 } 3275 3276 /* Only cache non-COW-only (subvolume trees) tree blocks */ 3277 if (!upper->cowonly) { 3278 rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr, 3279 &upper->rb_node); 3280 if (rb_node) { 3281 btrfs_backref_panic(cache->fs_info, 3282 upper->bytenr, -EEXIST); 3283 return -EUCLEAN; 3284 } 3285 } 3286 3287 list_add_tail(&edge->list[UPPER], &upper->lower); 3288 3289 /* 3290 * Also queue all the parent edges of this uncached node 3291 * to finish the upper linkage 3292 */ 3293 list_for_each_entry(edge, &upper->upper, list[LOWER]) 3294 list_add_tail(&edge->list[UPPER], &pending_edge); 3295 } 3296 return 0; 3297 } 3298 3299 void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache, 3300 struct btrfs_backref_node *node) 3301 { 3302 struct btrfs_backref_node *lower; 3303 struct btrfs_backref_node *upper; 3304 struct btrfs_backref_edge *edge; 3305 3306 while (!list_empty(&cache->useless_node)) { 3307 lower = list_first_entry(&cache->useless_node, 3308 struct btrfs_backref_node, list); 3309 list_del_init(&lower->list); 3310 } 3311 while (!list_empty(&cache->pending_edge)) { 3312 edge = list_first_entry(&cache->pending_edge, 3313 struct btrfs_backref_edge, list[UPPER]); 3314 list_del(&edge->list[UPPER]); 3315 list_del(&edge->list[LOWER]); 3316 lower = edge->node[LOWER]; 3317 upper = edge->node[UPPER]; 3318 btrfs_backref_free_edge(cache, edge); 3319 3320 /* 3321 * Lower is no longer linked to any upper backref nodes and 3322 * isn't in the cache, we can free it ourselves. 3323 */ 3324 if (list_empty(&lower->upper) && 3325 RB_EMPTY_NODE(&lower->rb_node)) 3326 list_add(&lower->list, &cache->useless_node); 3327 3328 if (!RB_EMPTY_NODE(&upper->rb_node)) 3329 continue; 3330 3331 /* Add this guy's upper edges to the list to process */ 3332 list_for_each_entry(edge, &upper->upper, list[LOWER]) 3333 list_add_tail(&edge->list[UPPER], 3334 &cache->pending_edge); 3335 if (list_empty(&upper->upper)) 3336 list_add(&upper->list, &cache->useless_node); 3337 } 3338 3339 while (!list_empty(&cache->useless_node)) { 3340 lower = list_first_entry(&cache->useless_node, 3341 struct btrfs_backref_node, list); 3342 list_del_init(&lower->list); 3343 if (lower == node) 3344 node = NULL; 3345 btrfs_backref_drop_node(cache, lower); 3346 } 3347 3348 btrfs_backref_cleanup_node(cache, node); 3349 ASSERT(list_empty(&cache->useless_node) && 3350 list_empty(&cache->pending_edge)); 3351 } 3352
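
/*
 * Putting the pieces above together: a hedged sketch of how a caller such as
 * relocation's backref tree build is expected to drive this cache API.
 * Variable names (bytenr, level, node_key: the block's first key) are
 * illustrative, error handling is trimmed, and the draining of
 * cache.pending_edge (repeating btrfs_backref_add_tree_node() for every
 * pending upper node) is omitted for brevity.
 *
 *	struct btrfs_backref_cache cache;
 *	struct btrfs_backref_iter *iter;
 *	struct btrfs_path *path = btrfs_alloc_path();
 *	struct btrfs_backref_node *node;
 *	int ret;
 *
 *	btrfs_backref_init_cache(fs_info, &cache, 0);
 *	iter = btrfs_backref_iter_alloc(fs_info, GFP_NOFS);
 *
 *	node = btrfs_backref_alloc_node(&cache, bytenr, level);
 *	node->lowest = 1;
 *	ret = btrfs_backref_add_tree_node(&cache, path, iter, &node_key, node);
 *	if (ret == 0)
 *		ret = btrfs_backref_finish_upper_links(&cache, node);
 *	if (ret < 0)
 *		btrfs_backref_error_cleanup(&cache, node);
 *
 *	btrfs_backref_iter_free(iter);	(helper from backref.h)
 *	btrfs_free_path(path);
 *	btrfs_backref_release_cache(&cache);
 */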