// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011 STRATO. All rights reserved.
 */

#include <linux/mm.h>
#include <linux/rbtree.h>
#include <trace/events/btrfs.h>
#include "ctree.h"
#include "disk-io.h"
#include "backref.h"
#include "ulist.h"
#include "transaction.h"
#include "delayed-ref.h"
#include "locking.h"
#include "misc.h"
#include "tree-mod-log.h"

/* Just an arbitrary number so we can be sure this happened */
#define BACKREF_FOUND_SHARED 6

struct extent_inode_elem {
	u64 inum;
	u64 offset;
	struct extent_inode_elem *next;
};

static int check_extent_in_eb(const struct btrfs_key *key,
			      const struct extent_buffer *eb,
			      const struct btrfs_file_extent_item *fi,
			      u64 extent_item_pos,
			      struct extent_inode_elem **eie,
			      bool ignore_offset)
{
	u64 offset = 0;
	struct extent_inode_elem *e;

	if (!ignore_offset &&
	    !btrfs_file_extent_compression(eb, fi) &&
	    !btrfs_file_extent_encryption(eb, fi) &&
	    !btrfs_file_extent_other_encoding(eb, fi)) {
		u64 data_offset;
		u64 data_len;

		data_offset = btrfs_file_extent_offset(eb, fi);
		data_len = btrfs_file_extent_num_bytes(eb, fi);

		if (extent_item_pos < data_offset ||
		    extent_item_pos >= data_offset + data_len)
			return 1;
		offset = extent_item_pos - data_offset;
	}

	e = kmalloc(sizeof(*e), GFP_NOFS);
	if (!e)
		return -ENOMEM;

	e->next = *eie;
	e->inum = key->objectid;
	e->offset = key->offset + offset;
	*eie = e;

	return 0;
}

static void free_inode_elem_list(struct extent_inode_elem *eie)
{
	struct extent_inode_elem *eie_next;

	for (; eie; eie = eie_next) {
		eie_next = eie->next;
		kfree(eie);
	}
}

static int find_extent_in_eb(const struct extent_buffer *eb,
			     u64 wanted_disk_byte, u64 extent_item_pos,
			     struct extent_inode_elem **eie,
			     bool ignore_offset)
{
	u64 disk_byte;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int slot;
	int nritems;
	int extent_type;
	int ret;

	/*
	 * from the shared data ref, we only have the leaf but we need
	 * the key. thus, we must look into all items and see that we
	 * find one (some) with a reference to our extent item.
	 */
	nritems = btrfs_header_nritems(eb);
	for (slot = 0; slot < nritems; ++slot) {
		btrfs_item_key_to_cpu(eb, &key, slot);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(eb, fi);
		if (extent_type == BTRFS_FILE_EXTENT_INLINE)
			continue;
		/* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
		disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
		if (disk_byte != wanted_disk_byte)
			continue;

		ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie, ignore_offset);
		if (ret < 0)
			return ret;
	}

	return 0;
}

struct preftree {
	struct rb_root_cached root;
	unsigned int count;
};

#define PREFTREE_INIT	{ .root = RB_ROOT_CACHED, .count = 0 }

struct preftrees {
	struct preftree direct;    /* BTRFS_SHARED_[DATA|BLOCK]_REF_KEY */
	struct preftree indirect;  /* BTRFS_[TREE_BLOCK|EXTENT_DATA]_REF_KEY */
	struct preftree indirect_missing_keys;
};

/*
 * Checks for a shared extent during backref search.
 *
 * The share_count tracks prelim_refs (direct and indirect) having a
 * ref->count >0:
 *  - incremented when a ref->count transitions to >0
 *  - decremented when a ref->count transitions to <1
 */
struct share_check {
	u64 root_objectid;
	u64 inum;
	int share_count;
	bool have_delayed_delete_refs;
};

static inline int extent_is_shared(struct share_check *sc)
{
	return (sc && sc->share_count > 1) ? BACKREF_FOUND_SHARED : 0;
}

static struct kmem_cache *btrfs_prelim_ref_cache;

int __init btrfs_prelim_ref_init(void)
{
	btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
					sizeof(struct prelim_ref),
					0,
					SLAB_MEM_SPREAD,
					NULL);
	if (!btrfs_prelim_ref_cache)
		return -ENOMEM;
	return 0;
}

void __cold btrfs_prelim_ref_exit(void)
{
	kmem_cache_destroy(btrfs_prelim_ref_cache);
}

static void free_pref(struct prelim_ref *ref)
{
	kmem_cache_free(btrfs_prelim_ref_cache, ref);
}

/*
 * Return 0 when both refs are for the same block (and can be merged).
 * A -1 return indicates ref1 is a 'lower' block than ref2, while 1
 * indicates a 'higher' block.
 */
static int prelim_ref_compare(struct prelim_ref *ref1,
			      struct prelim_ref *ref2)
{
	if (ref1->level < ref2->level)
		return -1;
	if (ref1->level > ref2->level)
		return 1;
	if (ref1->root_id < ref2->root_id)
		return -1;
	if (ref1->root_id > ref2->root_id)
		return 1;
	if (ref1->key_for_search.type < ref2->key_for_search.type)
		return -1;
	if (ref1->key_for_search.type > ref2->key_for_search.type)
		return 1;
	if (ref1->key_for_search.objectid < ref2->key_for_search.objectid)
		return -1;
	if (ref1->key_for_search.objectid > ref2->key_for_search.objectid)
		return 1;
	if (ref1->key_for_search.offset < ref2->key_for_search.offset)
		return -1;
	if (ref1->key_for_search.offset > ref2->key_for_search.offset)
		return 1;
	if (ref1->parent < ref2->parent)
		return -1;
	if (ref1->parent > ref2->parent)
		return 1;

	return 0;
}

static void update_share_count(struct share_check *sc, int oldcount,
			       int newcount)
{
	if ((!sc) || (oldcount == 0 && newcount < 1))
		return;

	if (oldcount > 0 && newcount < 1)
		sc->share_count--;
	else if (oldcount < 1 && newcount > 0)
		sc->share_count++;
}

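/*
 * Worked example for the helpers above (all counts are hypothetical): a fresh
 * prelim_ref inserted with count 1 triggers update_share_count(sc, 0, 1),
 * bumping share_count. Merging an identical ref with count 1 gives
 * update_share_count(sc, 1, 2), and merging a delayed DROP ref with count -1
 * gives update_share_count(sc, 2, 1); neither changes share_count because no
 * zero boundary is crossed. Only a transition like update_share_count(sc, 1, 0)
 * decrements it again. extent_is_shared() reports the extent as shared once
 * share_count exceeds 1.
 */
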
/*
 * Add @newref to the @root rbtree, merging identical refs.
 *
 * Callers should assume that newref has been freed after calling.
 */
static void prelim_ref_insert(const struct btrfs_fs_info *fs_info,
			      struct preftree *preftree,
			      struct prelim_ref *newref,
			      struct share_check *sc)
{
	struct rb_root_cached *root;
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct prelim_ref *ref;
	int result;
	bool leftmost = true;

	root = &preftree->root;
	p = &root->rb_root.rb_node;

	while (*p) {
		parent = *p;
		ref = rb_entry(parent, struct prelim_ref, rbnode);
		result = prelim_ref_compare(ref, newref);
		if (result < 0) {
			p = &(*p)->rb_left;
		} else if (result > 0) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			/* Identical refs, merge them and free @newref */
			struct extent_inode_elem *eie = ref->inode_list;

			while (eie && eie->next)
				eie = eie->next;

			if (!eie)
				ref->inode_list = newref->inode_list;
			else
				eie->next = newref->inode_list;
			trace_btrfs_prelim_ref_merge(fs_info, ref, newref,
						     preftree->count);
			/*
			 * A delayed ref can have newref->count < 0.
			 * The ref->count is updated to follow any
			 * BTRFS_[ADD|DROP]_DELAYED_REF actions.
			 */
			update_share_count(sc, ref->count,
					   ref->count + newref->count);
			ref->count += newref->count;
			free_pref(newref);
			return;
		}
	}

	update_share_count(sc, 0, newref->count);
	preftree->count++;
	trace_btrfs_prelim_ref_insert(fs_info, newref, NULL, preftree->count);
	rb_link_node(&newref->rbnode, parent, p);
	rb_insert_color_cached(&newref->rbnode, root, leftmost);
}

/*
 * Release the entire tree.  We don't care about internal consistency so
 * just free everything and then reset the tree root.
 */
static void prelim_release(struct preftree *preftree)
{
	struct prelim_ref *ref, *next_ref;

	rbtree_postorder_for_each_entry_safe(ref, next_ref,
					     &preftree->root.rb_root, rbnode)
		free_pref(ref);

	preftree->root = RB_ROOT_CACHED;
	preftree->count = 0;
}

/*
 * the rules for all callers of this function are:
 * - obtaining the parent is the goal
 * - if you add a key, you must know that it is a correct key
 * - if you cannot add the parent or a correct key, then we will look into the
 *   block later to set a correct key
 *
 * delayed refs
 * ============
 *        backref type | shared | indirect | shared | indirect
 * information         |   tree |     tree |   data |     data
 * --------------------+--------+----------+--------+----------
 *      parent logical |    y   |     -    |    -   |     -
 *      key to resolve |    -   |     y    |    y   |     y
 *  tree block logical |    -   |     -    |    -   |     -
 *  root for resolving |    y   |     y    |    y   |     y
 *
 * - column 1:       we've the parent -> done
 * - column 2, 3, 4: we use the key to find the parent
 *
 * on disk refs (inline or keyed)
 * ==============================
 *        backref type | shared | indirect | shared | indirect
 * information         |   tree |     tree |   data |     data
 * --------------------+--------+----------+--------+----------
 *      parent logical |    y   |     -    |    y   |     -
 *      key to resolve |    -   |     -    |    -   |     y
 *  tree block logical |    y   |     y    |    y   |     y
 *  root for resolving |    -   |     y    |    y   |     y
 *
 * - column 1, 3: we've the parent -> done
 * - column 2:    we take the first key from the block to find the parent
 *                (see add_missing_keys)
 * - column 4:    we use the key to find the parent
 *
 * additional information that's available but not required to find the parent
 * block might help in merging entries to gain some speed.
 */
static int add_prelim_ref(const struct btrfs_fs_info *fs_info,
			  struct preftree *preftree, u64 root_id,
			  const struct btrfs_key *key, int level, u64 parent,
			  u64 wanted_disk_byte, int count,
			  struct share_check *sc, gfp_t gfp_mask)
{
	struct prelim_ref *ref;

	if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
		return 0;

	ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
	if (!ref)
		return -ENOMEM;

	ref->root_id = root_id;
	if (key)
		ref->key_for_search = *key;
	else
		memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));

	ref->inode_list = NULL;
	ref->level = level;
	ref->count = count;
	ref->parent = parent;
	ref->wanted_disk_byte = wanted_disk_byte;
	prelim_ref_insert(fs_info, preftree, ref, sc);
	return extent_is_shared(sc);
}

/* direct refs use root == 0, key == NULL */
static int add_direct_ref(const struct btrfs_fs_info *fs_info,
			  struct preftrees *preftrees, int level, u64 parent,
			  u64 wanted_disk_byte, int count,
			  struct share_check *sc, gfp_t gfp_mask)
{
	return add_prelim_ref(fs_info, &preftrees->direct, 0, NULL, level,
			      parent, wanted_disk_byte, count, sc, gfp_mask);
}

/* indirect refs use parent == 0 */
static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
			    struct preftrees *preftrees, u64 root_id,
			    const struct btrfs_key *key, int level,
			    u64 wanted_disk_byte, int count,
			    struct share_check *sc, gfp_t gfp_mask)
{
	struct preftree *tree = &preftrees->indirect;

	if (!key)
		tree = &preftrees->indirect_missing_keys;
	return add_prelim_ref(fs_info, tree, root_id, key, level, 0,
			      wanted_disk_byte, count, sc, gfp_mask);
}

static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr)
{
	struct rb_node **p = &preftrees->direct.root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct prelim_ref *ref = NULL;
	struct prelim_ref target = {};
	int result;

	target.parent = bytenr;

	while (*p) {
		parent = *p;
		ref = rb_entry(parent, struct prelim_ref, rbnode);
		result = prelim_ref_compare(ref, &target);

		if (result < 0)
			p = &(*p)->rb_left;
		else if (result > 0)
			p = &(*p)->rb_right;
		else
			return 1;
	}
	return 0;
}

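/*
 * Illustration of how the two helpers above are typically used (the bytenr and
 * root values are invented): a shared block ref, whose offset is the parent
 * tree block's logical address, is a direct ref:
 *
 *	add_direct_ref(fs_info, preftrees, level + 1, parent_bytenr,
 *		       extent_bytenr, 1, sc, GFP_NOFS);
 *
 * while a tree block ref only names the owning root and is therefore an
 * indirect ref; with a NULL key it lands in indirect_missing_keys until
 * add_missing_keys() fills the key in:
 *
 *	add_indirect_ref(fs_info, preftrees, root_objectid, NULL, level + 1,
 *			 extent_bytenr, 1, sc, GFP_NOFS);
 */
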
static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
			   struct ulist *parents,
			   struct preftrees *preftrees, struct prelim_ref *ref,
			   int level, u64 time_seq, const u64 *extent_item_pos,
			   bool ignore_offset)
{
	int ret = 0;
	int slot;
	struct extent_buffer *eb;
	struct btrfs_key key;
	struct btrfs_key *key_for_search = &ref->key_for_search;
	struct btrfs_file_extent_item *fi;
	struct extent_inode_elem *eie = NULL, *old = NULL;
	u64 disk_byte;
	u64 wanted_disk_byte = ref->wanted_disk_byte;
	u64 count = 0;
	u64 data_offset;

	if (level != 0) {
		eb = path->nodes[level];
		ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
		if (ret < 0)
			return ret;
		return 0;
	}

	/*
	 * 1. We normally enter this function with the path already pointing to
	 *    the first item to check. But sometimes, we may enter it with
	 *    slot == nritems.
	 * 2. We are searching for normal backref but bytenr of this leaf
	 *    matches shared data backref
	 * 3. The leaf owner is not equal to the root we are searching
	 *
	 * For these cases, go to the next leaf before we continue.
	 */
	eb = path->nodes[0];
	if (path->slots[0] >= btrfs_header_nritems(eb) ||
	    is_shared_data_backref(preftrees, eb->start) ||
	    ref->root_id != btrfs_header_owner(eb)) {
		if (time_seq == BTRFS_SEQ_LAST)
			ret = btrfs_next_leaf(root, path);
		else
			ret = btrfs_next_old_leaf(root, path, time_seq);
	}

	while (!ret && count < ref->count) {
		eb = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(eb, &key, slot);

		if (key.objectid != key_for_search->objectid ||
		    key.type != BTRFS_EXTENT_DATA_KEY)
			break;

		/*
		 * We are searching for normal backref but bytenr of this leaf
		 * matches shared data backref, OR
		 * the leaf owner is not equal to the root we are searching for
		 */
		if (slot == 0 &&
		    (is_shared_data_backref(preftrees, eb->start) ||
		     ref->root_id != btrfs_header_owner(eb))) {
			if (time_seq == BTRFS_SEQ_LAST)
				ret = btrfs_next_leaf(root, path);
			else
				ret = btrfs_next_old_leaf(root, path, time_seq);
			continue;
		}
		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
		disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
		data_offset = btrfs_file_extent_offset(eb, fi);

		if (disk_byte == wanted_disk_byte) {
			eie = NULL;
			old = NULL;
			if (ref->key_for_search.offset == key.offset - data_offset)
				count++;
			else
				goto next;
			if (extent_item_pos) {
				ret = check_extent_in_eb(&key, eb, fi,
							 *extent_item_pos,
							 &eie, ignore_offset);
				if (ret < 0)
					break;
			}
			if (ret > 0)
				goto next;
			ret = ulist_add_merge_ptr(parents, eb->start,
						  eie, (void **)&old, GFP_NOFS);
			if (ret < 0)
				break;
			if (!ret && extent_item_pos) {
				while (old->next)
					old = old->next;
				old->next = eie;
			}
			eie = NULL;
		}
next:
		if (time_seq == BTRFS_SEQ_LAST)
			ret = btrfs_next_item(root, path);
		else
			ret = btrfs_next_old_item(root, path, time_seq);
	}

	if (ret > 0)
		ret = 0;
	else if (ret < 0)
		free_inode_elem_list(eie);
	return ret;
}

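/*
 * A short illustration of the matching done above (all numbers are made up):
 * for an indirect data ref with key_for_search (258 EXTENT_DATA 0), count 2
 * and wanted_disk_byte 13631488, we walk inode 258's file extent items in the
 * resolved root and, for each item whose disk_bytenr is 13631488 and whose
 * key.offset minus its extent data offset equals 0, add the containing leaf's
 * bytenr to @parents, stopping after two such items have been accounted for.
 */
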
/*
 * resolve an indirect backref in the form (root_id, key, level)
 * to a logical address
 */
static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
				struct btrfs_path *path, u64 time_seq,
				struct preftrees *preftrees,
				struct prelim_ref *ref, struct ulist *parents,
				const u64 *extent_item_pos, bool ignore_offset)
{
	struct btrfs_root *root;
	struct extent_buffer *eb;
	int ret = 0;
	int root_level;
	int level = ref->level;
	struct btrfs_key search_key = ref->key_for_search;

	/*
	 * If we're search_commit_root we could possibly be holding locks on
	 * other tree nodes. This happens when qgroups do backref walks when
	 * adding new delayed refs. To deal with this we need to look in cache
	 * for the root, and if we don't find it then we need to search the
	 * tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage
	 * here.
	 */
	if (path->search_commit_root)
		root = btrfs_get_fs_root_commit_root(fs_info, path, ref->root_id);
	else
		root = btrfs_get_fs_root(fs_info, ref->root_id, false);
	if (IS_ERR(root)) {
		ret = PTR_ERR(root);
		goto out_free;
	}

	if (!path->search_commit_root &&
	    test_bit(BTRFS_ROOT_DELETING, &root->state)) {
		ret = -ENOENT;
		goto out;
	}

	if (btrfs_is_testing(fs_info)) {
		ret = -ENOENT;
		goto out;
	}

	if (path->search_commit_root)
		root_level = btrfs_header_level(root->commit_root);
	else if (time_seq == BTRFS_SEQ_LAST)
		root_level = btrfs_header_level(root->node);
	else
		root_level = btrfs_old_root_level(root, time_seq);

	if (root_level + 1 == level)
		goto out;

	/*
	 * We can often find data backrefs with an offset that is too large
	 * (>= LLONG_MAX, maximum allowed file offset) due to underflows when
	 * subtracting a file's offset with the data offset of its
	 * corresponding extent data item. This can happen for example in the
	 * clone ioctl.
	 *
	 * So if we detect such case we set the search key's offset to zero to
	 * make sure we will find the matching file extent item at
	 * add_all_parents(), otherwise we will miss it because the offset
	 * taken from the backref is much larger than the offset of the file
	 * extent item. This can make us scan a very large number of file
	 * extent items, but at least it will not make us miss any.
	 *
	 * This is an ugly workaround for a behaviour that should have never
	 * existed, but it does and a fix for the clone ioctl would touch a lot
	 * of places, cause backwards incompatibility and would not fix the
	 * problem for extents cloned with older kernels.
	 */
	if (search_key.type == BTRFS_EXTENT_DATA_KEY &&
	    search_key.offset >= LLONG_MAX)
		search_key.offset = 0;
	path->lowest_level = level;
	if (time_seq == BTRFS_SEQ_LAST)
		ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
	else
		ret = btrfs_search_old_slot(root, &search_key, path, time_seq);

	btrfs_debug(fs_info,
		"search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)",
		 ref->root_id, level, ref->count, ret,
		 ref->key_for_search.objectid, ref->key_for_search.type,
		 ref->key_for_search.offset);
	if (ret < 0)
		goto out;

	eb = path->nodes[level];
	while (!eb) {
		if (WARN_ON(!level)) {
			ret = 1;
			goto out;
		}
		level--;
		eb = path->nodes[level];
	}

	ret = add_all_parents(root, path, parents, preftrees, ref, level,
			      time_seq, extent_item_pos, ignore_offset);
out:
	btrfs_put_root(root);
out_free:
	path->lowest_level = 0;
	btrfs_release_path(path);
	return ret;
}

static struct extent_inode_elem *
unode_aux_to_inode_list(struct ulist_node *node)
{
	if (!node)
		return NULL;
	return (struct extent_inode_elem *)(uintptr_t)node->aux;
}

/*
 * We maintain three separate rbtrees: one for direct refs, one for
 * indirect refs which have a key, and one for indirect refs which do not
 * have a key. Each tree does merge on insertion.
 *
 * Once all of the references are located, we iterate over the tree of
 * indirect refs with missing keys. An appropriate key is located and
 * the ref is moved onto the tree for indirect refs. After all missing
 * keys are thus located, we iterate over the indirect ref tree, resolve
 * each reference, and then insert the resolved reference onto the
 * direct tree (merging there too).
 *
 * New backrefs (i.e., for parent nodes) are added to the appropriate
 * rbtree as they are encountered. The new backrefs are subsequently
 * resolved as above.
 */
static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
				 struct btrfs_path *path, u64 time_seq,
				 struct preftrees *preftrees,
				 const u64 *extent_item_pos,
				 struct share_check *sc, bool ignore_offset)
{
	int err;
	int ret = 0;
	struct ulist *parents;
	struct ulist_node *node;
	struct ulist_iterator uiter;
	struct rb_node *rnode;

	parents = ulist_alloc(GFP_NOFS);
	if (!parents)
		return -ENOMEM;

	/*
	 * We could trade memory usage for performance here by iterating
	 * the tree, allocating new refs for each insertion, and then
	 * freeing the entire indirect tree when we're done. In some test
	 * cases, the tree can grow quite large (~200k objects).
	 */
	while ((rnode = rb_first_cached(&preftrees->indirect.root))) {
		struct prelim_ref *ref;

		ref = rb_entry(rnode, struct prelim_ref, rbnode);
		if (WARN(ref->parent,
			 "BUG: direct ref found in indirect tree")) {
			ret = -EINVAL;
			goto out;
		}

		rb_erase_cached(&ref->rbnode, &preftrees->indirect.root);
		preftrees->indirect.count--;

		if (ref->count == 0) {
			free_pref(ref);
			continue;
		}

		if (sc && sc->root_objectid &&
		    ref->root_id != sc->root_objectid) {
			free_pref(ref);
			ret = BACKREF_FOUND_SHARED;
			goto out;
		}
		err = resolve_indirect_ref(fs_info, path, time_seq, preftrees,
					   ref, parents, extent_item_pos,
					   ignore_offset);
		/*
		 * We can only tolerate -ENOENT; any other error should be
		 * caught and returned directly.
		 */
		if (err == -ENOENT) {
			prelim_ref_insert(fs_info, &preftrees->direct, ref,
					  NULL);
			continue;
		} else if (err) {
			free_pref(ref);
			ret = err;
			goto out;
		}

		/* we put the first parent into the ref at hand */
		ULIST_ITER_INIT(&uiter);
		node = ulist_next(parents, &uiter);
		ref->parent = node ? node->val : 0;
		ref->inode_list = unode_aux_to_inode_list(node);

		/* Add a prelim_ref(s) for any other parent(s). */
		while ((node = ulist_next(parents, &uiter))) {
			struct prelim_ref *new_ref;

			new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
						   GFP_NOFS);
			if (!new_ref) {
				free_pref(ref);
				ret = -ENOMEM;
				goto out;
			}
			memcpy(new_ref, ref, sizeof(*ref));
			new_ref->parent = node->val;
			new_ref->inode_list = unode_aux_to_inode_list(node);
			prelim_ref_insert(fs_info, &preftrees->direct,
					  new_ref, NULL);
		}

		/*
		 * Now it's a direct ref, put it in the direct tree. We must
		 * do this last because the ref could be merged/freed here.
		 */
		prelim_ref_insert(fs_info, &preftrees->direct, ref, NULL);

		ulist_reinit(parents);
		cond_resched();
	}
out:
	ulist_free(parents);
	return ret;
}

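/*
 * A small scenario for the function above (all numbers invented): an indirect
 * ref (root 257, key (258 EXTENT_DATA 0), level 0) that resolves to two leaves
 * at bytenrs 30408704 and 30425088 ends up as two entries in the direct tree,
 * one with ref->parent == 30408704 (reusing the original prelim_ref) and one
 * with ref->parent == 30425088 (a newly allocated copy). If either parent was
 * already present in the direct tree, e.g. via a shared data backref,
 * prelim_ref_insert() merges the entries and adds up their counts.
 */
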
/*
 * read tree blocks and add keys where required.
 */
static int add_missing_keys(struct btrfs_fs_info *fs_info,
			    struct preftrees *preftrees, bool lock)
{
	struct prelim_ref *ref;
	struct extent_buffer *eb;
	struct preftree *tree = &preftrees->indirect_missing_keys;
	struct rb_node *node;

	while ((node = rb_first_cached(&tree->root))) {
		ref = rb_entry(node, struct prelim_ref, rbnode);
		rb_erase_cached(node, &tree->root);

		BUG_ON(ref->parent);	/* should not be a direct ref */
		BUG_ON(ref->key_for_search.type);
		BUG_ON(!ref->wanted_disk_byte);

		eb = read_tree_block(fs_info, ref->wanted_disk_byte,
				     ref->root_id, 0, ref->level - 1, NULL);
		if (IS_ERR(eb)) {
			free_pref(ref);
			return PTR_ERR(eb);
		}
		if (!extent_buffer_uptodate(eb)) {
			free_pref(ref);
			free_extent_buffer(eb);
			return -EIO;
		}

		if (lock)
			btrfs_tree_read_lock(eb);
		if (btrfs_header_level(eb) == 0)
			btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
		else
			btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
		if (lock)
			btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
		prelim_ref_insert(fs_info, &preftrees->indirect, ref, NULL);
		cond_resched();
	}
	return 0;
}

/*
 * add all currently queued delayed refs from this head whose seq nr is
 * smaller than or equal to @seq to the list
 */
static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
			    struct btrfs_delayed_ref_head *head, u64 seq,
			    struct preftrees *preftrees, struct share_check *sc)
{
	struct btrfs_delayed_ref_node *node;
	struct btrfs_key key;
	struct rb_node *n;
	int count;
	int ret = 0;

	spin_lock(&head->lock);
	for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) {
		node = rb_entry(n, struct btrfs_delayed_ref_node,
				ref_node);
		if (node->seq > seq)
			continue;

		switch (node->action) {
		case BTRFS_ADD_DELAYED_EXTENT:
		case BTRFS_UPDATE_DELAYED_HEAD:
			WARN_ON(1);
			continue;
		case BTRFS_ADD_DELAYED_REF:
			count = node->ref_mod;
			break;
		case BTRFS_DROP_DELAYED_REF:
			count = node->ref_mod * -1;
			break;
		default:
			BUG();
		}
		switch (node->type) {
		case BTRFS_TREE_BLOCK_REF_KEY: {
			/* NORMAL INDIRECT METADATA backref */
			struct btrfs_delayed_tree_ref *ref;
			struct btrfs_key *key_ptr = NULL;

			if (head->extent_op && head->extent_op->update_key) {
				btrfs_disk_key_to_cpu(&key, &head->extent_op->key);
				key_ptr = &key;
			}

			ref = btrfs_delayed_node_to_tree_ref(node);
			ret = add_indirect_ref(fs_info, preftrees, ref->root,
					       key_ptr, ref->level + 1,
					       node->bytenr, count, sc,
					       GFP_ATOMIC);
			break;
		}
		case BTRFS_SHARED_BLOCK_REF_KEY: {
			/* SHARED DIRECT METADATA backref */
			struct btrfs_delayed_tree_ref *ref;

			ref = btrfs_delayed_node_to_tree_ref(node);

			ret = add_direct_ref(fs_info, preftrees, ref->level + 1,
					     ref->parent, node->bytenr, count,
					     sc, GFP_ATOMIC);
			break;
		}
		case BTRFS_EXTENT_DATA_REF_KEY: {
			/* NORMAL INDIRECT DATA backref */
			struct btrfs_delayed_data_ref *ref;
			ref = btrfs_delayed_node_to_data_ref(node);

			key.objectid = ref->objectid;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = ref->offset;

			/*
			 * If we have a share check context and a reference for
			 * another inode, we can't exit immediately. This is
			 * because even if this is a BTRFS_ADD_DELAYED_REF
			 * reference we may find next a BTRFS_DROP_DELAYED_REF
			 * which cancels out this ADD reference.
			 *
			 * If this is a DROP reference and there was no previous
			 * ADD reference, then we need to signal that when we
			 * process references from the extent tree (through
			 * add_inline_refs() and add_keyed_refs()), we should
			 * not exit early if we find a reference for another
			 * inode, because one of the delayed DROP references
			 * may cancel that reference in the extent tree.
			 */
			if (sc && count < 0)
				sc->have_delayed_delete_refs = true;

			ret = add_indirect_ref(fs_info, preftrees, ref->root,
					       &key, 0, node->bytenr, count, sc,
					       GFP_ATOMIC);
			break;
		}
		case BTRFS_SHARED_DATA_REF_KEY: {
			/* SHARED DIRECT FULL backref */
			struct btrfs_delayed_data_ref *ref;

			ref = btrfs_delayed_node_to_data_ref(node);

			ret = add_direct_ref(fs_info, preftrees, 0, ref->parent,
					     node->bytenr, count, sc,
					     GFP_ATOMIC);
			break;
		}
		default:
			WARN_ON(1);
		}
		/*
		 * We must ignore BACKREF_FOUND_SHARED until all delayed
		 * refs have been checked.
		 */
		if (ret && (ret != BACKREF_FOUND_SHARED))
			break;
	}
	if (!ret)
		ret = extent_is_shared(sc);

	spin_unlock(&head->lock);
	return ret;
}

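/*
 * To illustrate the count handling above with made-up numbers: a pending
 * BTRFS_ADD_DELAYED_REF with ref_mod 1 for inode 258 adds an indirect ref
 * with count +1, and a later BTRFS_DROP_DELAYED_REF with ref_mod 1 for the
 * same (root, objectid, offset) adds an identical ref with count -1; when
 * prelim_ref_insert() merges the two, the resulting count is 0 and the ref
 * is then ignored by resolve_indirect_refs() and find_parent_nodes().
 */
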
/*
 * add all inline backrefs for bytenr to the list
 *
 * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED.
 */
static int add_inline_refs(const struct btrfs_fs_info *fs_info,
			   struct btrfs_path *path, u64 bytenr,
			   int *info_level, struct preftrees *preftrees,
			   struct share_check *sc)
{
	int ret = 0;
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	unsigned long ptr;
	unsigned long end;
	struct btrfs_extent_item *ei;
	u64 flags;
	u64 item_size;

	/*
	 * enumerate all inline refs
	 */
	leaf = path->nodes[0];
	slot = path->slots[0];

	item_size = btrfs_item_size(leaf, slot);
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);
	btrfs_item_key_to_cpu(leaf, &found_key, slot);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)ptr;
		*info_level = btrfs_tree_block_level(leaf, info);
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	} else if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
		*info_level = found_key.offset;
	} else {
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
	}

	while (ptr < end) {
		struct btrfs_extent_inline_ref *iref;
		u64 offset;
		int type;

		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_get_extent_inline_ref_type(leaf, iref,
							BTRFS_REF_TYPE_ANY);
		if (type == BTRFS_REF_TYPE_INVALID)
			return -EUCLEAN;

		offset = btrfs_extent_inline_ref_offset(leaf, iref);

		switch (type) {
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_direct_ref(fs_info, preftrees,
					     *info_level + 1, offset,
					     bytenr, 1, NULL, GFP_NOFS);
			break;
		case BTRFS_SHARED_DATA_REF_KEY: {
			struct btrfs_shared_data_ref *sdref;
			int count;

			sdref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sdref);

			ret = add_direct_ref(fs_info, preftrees, 0, offset,
					     bytenr, count, sc, GFP_NOFS);
			break;
		}
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_indirect_ref(fs_info, preftrees, offset,
					       NULL, *info_level + 1,
					       bytenr, 1, NULL, GFP_NOFS);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY: {
			struct btrfs_extent_data_ref *dref;
			int count;
			u64 root;

			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			count = btrfs_extent_data_ref_count(leaf, dref);
			key.objectid = btrfs_extent_data_ref_objectid(leaf,
								      dref);
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = btrfs_extent_data_ref_offset(leaf, dref);

			if (sc && sc->inum && key.objectid != sc->inum &&
			    !sc->have_delayed_delete_refs) {
				ret = BACKREF_FOUND_SHARED;
				break;
			}

			root = btrfs_extent_data_ref_root(leaf, dref);

			ret = add_indirect_ref(fs_info, preftrees, root,
					       &key, 0, bytenr, count,
					       sc, GFP_NOFS);

			break;
		}
		default:
			WARN_ON(1);
		}
		if (ret)
			return ret;
		ptr += btrfs_extent_inline_ref_size(type);
	}

	return 0;
}

/*
 * add all non-inline backrefs for bytenr to the list
 *
 * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED.
 */
static int add_keyed_refs(struct btrfs_root *extent_root,
			  struct btrfs_path *path, u64 bytenr,
			  int info_level, struct preftrees *preftrees,
			  struct share_check *sc)
{
	struct btrfs_fs_info *fs_info = extent_root->fs_info;
	int ret;
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	while (1) {
		ret = btrfs_next_item(extent_root, path);
		if (ret < 0)
			break;
		if (ret) {
			ret = 0;
			break;
		}

		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, slot);

		if (key.objectid != bytenr)
			break;
		if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
			continue;
		if (key.type > BTRFS_SHARED_DATA_REF_KEY)
			break;

		switch (key.type) {
		case BTRFS_SHARED_BLOCK_REF_KEY:
			/* SHARED DIRECT METADATA backref */
			ret = add_direct_ref(fs_info, preftrees,
					     info_level + 1, key.offset,
					     bytenr, 1, NULL, GFP_NOFS);
			break;
		case BTRFS_SHARED_DATA_REF_KEY: {
			/* SHARED DIRECT FULL backref */
			struct btrfs_shared_data_ref *sdref;
			int count;

			sdref = btrfs_item_ptr(leaf, slot,
					       struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sdref);
			ret = add_direct_ref(fs_info, preftrees, 0,
					     key.offset, bytenr, count,
					     sc, GFP_NOFS);
			break;
		}
		case BTRFS_TREE_BLOCK_REF_KEY:
			/* NORMAL INDIRECT METADATA backref */
			ret = add_indirect_ref(fs_info, preftrees, key.offset,
					       NULL, info_level + 1, bytenr,
					       1, NULL, GFP_NOFS);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY: {
			/* NORMAL INDIRECT DATA backref */
			struct btrfs_extent_data_ref *dref;
			int count;
			u64 root;

			dref = btrfs_item_ptr(leaf, slot,
					      struct btrfs_extent_data_ref);
			count = btrfs_extent_data_ref_count(leaf, dref);
			key.objectid = btrfs_extent_data_ref_objectid(leaf,
								      dref);
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = btrfs_extent_data_ref_offset(leaf, dref);

			if (sc && sc->inum && key.objectid != sc->inum &&
			    !sc->have_delayed_delete_refs) {
				ret = BACKREF_FOUND_SHARED;
				break;
			}

			root = btrfs_extent_data_ref_root(leaf, dref);
			ret = add_indirect_ref(fs_info, preftrees, root,
					       &key, 0, bytenr, count,
					       sc, GFP_NOFS);
			break;
		}
		default:
			WARN_ON(1);
		}
		if (ret)
			return ret;

	}

	return ret;
}

/*
 * this adds all existing backrefs (inline backrefs, backrefs and delayed
 * refs) for the given bytenr to the refs list, merges duplicates and resolves
 * indirect refs to their parent bytenr.
 * When roots are found, they're added to the roots list
 *
 * If time_seq is set to BTRFS_SEQ_LAST, it will not search delayed_refs, and
 * will behave much like the trans == NULL case; the only difference is that
 * it will not use the commit root.
 * The special case is for qgroup to search roots in commit_transaction().
 *
 * @sc - if !NULL, then immediately return BACKREF_FOUND_SHARED when a
 * shared extent is detected.
 *
 * Otherwise this returns 0 for success and <0 for an error.
 *
 * If ignore_offset is set to false, only extent refs whose offsets match
 * extent_item_pos are returned. If true, every extent ref is returned
 * and extent_item_pos is ignored.
 *
 * FIXME some caching might speed things up
 */
static int find_parent_nodes(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
			     u64 time_seq, struct ulist *refs,
			     struct ulist *roots, const u64 *extent_item_pos,
			     struct share_check *sc, bool ignore_offset)
{
	struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr);
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_delayed_ref_root *delayed_refs = NULL;
	struct btrfs_delayed_ref_head *head;
	int info_level = 0;
	int ret;
	struct prelim_ref *ref;
	struct rb_node *node;
	struct extent_inode_elem *eie = NULL;
	struct preftrees preftrees = {
		.direct = PREFTREE_INIT,
		.indirect = PREFTREE_INIT,
		.indirect_missing_keys = PREFTREE_INIT
	};

	key.objectid = bytenr;
	key.offset = (u64)-1;
	if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	if (!trans) {
		path->search_commit_root = 1;
		path->skip_locking = 1;
	}

	if (time_seq == BTRFS_SEQ_LAST)
		path->skip_locking = 1;

again:
	head = NULL;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret == 0) {
		/* This shouldn't happen, indicates a bug or fs corruption. */
		ASSERT(ret != 0);
		ret = -EUCLEAN;
		goto out;
	}

	if (trans && likely(trans->type != __TRANS_DUMMY) &&
	    time_seq != BTRFS_SEQ_LAST) {
		/*
		 * We have a specific time_seq we care about and trans which
		 * means we have the path lock, we need to grab the ref head and
		 * lock it so we have a consistent view of the refs at the given
		 * time.
		 */
		delayed_refs = &trans->transaction->delayed_refs;
		spin_lock(&delayed_refs->lock);
		head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
		if (head) {
			if (!mutex_trylock(&head->mutex)) {
				refcount_inc(&head->refs);
				spin_unlock(&delayed_refs->lock);

				btrfs_release_path(path);

				/*
				 * Mutex was contended, block until it's
				 * released and try again
				 */
				mutex_lock(&head->mutex);
				mutex_unlock(&head->mutex);
				btrfs_put_delayed_ref_head(head);
				goto again;
			}
			spin_unlock(&delayed_refs->lock);
			ret = add_delayed_refs(fs_info, head, time_seq,
					       &preftrees, sc);
			mutex_unlock(&head->mutex);
			if (ret)
				goto out;
		} else {
			spin_unlock(&delayed_refs->lock);
		}
	}

	if (path->slots[0]) {
		struct extent_buffer *leaf;
		int slot;

		path->slots[0]--;
		leaf = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid == bytenr &&
		    (key.type == BTRFS_EXTENT_ITEM_KEY ||
		     key.type == BTRFS_METADATA_ITEM_KEY)) {
			ret = add_inline_refs(fs_info, path, bytenr,
					      &info_level, &preftrees, sc);
			if (ret)
				goto out;
			ret = add_keyed_refs(root, path, bytenr, info_level,
					     &preftrees, sc);
			if (ret)
				goto out;
		}
	}

	btrfs_release_path(path);

	ret = add_missing_keys(fs_info, &preftrees, path->skip_locking == 0);
	if (ret)
		goto out;

	WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root));

	ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
				    extent_item_pos, sc, ignore_offset);
	if (ret)
		goto out;

	WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect.root.rb_root));

	/*
	 * This walks the tree of merged and resolved refs. Tree blocks are
	 * read in as needed. Unique entries are added to the ulist, and
	 * the list of found roots is updated.
	 *
	 * We release the entire tree in one go before returning.
	 */
	node = rb_first_cached(&preftrees.direct.root);
	while (node) {
		ref = rb_entry(node, struct prelim_ref, rbnode);
		node = rb_next(&ref->rbnode);
		/*
		 * ref->count < 0 can happen here if there are delayed
		 * refs with a node->action of BTRFS_DROP_DELAYED_REF.
		 * prelim_ref_insert() relies on this when merging
		 * identical refs to keep the overall count correct.
		 * prelim_ref_insert() will merge only those refs
		 * which compare identically. Any refs having
		 * e.g. different offsets would not be merged,
		 * and would retain their original ref->count < 0.
		 */
		if (roots && ref->count && ref->root_id && ref->parent == 0) {
			if (sc && sc->root_objectid &&
			    ref->root_id != sc->root_objectid) {
				ret = BACKREF_FOUND_SHARED;
				goto out;
			}

			/* no parent == root of tree */
			ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
			if (ret < 0)
				goto out;
		}
		if (ref->count && ref->parent) {
			if (extent_item_pos && !ref->inode_list &&
			    ref->level == 0) {
				struct extent_buffer *eb;

				eb = read_tree_block(fs_info, ref->parent, 0,
						     0, ref->level, NULL);
				if (IS_ERR(eb)) {
					ret = PTR_ERR(eb);
					goto out;
				}
				if (!extent_buffer_uptodate(eb)) {
					free_extent_buffer(eb);
					ret = -EIO;
					goto out;
				}

				if (!path->skip_locking)
					btrfs_tree_read_lock(eb);
				ret = find_extent_in_eb(eb, bytenr,
					*extent_item_pos, &eie, ignore_offset);
				if (!path->skip_locking)
					btrfs_tree_read_unlock(eb);
				free_extent_buffer(eb);
				if (ret < 0)
					goto out;
				ref->inode_list = eie;
			}
			ret = ulist_add_merge_ptr(refs, ref->parent,
						  ref->inode_list,
						  (void **)&eie, GFP_NOFS);
			if (ret < 0)
				goto out;
			if (!ret && extent_item_pos) {
				/*
				 * We've recorded that parent, so we must extend
				 * its inode list here.
				 *
				 * However if there was corruption we may not
				 * have found an eie, return an error in this
				 * case.
				 */
				ASSERT(eie);
				if (!eie) {
					ret = -EUCLEAN;
					goto out;
				}
				while (eie->next)
					eie = eie->next;
				eie->next = ref->inode_list;
			}
			eie = NULL;
		}
		cond_resched();
	}

out:
	btrfs_free_path(path);

	prelim_release(&preftrees.direct);
	prelim_release(&preftrees.indirect);
	prelim_release(&preftrees.indirect_missing_keys);

	if (ret < 0)
		free_inode_elem_list(eie);
	return ret;
}

static void free_leaf_list(struct ulist *blocks)
{
	struct ulist_node *node = NULL;
	struct extent_inode_elem *eie;
	struct ulist_iterator uiter;

	ULIST_ITER_INIT(&uiter);
	while ((node = ulist_next(blocks, &uiter))) {
		if (!node->aux)
			continue;
		eie = unode_aux_to_inode_list(node);
		free_inode_elem_list(eie);
		node->aux = 0;
	}

	ulist_free(blocks);
}

/*
 * Finds all leaves with a reference to the specified combination of bytenr and
 * offset. The leaves will be stored in the *leafs ulist, which must be freed
 * with ulist_free (each node's aux carries the list of inode elements for that
 * leaf, see free_leaf_list()).
 *
 * returns 0 on success, <0 on error
 */
int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 time_seq, struct ulist **leafs,
			 const u64 *extent_item_pos, bool ignore_offset)
{
	int ret;

	*leafs = ulist_alloc(GFP_NOFS);
	if (!*leafs)
		return -ENOMEM;

	ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
				*leafs, NULL, extent_item_pos, NULL, ignore_offset);
	if (ret < 0 && ret != -ENOENT) {
		free_leaf_list(*leafs);
		return ret;
	}

	return 0;
}

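/*
 * A minimal usage sketch for the helper above (error handling trimmed, the
 * handle_leaf() callback is a made-up placeholder). Callers walk the returned
 * ulist and must release it, including the per-leaf inode element lists, with
 * free_leaf_list():
 *
 *	struct ulist *leafs;
 *	struct ulist_iterator uiter;
 *	struct ulist_node *node;
 *
 *	ret = btrfs_find_all_leafs(trans, fs_info, bytenr, seq, &leafs,
 *				   &extent_item_pos, false);
 *	ULIST_ITER_INIT(&uiter);
 *	while ((node = ulist_next(leafs, &uiter)))
 *		handle_leaf(node->val, unode_aux_to_inode_list(node));
 *	free_leaf_list(leafs);
 *
 * This is roughly the pattern used by the extent-to-inode iteration helpers.
 */
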
/*
 * walk all backrefs for a given extent to find all roots that reference this
 * extent. Walking a backref means finding all extents that reference this
 * extent and in turn walk the backrefs of those, too. Naturally this is a
 * recursive process, but here it is implemented in an iterative fashion: We
 * find all referencing extents for the extent in question and put them on a
 * list. In turn, we find all referencing extents for those, further appending
 * to the list. The way we iterate the list allows adding more elements after
 * the current while iterating. The process stops when we reach the end of the
 * list. Found roots are added to the roots list.
 *
 * returns 0 on success, < 0 on error.
 */
static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info, u64 bytenr,
				     u64 time_seq, struct ulist **roots,
				     bool ignore_offset)
{
	struct ulist *tmp;
	struct ulist_node *node = NULL;
	struct ulist_iterator uiter;
	int ret;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		return -ENOMEM;
	*roots = ulist_alloc(GFP_NOFS);
	if (!*roots) {
		ulist_free(tmp);
		return -ENOMEM;
	}

	ULIST_ITER_INIT(&uiter);
	while (1) {
		ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
					tmp, *roots, NULL, NULL, ignore_offset);
		if (ret < 0 && ret != -ENOENT) {
			ulist_free(tmp);
			ulist_free(*roots);
			*roots = NULL;
			return ret;
		}
		node = ulist_next(tmp, &uiter);
		if (!node)
			break;
		bytenr = node->val;
		cond_resched();
	}

	ulist_free(tmp);
	return 0;
}

int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 time_seq, struct ulist **roots,
			 bool skip_commit_root_sem)
{
	int ret;

	if (!trans && !skip_commit_root_sem)
		down_read(&fs_info->commit_root_sem);
	ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
					time_seq, roots, false);
	if (!trans && !skip_commit_root_sem)
		up_read(&fs_info->commit_root_sem);
	return ret;
}

/*
 * The caller has joined a transaction or is holding a read lock on the
 * fs_info->commit_root_sem semaphore, so no need to worry about the root's last
 * snapshot field changing while updating or checking the cache.
 */
static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
					struct btrfs_root *root,
					u64 bytenr, int level, bool *is_shared)
{
	struct btrfs_backref_shared_cache_entry *entry;

	if (!cache->use_cache)
		return false;

	if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
		return false;

	/*
	 * Level -1 is used for the data extent, which is not reliable to cache
	 * because its reference count can increase or decrease without us
	 * realizing. We cache results only for extent buffers that lead from
	 * the root node down to the leaf with the file extent item.
	 */
	ASSERT(level >= 0);

	entry = &cache->entries[level];

	/* Unused cache entry or being used for some other extent buffer. */
	if (entry->bytenr != bytenr)
		return false;

	/*
	 * We cached a false result, but the last snapshot generation of the
	 * root changed, so we now have a snapshot. Don't trust the result.
	 */
	if (!entry->is_shared &&
	    entry->gen != btrfs_root_last_snapshot(&root->root_item))
		return false;

	/*
	 * If we cached a true result and the last generation used for dropping
	 * a root changed, we can not trust the result, because the dropped root
	 * could be a snapshot sharing this extent buffer.
	 */
	if (entry->is_shared &&
	    entry->gen != btrfs_get_last_root_drop_gen(root->fs_info))
		return false;

	*is_shared = entry->is_shared;
	/*
	 * If the node at this level is shared, then all nodes below are also
	 * shared. Currently some of the nodes below may be marked as not shared
	 * because we have just switched from one leaf to another, and switched
	 * also other nodes above the leaf and below the current level, so mark
	 * them as shared.
	 */
	if (*is_shared) {
		for (int i = 0; i < level; i++) {
			cache->entries[i].is_shared = true;
			cache->entries[i].gen = entry->gen;
		}
	}

	return true;
}

/*
 * The caller has joined a transaction or is holding a read lock on the
 * fs_info->commit_root_sem semaphore, so no need to worry about the root's last
 * snapshot field changing while updating or checking the cache.
 */
static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
				       struct btrfs_root *root,
				       u64 bytenr, int level, bool is_shared)
{
	struct btrfs_backref_shared_cache_entry *entry;
	u64 gen;

	if (!cache->use_cache)
		return;

	if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
		return;

	/*
	 * Level -1 is used for the data extent, which is not reliable to cache
	 * because its reference count can increase or decrease without us
	 * realizing. We cache results only for extent buffers that lead from
	 * the root node down to the leaf with the file extent item.
	 */
	ASSERT(level >= 0);

	if (is_shared)
		gen = btrfs_get_last_root_drop_gen(root->fs_info);
	else
		gen = btrfs_root_last_snapshot(&root->root_item);

	entry = &cache->entries[level];
	entry->bytenr = bytenr;
	entry->is_shared = is_shared;
	entry->gen = gen;

	/*
	 * If we found an extent buffer is shared, set the cache result for all
	 * extent buffers below it to true. As nodes in the path are COWed,
	 * their sharedness is moved to their children, and if a leaf is COWed,
	 * then the sharedness of a data extent becomes direct, and the refcount
	 * of the data extent is increased in its extent item in the extent tree.
	 */
	if (is_shared) {
		for (int i = 0; i < level; i++) {
			entry = &cache->entries[i];
			entry->is_shared = is_shared;
			entry->gen = gen;
		}
	}
}

/*
 * Check if a data extent is shared or not.
 *
 * @root:        The root the inode belongs to.
 * @inum:        Number of the inode whose extent we are checking.
 * @bytenr:      Logical bytenr of the extent we are checking.
 * @extent_gen:  Generation of the extent (file extent item) or 0 if it is
 *               not known.
 * @roots:       List of roots this extent is shared among.
 * @tmp:         Temporary list used for iteration.
 * @cache:       A backref lookup result cache.
 *
 * btrfs_is_data_extent_shared uses the backref walking code but will short
 * circuit as soon as it finds a root or inode that doesn't match the
 * one passed in. This provides a significant performance benefit for
 * callers (such as fiemap) which want to know whether the extent is
 * shared but do not need a ref count.
 *
 * This attempts to attach to the running transaction in order to account for
 * delayed refs, but continues on even when no running transaction exists.
 *
 * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
 */
int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
				u64 extent_gen,
				struct ulist *roots, struct ulist *tmp,
				struct btrfs_backref_shared_cache *cache)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_trans_handle *trans;
	struct ulist_iterator uiter;
	struct ulist_node *node;
	struct btrfs_seq_list elem = BTRFS_SEQ_LIST_INIT(elem);
	int ret = 0;
	struct share_check shared = {
		.root_objectid = root->root_key.objectid,
		.inum = inum,
		.share_count = 0,
		.have_delayed_delete_refs = false,
	};
	int level;

	ulist_init(roots);
	ulist_init(tmp);

	trans = btrfs_join_transaction_nostart(root);
	if (IS_ERR(trans)) {
		if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
			ret = PTR_ERR(trans);
			goto out;
		}
		trans = NULL;
		down_read(&fs_info->commit_root_sem);
	} else {
		btrfs_get_tree_mod_seq(fs_info, &elem);
	}

	/* -1 means we are in the bytenr of the data extent. */
	level = -1;
	ULIST_ITER_INIT(&uiter);
	cache->use_cache = true;
	while (1) {
		bool is_shared;
		bool cached;

		ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
					roots, NULL, &shared, false);
		if (ret == BACKREF_FOUND_SHARED) {
			/* this is the only condition under which we return 1 */
			ret = 1;
			if (level >= 0)
				store_backref_shared_cache(cache, root, bytenr,
							   level, true);
			break;
		}
		if (ret < 0 && ret != -ENOENT)
			break;
		ret = 0;
		/*
		 * If our data extent is not shared through reflinks and it was
		 * created in a generation after the last one used to create a
		 * snapshot of the inode's root, then it can not be shared
		 * indirectly through subtrees, as that can only happen with
		 * snapshots. In this case bail out, no need to check for the
		 * sharedness of extent buffers.
		 */
		if (level == -1 &&
		    extent_gen > btrfs_root_last_snapshot(&root->root_item))
			break;

		/*
		 * If our data extent was not directly shared (without multiple
		 * reference items), then it might have a single reference item
		 * with a count > 1 for the same offset, which means there are 2
		 * (or more) file extent items that point to the data extent -
		 * this happens when a file extent item needs to be split and
		 * then one item gets moved to another leaf due to a b+tree leaf
		 * split when inserting some item. In this case the file extent
		 * items may be located in different leaves and therefore some
		 * of the leaves may be referenced through shared subtrees while
		 * others are not. Since our extent buffer cache only works for
		 * a single path (by far the most common case and simpler to
		 * deal with), we can not use it if we have multiple leaves
		 * (which implies multiple paths).
		 */
		if (level == -1 && tmp->nnodes > 1)
			cache->use_cache = false;

		if (level >= 0)
			store_backref_shared_cache(cache, root, bytenr,
						   level, false);
		node = ulist_next(tmp, &uiter);
		if (!node)
			break;
		bytenr = node->val;
		level++;
		cached = lookup_backref_shared_cache(cache, root, bytenr, level,
						     &is_shared);
		if (cached) {
			ret = (is_shared ? 1 : 0);
			break;
		}
		shared.share_count = 0;
		shared.have_delayed_delete_refs = false;
		cond_resched();
	}

	if (trans) {
		btrfs_put_tree_mod_seq(fs_info, &elem);
		btrfs_end_transaction(trans);
	} else {
		up_read(&fs_info->commit_root_sem);
	}
out:
	ulist_release(roots);
	ulist_release(tmp);
	return ret;
}

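/*
 * A rough usage sketch for the function above, loosely modelled on what a
 * fiemap-style caller might do (the ulists and cache are assumed to be
 * allocated and reused by the caller across extents; names are illustrative):
 *
 *	ret = btrfs_is_data_extent_shared(root, inum, disk_bytenr, extent_gen,
 *					  roots, tmp, &shared_cache);
 *	if (ret < 0)
 *		goto out;
 *	if (ret)
 *		flags |= FIEMAP_EXTENT_SHARED;
 *
 * Reusing @roots, @tmp and the cache across calls for the same inode is what
 * makes the per-level sharedness cache effective.
 */
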
int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
			  u64 start_off, struct btrfs_path *path,
			  struct btrfs_inode_extref **ret_extref,
			  u64 *found_off)
{
	int ret, slot;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_inode_extref *extref;
	const struct extent_buffer *leaf;
	unsigned long ptr;

	key.objectid = inode_objectid;
	key.type = BTRFS_INODE_EXTREF_KEY;
	key.offset = start_off;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;

	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			/*
			 * If the item at offset is not found,
			 * btrfs_search_slot will point us to the slot
			 * where it should be inserted. In our case
			 * that will be the slot directly before the
			 * next BTRFS_INODE_EXTREF_KEY item. In the case
			 * that we're pointing to the last slot in a
			 * leaf, we must move one leaf over.
			 */
			ret = btrfs_next_leaf(root, path);
			if (ret) {
				if (ret >= 1)
					ret = -ENOENT;
				break;
			}
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		/*
		 * Check that we're still looking at an extended ref key for
		 * this particular objectid. If we have different
		 * objectid or type then there are no more to be found
		 * in the tree and we can exit.
		 */
		ret = -ENOENT;
		if (found_key.objectid != inode_objectid)
			break;
		if (found_key.type != BTRFS_INODE_EXTREF_KEY)
			break;

		ret = 0;
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		extref = (struct btrfs_inode_extref *)ptr;
		*ret_extref = extref;
		if (found_off)
			*found_off = found_key.offset;
		break;
	}

	return ret;
}

/*
 * this iterates to turn a name (from iref/extref) into a full filesystem path.
 * Elements of the path are separated by '/' and the path is guaranteed to be
 * 0-terminated. The path is only given within the current file system.
 * Therefore, it never starts with a '/'. The caller is responsible for
 * providing "size" bytes in "dest". The dest buffer will be filled backwards.
 * Finally, the start point of the resulting string is returned. This pointer
 * is within dest, normally.
 * In case the path buffer would overflow, the pointer is decremented further
 * as if output was written to the buffer, though no more output is actually
 * generated. That way, the caller can determine how much space would be
 * required for the path to fit into the buffer. In that case, the returned
 * value will be smaller than dest. Callers must check this!
 */
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
			u32 name_len, unsigned long name_off,
			struct extent_buffer *eb_in, u64 parent,
			char *dest, u32 size)
{
	int slot;
	u64 next_inum;
	int ret;
	s64 bytes_left = ((s64)size) - 1;
	struct extent_buffer *eb = eb_in;
	struct btrfs_key found_key;
	struct btrfs_inode_ref *iref;

	if (bytes_left >= 0)
		dest[bytes_left] = '\0';

	while (1) {
		bytes_left -= name_len;
		if (bytes_left >= 0)
			read_extent_buffer(eb, dest + bytes_left,
					   name_off, name_len);
		if (eb != eb_in) {
			if (!path->skip_locking)
				btrfs_tree_read_unlock(eb);
			free_extent_buffer(eb);
		}
		ret = btrfs_find_item(fs_root, path, parent, 0,
				      BTRFS_INODE_REF_KEY, &found_key);
		if (ret > 0)
			ret = -ENOENT;
		if (ret)
			break;

		next_inum = found_key.offset;

		/* regular exit ahead */
		if (parent == next_inum)
			break;

		slot = path->slots[0];
		eb = path->nodes[0];
		/* make sure we can use eb after releasing the path */
		if (eb != eb_in) {
			path->nodes[0] = NULL;
			path->locks[0] = 0;
		}
		btrfs_release_path(path);
		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);

		name_len = btrfs_inode_ref_name_len(eb, iref);
		name_off = (unsigned long)(iref + 1);

		parent = next_inum;
		--bytes_left;
		if (bytes_left >= 0)
			dest[bytes_left] = '/';
	}

	btrfs_release_path(path);

	if (ret)
		return ERR_PTR(ret);

	return dest + bytes_left;
}

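/*
 * Sketch of how a caller is expected to check the result of the function
 * above (buffer names are illustrative). Because the path is built backwards,
 * a successful return points somewhere inside @dest; a pointer below @dest
 * means the buffer was too small, and buf + buf_size - name gives the total
 * size that would have been needed:
 *
 *	name = btrfs_ref_to_path(fs_root, path, name_len, name_off,
 *				 eb, parent, buf, buf_size);
 *	if (IS_ERR(name))
 *		return PTR_ERR(name);
 *	if (name < buf)
 *		return -ENAMETOOLONG;
 *	// 'name' is the NUL-terminated path, stored at the end of 'buf'
 */
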
1937 */ 1938 int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, 1939 struct btrfs_path *path, struct btrfs_key *found_key, 1940 u64 *flags_ret) 1941 { 1942 struct btrfs_root *extent_root = btrfs_extent_root(fs_info, logical); 1943 int ret; 1944 u64 flags; 1945 u64 size = 0; 1946 u32 item_size; 1947 const struct extent_buffer *eb; 1948 struct btrfs_extent_item *ei; 1949 struct btrfs_key key; 1950 1951 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) 1952 key.type = BTRFS_METADATA_ITEM_KEY; 1953 else 1954 key.type = BTRFS_EXTENT_ITEM_KEY; 1955 key.objectid = logical; 1956 key.offset = (u64)-1; 1957 1958 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 1959 if (ret < 0) 1960 return ret; 1961 1962 ret = btrfs_previous_extent_item(extent_root, path, 0); 1963 if (ret) { 1964 if (ret > 0) 1965 ret = -ENOENT; 1966 return ret; 1967 } 1968 btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); 1969 if (found_key->type == BTRFS_METADATA_ITEM_KEY) 1970 size = fs_info->nodesize; 1971 else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) 1972 size = found_key->offset; 1973 1974 if (found_key->objectid > logical || 1975 found_key->objectid + size <= logical) { 1976 btrfs_debug(fs_info, 1977 "logical %llu is not within any extent", logical); 1978 return -ENOENT; 1979 } 1980 1981 eb = path->nodes[0]; 1982 item_size = btrfs_item_size(eb, path->slots[0]); 1983 BUG_ON(item_size < sizeof(*ei)); 1984 1985 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); 1986 flags = btrfs_extent_flags(eb, ei); 1987 1988 btrfs_debug(fs_info, 1989 "logical %llu is at position %llu within the extent (%llu EXTENT_ITEM %llu) flags %#llx size %u", 1990 logical, logical - found_key->objectid, found_key->objectid, 1991 found_key->offset, flags, item_size); 1992 1993 WARN_ON(!flags_ret); 1994 if (flags_ret) { 1995 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) 1996 *flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK; 1997 else if (flags & BTRFS_EXTENT_FLAG_DATA) 1998 *flags_ret = BTRFS_EXTENT_FLAG_DATA; 1999 else 2000 BUG(); 2001 return 0; 2002 } 2003 2004 return -EIO; 2005 } 2006 2007 /* 2008 * helper function to iterate extent inline refs. ptr must point to a 0 value 2009 * for the first call and may be modified. it is used to track state. 2010 * if more refs exist, 0 is returned and the next call to 2011 * get_extent_inline_ref must pass the modified ptr parameter to get the 2012 * next ref. after the last ref was processed, 1 is returned. 
2013 * returns <0 on error 2014 */ 2015 static int get_extent_inline_ref(unsigned long *ptr, 2016 const struct extent_buffer *eb, 2017 const struct btrfs_key *key, 2018 const struct btrfs_extent_item *ei, 2019 u32 item_size, 2020 struct btrfs_extent_inline_ref **out_eiref, 2021 int *out_type) 2022 { 2023 unsigned long end; 2024 u64 flags; 2025 struct btrfs_tree_block_info *info; 2026 2027 if (!*ptr) { 2028 /* first call */ 2029 flags = btrfs_extent_flags(eb, ei); 2030 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 2031 if (key->type == BTRFS_METADATA_ITEM_KEY) { 2032 /* a skinny metadata extent */ 2033 *out_eiref = 2034 (struct btrfs_extent_inline_ref *)(ei + 1); 2035 } else { 2036 WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY); 2037 info = (struct btrfs_tree_block_info *)(ei + 1); 2038 *out_eiref = 2039 (struct btrfs_extent_inline_ref *)(info + 1); 2040 } 2041 } else { 2042 *out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1); 2043 } 2044 *ptr = (unsigned long)*out_eiref; 2045 if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size) 2046 return -ENOENT; 2047 } 2048 2049 end = (unsigned long)ei + item_size; 2050 *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr); 2051 *out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref, 2052 BTRFS_REF_TYPE_ANY); 2053 if (*out_type == BTRFS_REF_TYPE_INVALID) 2054 return -EUCLEAN; 2055 2056 *ptr += btrfs_extent_inline_ref_size(*out_type); 2057 WARN_ON(*ptr > end); 2058 if (*ptr == end) 2059 return 1; /* last */ 2060 2061 return 0; 2062 } 2063 2064 /* 2065 * reads the tree block backref for an extent. tree level and root are returned 2066 * through out_level and out_root. ptr must point to a 0 value for the first 2067 * call and may be modified (see get_extent_inline_ref comment). 2068 * returns 0 if data was provided, 1 if there was no more data to provide or 2069 * <0 on error. 
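 *
 * editorial sketch, not part of the original source: callers usually loop
 * until 1 is returned; eb, key, ei and item_size below are assumed to come
 * from a prior extent_from_logical() + btrfs_item_ptr() lookup:
 *
 *	unsigned long ptr = 0;
 *	u64 ref_root;
 *	u8 ref_level;
 *	int ret;
 *
 *	do {
 *		ret = tree_backref_for_extent(&ptr, eb, &key, ei, item_size,
 *					      &ref_root, &ref_level);
 *		if (ret < 0)
 *			break;
 *		if (ret == 0)
 *			report_tree_ref(ref_root, ref_level);
 *	} while (ret == 0);
 *
 * report_tree_ref() is a placeholder for whatever the caller does with each
 * resolved root/level pair.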
2070 */ 2071 int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, 2072 struct btrfs_key *key, struct btrfs_extent_item *ei, 2073 u32 item_size, u64 *out_root, u8 *out_level) 2074 { 2075 int ret; 2076 int type; 2077 struct btrfs_extent_inline_ref *eiref; 2078 2079 if (*ptr == (unsigned long)-1) 2080 return 1; 2081 2082 while (1) { 2083 ret = get_extent_inline_ref(ptr, eb, key, ei, item_size, 2084 &eiref, &type); 2085 if (ret < 0) 2086 return ret; 2087 2088 if (type == BTRFS_TREE_BLOCK_REF_KEY || 2089 type == BTRFS_SHARED_BLOCK_REF_KEY) 2090 break; 2091 2092 if (ret == 1) 2093 return 1; 2094 } 2095 2096 /* we can treat both ref types equally here */ 2097 *out_root = btrfs_extent_inline_ref_offset(eb, eiref); 2098 2099 if (key->type == BTRFS_EXTENT_ITEM_KEY) { 2100 struct btrfs_tree_block_info *info; 2101 2102 info = (struct btrfs_tree_block_info *)(ei + 1); 2103 *out_level = btrfs_tree_block_level(eb, info); 2104 } else { 2105 ASSERT(key->type == BTRFS_METADATA_ITEM_KEY); 2106 *out_level = (u8)key->offset; 2107 } 2108 2109 if (ret == 1) 2110 *ptr = (unsigned long)-1; 2111 2112 return 0; 2113 } 2114 2115 static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, 2116 struct extent_inode_elem *inode_list, 2117 u64 root, u64 extent_item_objectid, 2118 iterate_extent_inodes_t *iterate, void *ctx) 2119 { 2120 struct extent_inode_elem *eie; 2121 int ret = 0; 2122 2123 for (eie = inode_list; eie; eie = eie->next) { 2124 btrfs_debug(fs_info, 2125 "ref for %llu resolved, key (%llu EXTEND_DATA %llu), root %llu", 2126 extent_item_objectid, eie->inum, 2127 eie->offset, root); 2128 ret = iterate(eie->inum, eie->offset, root, ctx); 2129 if (ret) { 2130 btrfs_debug(fs_info, 2131 "stopping iteration for %llu due to ret=%d", 2132 extent_item_objectid, ret); 2133 break; 2134 } 2135 } 2136 2137 return ret; 2138 } 2139 2140 /* 2141 * calls iterate() for every inode that references the extent identified by 2142 * the given parameters. 2143 * when the iterator function returns a non-zero value, iteration stops. 
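 *
 * editorial sketch, not part of the original source: a minimal user that
 * just counts the references to a data extent (count_ref(), extent_bytenr
 * and extent_offset are hypothetical names) might look like:
 *
 *	static int count_ref(u64 inum, u64 offset, u64 root, void *ctx)
 *	{
 *		(*(u64 *)ctx)++;
 *		return 0;
 *	}
 *
 *	u64 refs = 0;
 *	int ret;
 *
 *	ret = iterate_extent_inodes(fs_info, extent_bytenr, extent_offset,
 *				    0, count_ref, &refs, false);
 *
 * returning non-zero from the callback would stop the iteration early, as
 * described above.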
2144 */ 2145 int iterate_extent_inodes(struct btrfs_fs_info *fs_info, 2146 u64 extent_item_objectid, u64 extent_item_pos, 2147 int search_commit_root, 2148 iterate_extent_inodes_t *iterate, void *ctx, 2149 bool ignore_offset) 2150 { 2151 int ret; 2152 struct btrfs_trans_handle *trans = NULL; 2153 struct ulist *refs = NULL; 2154 struct ulist *roots = NULL; 2155 struct ulist_node *ref_node = NULL; 2156 struct ulist_node *root_node = NULL; 2157 struct btrfs_seq_list seq_elem = BTRFS_SEQ_LIST_INIT(seq_elem); 2158 struct ulist_iterator ref_uiter; 2159 struct ulist_iterator root_uiter; 2160 2161 btrfs_debug(fs_info, "resolving all inodes for extent %llu", 2162 extent_item_objectid); 2163 2164 if (!search_commit_root) { 2165 trans = btrfs_attach_transaction(fs_info->tree_root); 2166 if (IS_ERR(trans)) { 2167 if (PTR_ERR(trans) != -ENOENT && 2168 PTR_ERR(trans) != -EROFS) 2169 return PTR_ERR(trans); 2170 trans = NULL; 2171 } 2172 } 2173 2174 if (trans) 2175 btrfs_get_tree_mod_seq(fs_info, &seq_elem); 2176 else 2177 down_read(&fs_info->commit_root_sem); 2178 2179 ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, 2180 seq_elem.seq, &refs, 2181 &extent_item_pos, ignore_offset); 2182 if (ret) 2183 goto out; 2184 2185 ULIST_ITER_INIT(&ref_uiter); 2186 while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { 2187 ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val, 2188 seq_elem.seq, &roots, 2189 ignore_offset); 2190 if (ret) 2191 break; 2192 ULIST_ITER_INIT(&root_uiter); 2193 while (!ret && (root_node = ulist_next(roots, &root_uiter))) { 2194 btrfs_debug(fs_info, 2195 "root %llu references leaf %llu, data list %#llx", 2196 root_node->val, ref_node->val, 2197 ref_node->aux); 2198 ret = iterate_leaf_refs(fs_info, 2199 (struct extent_inode_elem *) 2200 (uintptr_t)ref_node->aux, 2201 root_node->val, 2202 extent_item_objectid, 2203 iterate, ctx); 2204 } 2205 ulist_free(roots); 2206 } 2207 2208 free_leaf_list(refs); 2209 out: 2210 if (trans) { 2211 btrfs_put_tree_mod_seq(fs_info, &seq_elem); 2212 btrfs_end_transaction(trans); 2213 } else { 2214 up_read(&fs_info->commit_root_sem); 2215 } 2216 2217 return ret; 2218 } 2219 2220 static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) 2221 { 2222 struct btrfs_data_container *inodes = ctx; 2223 const size_t c = 3 * sizeof(u64); 2224 2225 if (inodes->bytes_left >= c) { 2226 inodes->bytes_left -= c; 2227 inodes->val[inodes->elem_cnt] = inum; 2228 inodes->val[inodes->elem_cnt + 1] = offset; 2229 inodes->val[inodes->elem_cnt + 2] = root; 2230 inodes->elem_cnt += 3; 2231 } else { 2232 inodes->bytes_missing += c - inodes->bytes_left; 2233 inodes->bytes_left = 0; 2234 inodes->elem_missed += 3; 2235 } 2236 2237 return 0; 2238 } 2239 2240 int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, 2241 struct btrfs_path *path, 2242 void *ctx, bool ignore_offset) 2243 { 2244 int ret; 2245 u64 extent_item_pos; 2246 u64 flags = 0; 2247 struct btrfs_key found_key; 2248 int search_commit_root = path->search_commit_root; 2249 2250 ret = extent_from_logical(fs_info, logical, path, &found_key, &flags); 2251 btrfs_release_path(path); 2252 if (ret < 0) 2253 return ret; 2254 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) 2255 return -EINVAL; 2256 2257 extent_item_pos = logical - found_key.objectid; 2258 ret = iterate_extent_inodes(fs_info, found_key.objectid, 2259 extent_item_pos, search_commit_root, 2260 build_ino_list, ctx, ignore_offset); 2261 2262 return ret; 2263 } 2264 2265 static int inode_to_path(u64 inum, u32 name_len, 
unsigned long name_off, 2266 struct extent_buffer *eb, struct inode_fs_paths *ipath); 2267 2268 static int iterate_inode_refs(u64 inum, struct inode_fs_paths *ipath) 2269 { 2270 int ret = 0; 2271 int slot; 2272 u32 cur; 2273 u32 len; 2274 u32 name_len; 2275 u64 parent = 0; 2276 int found = 0; 2277 struct btrfs_root *fs_root = ipath->fs_root; 2278 struct btrfs_path *path = ipath->btrfs_path; 2279 struct extent_buffer *eb; 2280 struct btrfs_inode_ref *iref; 2281 struct btrfs_key found_key; 2282 2283 while (!ret) { 2284 ret = btrfs_find_item(fs_root, path, inum, 2285 parent ? parent + 1 : 0, BTRFS_INODE_REF_KEY, 2286 &found_key); 2287 2288 if (ret < 0) 2289 break; 2290 if (ret) { 2291 ret = found ? 0 : -ENOENT; 2292 break; 2293 } 2294 ++found; 2295 2296 parent = found_key.offset; 2297 slot = path->slots[0]; 2298 eb = btrfs_clone_extent_buffer(path->nodes[0]); 2299 if (!eb) { 2300 ret = -ENOMEM; 2301 break; 2302 } 2303 btrfs_release_path(path); 2304 2305 iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); 2306 2307 for (cur = 0; cur < btrfs_item_size(eb, slot); cur += len) { 2308 name_len = btrfs_inode_ref_name_len(eb, iref); 2309 /* path must be released before calling iterate()! */ 2310 btrfs_debug(fs_root->fs_info, 2311 "following ref at offset %u for inode %llu in tree %llu", 2312 cur, found_key.objectid, 2313 fs_root->root_key.objectid); 2314 ret = inode_to_path(parent, name_len, 2315 (unsigned long)(iref + 1), eb, ipath); 2316 if (ret) 2317 break; 2318 len = sizeof(*iref) + name_len; 2319 iref = (struct btrfs_inode_ref *)((char *)iref + len); 2320 } 2321 free_extent_buffer(eb); 2322 } 2323 2324 btrfs_release_path(path); 2325 2326 return ret; 2327 } 2328 2329 static int iterate_inode_extrefs(u64 inum, struct inode_fs_paths *ipath) 2330 { 2331 int ret; 2332 int slot; 2333 u64 offset = 0; 2334 u64 parent; 2335 int found = 0; 2336 struct btrfs_root *fs_root = ipath->fs_root; 2337 struct btrfs_path *path = ipath->btrfs_path; 2338 struct extent_buffer *eb; 2339 struct btrfs_inode_extref *extref; 2340 u32 item_size; 2341 u32 cur_offset; 2342 unsigned long ptr; 2343 2344 while (1) { 2345 ret = btrfs_find_one_extref(fs_root, inum, offset, path, &extref, 2346 &offset); 2347 if (ret < 0) 2348 break; 2349 if (ret) { 2350 ret = found ? 
0 : -ENOENT;
2351 break;
2352 }
2353 ++found;
2354
2355 slot = path->slots[0];
2356 eb = btrfs_clone_extent_buffer(path->nodes[0]);
2357 if (!eb) {
2358 ret = -ENOMEM;
2359 break;
2360 }
2361 btrfs_release_path(path);
2362
2363 item_size = btrfs_item_size(eb, slot);
2364 ptr = btrfs_item_ptr_offset(eb, slot);
2365 cur_offset = 0;
2366
2367 while (cur_offset < item_size) {
2368 u32 name_len;
2369
2370 extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
2371 parent = btrfs_inode_extref_parent(eb, extref);
2372 name_len = btrfs_inode_extref_name_len(eb, extref);
2373 ret = inode_to_path(parent, name_len,
2374 (unsigned long)&extref->name, eb, ipath);
2375 if (ret)
2376 break;
2377
2378 cur_offset += btrfs_inode_extref_name_len(eb, extref);
2379 cur_offset += sizeof(*extref);
2380 }
2381 free_extent_buffer(eb);
2382
2383 offset++;
2384 }
2385
2386 btrfs_release_path(path);
2387
2388 return ret;
2389 }
2390
2391 /*
2392 * returns 0 if the path could be dumped (probably truncated)
2393 * returns <0 in case of an error
2394 */
2395 static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
2396 struct extent_buffer *eb, struct inode_fs_paths *ipath)
2397 {
2398 char *fspath;
2399 char *fspath_min;
2400 int i = ipath->fspath->elem_cnt;
2401 const int s_ptr = sizeof(char *);
2402 u32 bytes_left;
2403
2404 bytes_left = ipath->fspath->bytes_left > s_ptr ?
2405 ipath->fspath->bytes_left - s_ptr : 0;
2406
2407 fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr;
2408 fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len,
2409 name_off, eb, inum, fspath_min, bytes_left);
2410 if (IS_ERR(fspath))
2411 return PTR_ERR(fspath);
2412
2413 if (fspath > fspath_min) {
2414 ipath->fspath->val[i] = (u64)(unsigned long)fspath;
2415 ++ipath->fspath->elem_cnt;
2416 ipath->fspath->bytes_left = fspath - fspath_min;
2417 } else {
2418 ++ipath->fspath->elem_missed;
2419 ipath->fspath->bytes_missing += fspath_min - fspath;
2420 ipath->fspath->bytes_left = 0;
2421 }
2422
2423 return 0;
2424 }
2425
2426 /*
2427 * this dumps all file system paths to the inode into the ipath struct, provided
2428 * it has been created large enough. each path is zero-terminated and accessed
2429 * from ipath->fspath->val[i].
2430 * when it returns, ipath->fspath->elem_cnt paths are available in
2431 * ipath->fspath->val[]. when the allocated space wasn't sufficient, the
2432 * number of missed paths is recorded in ipath->fspath->elem_missed, otherwise,
2433 * it's zero. ipath->fspath->bytes_missing holds the number of bytes that would
2434 * have been needed to return all paths.
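 *
 * editorial sketch, not part of the original source: typical usage follows
 * the pattern of the INO_PATHS ioctl (fs_root, inum and size are assumed to
 * be known, error handling trimmed):
 *
 *	struct btrfs_path *path = btrfs_alloc_path();
 *	struct inode_fs_paths *ipath;
 *	u32 i;
 *	int ret;
 *
 *	if (!path)
 *		return -ENOMEM;
 *	ipath = init_ipath(size, fs_root, path);
 *	if (IS_ERR(ipath)) {
 *		btrfs_free_path(path);
 *		return PTR_ERR(ipath);
 *	}
 *	ret = paths_from_inode(inum, ipath);
 *	for (i = 0; !ret && i < ipath->fspath->elem_cnt; i++)
 *		pr_info("path %u: %s\n", i,
 *			(char *)(unsigned long)ipath->fspath->val[i]);
 *	free_ipath(ipath);
 *	btrfs_free_path(path);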
2435 */ 2436 int paths_from_inode(u64 inum, struct inode_fs_paths *ipath) 2437 { 2438 int ret; 2439 int found_refs = 0; 2440 2441 ret = iterate_inode_refs(inum, ipath); 2442 if (!ret) 2443 ++found_refs; 2444 else if (ret != -ENOENT) 2445 return ret; 2446 2447 ret = iterate_inode_extrefs(inum, ipath); 2448 if (ret == -ENOENT && found_refs) 2449 return 0; 2450 2451 return ret; 2452 } 2453 2454 struct btrfs_data_container *init_data_container(u32 total_bytes) 2455 { 2456 struct btrfs_data_container *data; 2457 size_t alloc_bytes; 2458 2459 alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); 2460 data = kvmalloc(alloc_bytes, GFP_KERNEL); 2461 if (!data) 2462 return ERR_PTR(-ENOMEM); 2463 2464 if (total_bytes >= sizeof(*data)) { 2465 data->bytes_left = total_bytes - sizeof(*data); 2466 data->bytes_missing = 0; 2467 } else { 2468 data->bytes_missing = sizeof(*data) - total_bytes; 2469 data->bytes_left = 0; 2470 } 2471 2472 data->elem_cnt = 0; 2473 data->elem_missed = 0; 2474 2475 return data; 2476 } 2477 2478 /* 2479 * allocates space to return multiple file system paths for an inode. 2480 * total_bytes to allocate are passed, note that space usable for actual path 2481 * information will be total_bytes - sizeof(struct inode_fs_paths). 2482 * the returned pointer must be freed with free_ipath() in the end. 2483 */ 2484 struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, 2485 struct btrfs_path *path) 2486 { 2487 struct inode_fs_paths *ifp; 2488 struct btrfs_data_container *fspath; 2489 2490 fspath = init_data_container(total_bytes); 2491 if (IS_ERR(fspath)) 2492 return ERR_CAST(fspath); 2493 2494 ifp = kmalloc(sizeof(*ifp), GFP_KERNEL); 2495 if (!ifp) { 2496 kvfree(fspath); 2497 return ERR_PTR(-ENOMEM); 2498 } 2499 2500 ifp->btrfs_path = path; 2501 ifp->fspath = fspath; 2502 ifp->fs_root = fs_root; 2503 2504 return ifp; 2505 } 2506 2507 void free_ipath(struct inode_fs_paths *ipath) 2508 { 2509 if (!ipath) 2510 return; 2511 kvfree(ipath->fspath); 2512 kfree(ipath); 2513 } 2514 2515 struct btrfs_backref_iter *btrfs_backref_iter_alloc( 2516 struct btrfs_fs_info *fs_info, gfp_t gfp_flag) 2517 { 2518 struct btrfs_backref_iter *ret; 2519 2520 ret = kzalloc(sizeof(*ret), gfp_flag); 2521 if (!ret) 2522 return NULL; 2523 2524 ret->path = btrfs_alloc_path(); 2525 if (!ret->path) { 2526 kfree(ret); 2527 return NULL; 2528 } 2529 2530 /* Current backref iterator only supports iteration in commit root */ 2531 ret->path->search_commit_root = 1; 2532 ret->path->skip_locking = 1; 2533 ret->fs_info = fs_info; 2534 2535 return ret; 2536 } 2537 2538 int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr) 2539 { 2540 struct btrfs_fs_info *fs_info = iter->fs_info; 2541 struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr); 2542 struct btrfs_path *path = iter->path; 2543 struct btrfs_extent_item *ei; 2544 struct btrfs_key key; 2545 int ret; 2546 2547 key.objectid = bytenr; 2548 key.type = BTRFS_METADATA_ITEM_KEY; 2549 key.offset = (u64)-1; 2550 iter->bytenr = bytenr; 2551 2552 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2553 if (ret < 0) 2554 return ret; 2555 if (ret == 0) { 2556 ret = -EUCLEAN; 2557 goto release; 2558 } 2559 if (path->slots[0] == 0) { 2560 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); 2561 ret = -EUCLEAN; 2562 goto release; 2563 } 2564 path->slots[0]--; 2565 2566 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 2567 if ((key.type != BTRFS_EXTENT_ITEM_KEY && 2568 key.type != BTRFS_METADATA_ITEM_KEY) || key.objectid 
!= bytenr) {
2569 ret = -ENOENT;
2570 goto release;
2571 }
2572 memcpy(&iter->cur_key, &key, sizeof(key));
2573 iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
2574 path->slots[0]);
2575 iter->end_ptr = (u32)(iter->item_ptr +
2576 btrfs_item_size(path->nodes[0], path->slots[0]));
2577 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
2578 struct btrfs_extent_item);
2579
2580 /*
2581 * Only iteration of tree backrefs is supported for now.
2582 *
2583 * This is an extra precaution for non skinny-metadata, where
2584 * EXTENT_ITEM is also used for tree blocks, so we can only use the
2585 * extent flags to determine if it's a tree block.
2586 */
2587 if (btrfs_extent_flags(path->nodes[0], ei) & BTRFS_EXTENT_FLAG_DATA) {
2588 ret = -ENOTSUPP;
2589 goto release;
2590 }
2591 iter->cur_ptr = (u32)(iter->item_ptr + sizeof(*ei));
2592
2593 /* If there is no inline backref, go search for keyed backref */
2594 if (iter->cur_ptr >= iter->end_ptr) {
2595 ret = btrfs_next_item(extent_root, path);
2596
2597 /* No inline nor keyed ref */
2598 if (ret > 0) {
2599 ret = -ENOENT;
2600 goto release;
2601 }
2602 if (ret < 0)
2603 goto release;
2604
2605 btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key,
2606 path->slots[0]);
2607 if (iter->cur_key.objectid != bytenr ||
2608 (iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY &&
2609 iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY)) {
2610 ret = -ENOENT;
2611 goto release;
2612 }
2613 iter->cur_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0],
2614 path->slots[0]);
2615 iter->item_ptr = iter->cur_ptr;
2616 iter->end_ptr = (u32)(iter->item_ptr + btrfs_item_size(
2617 path->nodes[0], path->slots[0]));
2618 }
2619
2620 return 0;
2621 release:
2622 btrfs_backref_iter_release(iter);
2623 return ret;
2624 }
2625
2626 /*
2627 * Go to the next backref item of the current bytenr; it can be either inlined
2628 * or keyed.
2629 *
2630 * The caller needs to check whether it's an inline or a keyed ref via iter->cur_key.
2631 *
2632 * Return 0 if we got the next backref without problem.
2633 * Return >0 if there is no extra backref for this bytenr.
2634 * Return <0 if something went wrong.
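 *
 * Editorial sketch, not part of the original source: a full walk over all
 * tree backrefs of a given bytenr, using the iterator helpers declared in
 * backref.h (fs_info and bytenr are assumed, error handling trimmed):
 *
 *	struct btrfs_backref_iter *iter;
 *	int ret;
 *
 *	iter = btrfs_backref_iter_alloc(fs_info, GFP_NOFS);
 *	if (!iter)
 *		return -ENOMEM;
 *	for (ret = btrfs_backref_iter_start(iter, bytenr); ret == 0;
 *	     ret = btrfs_backref_iter_next(iter)) {
 *		if (btrfs_backref_iter_is_inline_ref(iter))
 *			process_inline_ref(iter);
 *		else
 *			process_keyed_ref(&iter->cur_key);
 *	}
 *	btrfs_backref_iter_release(iter);
 *	btrfs_backref_iter_free(iter);
 *
 * process_inline_ref()/process_keyed_ref() are placeholders; the final
 * release is redundant when btrfs_backref_iter_start() failed (it already
 * released the iterator) but harmless.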
2635 */ 2636 int btrfs_backref_iter_next(struct btrfs_backref_iter *iter) 2637 { 2638 struct extent_buffer *eb = btrfs_backref_get_eb(iter); 2639 struct btrfs_root *extent_root; 2640 struct btrfs_path *path = iter->path; 2641 struct btrfs_extent_inline_ref *iref; 2642 int ret; 2643 u32 size; 2644 2645 if (btrfs_backref_iter_is_inline_ref(iter)) { 2646 /* We're still inside the inline refs */ 2647 ASSERT(iter->cur_ptr < iter->end_ptr); 2648 2649 if (btrfs_backref_has_tree_block_info(iter)) { 2650 /* First tree block info */ 2651 size = sizeof(struct btrfs_tree_block_info); 2652 } else { 2653 /* Use inline ref type to determine the size */ 2654 int type; 2655 2656 iref = (struct btrfs_extent_inline_ref *) 2657 ((unsigned long)iter->cur_ptr); 2658 type = btrfs_extent_inline_ref_type(eb, iref); 2659 2660 size = btrfs_extent_inline_ref_size(type); 2661 } 2662 iter->cur_ptr += size; 2663 if (iter->cur_ptr < iter->end_ptr) 2664 return 0; 2665 2666 /* All inline items iterated, fall through */ 2667 } 2668 2669 /* We're at keyed items, there is no inline item, go to the next one */ 2670 extent_root = btrfs_extent_root(iter->fs_info, iter->bytenr); 2671 ret = btrfs_next_item(extent_root, iter->path); 2672 if (ret) 2673 return ret; 2674 2675 btrfs_item_key_to_cpu(path->nodes[0], &iter->cur_key, path->slots[0]); 2676 if (iter->cur_key.objectid != iter->bytenr || 2677 (iter->cur_key.type != BTRFS_TREE_BLOCK_REF_KEY && 2678 iter->cur_key.type != BTRFS_SHARED_BLOCK_REF_KEY)) 2679 return 1; 2680 iter->item_ptr = (u32)btrfs_item_ptr_offset(path->nodes[0], 2681 path->slots[0]); 2682 iter->cur_ptr = iter->item_ptr; 2683 iter->end_ptr = iter->item_ptr + (u32)btrfs_item_size(path->nodes[0], 2684 path->slots[0]); 2685 return 0; 2686 } 2687 2688 void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info, 2689 struct btrfs_backref_cache *cache, int is_reloc) 2690 { 2691 int i; 2692 2693 cache->rb_root = RB_ROOT; 2694 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 2695 INIT_LIST_HEAD(&cache->pending[i]); 2696 INIT_LIST_HEAD(&cache->changed); 2697 INIT_LIST_HEAD(&cache->detached); 2698 INIT_LIST_HEAD(&cache->leaves); 2699 INIT_LIST_HEAD(&cache->pending_edge); 2700 INIT_LIST_HEAD(&cache->useless_node); 2701 cache->fs_info = fs_info; 2702 cache->is_reloc = is_reloc; 2703 } 2704 2705 struct btrfs_backref_node *btrfs_backref_alloc_node( 2706 struct btrfs_backref_cache *cache, u64 bytenr, int level) 2707 { 2708 struct btrfs_backref_node *node; 2709 2710 ASSERT(level >= 0 && level < BTRFS_MAX_LEVEL); 2711 node = kzalloc(sizeof(*node), GFP_NOFS); 2712 if (!node) 2713 return node; 2714 2715 INIT_LIST_HEAD(&node->list); 2716 INIT_LIST_HEAD(&node->upper); 2717 INIT_LIST_HEAD(&node->lower); 2718 RB_CLEAR_NODE(&node->rb_node); 2719 cache->nr_nodes++; 2720 node->level = level; 2721 node->bytenr = bytenr; 2722 2723 return node; 2724 } 2725 2726 struct btrfs_backref_edge *btrfs_backref_alloc_edge( 2727 struct btrfs_backref_cache *cache) 2728 { 2729 struct btrfs_backref_edge *edge; 2730 2731 edge = kzalloc(sizeof(*edge), GFP_NOFS); 2732 if (edge) 2733 cache->nr_edges++; 2734 return edge; 2735 } 2736 2737 /* 2738 * Drop the backref node from cache, also cleaning up all its 2739 * upper edges and any uncached nodes in the path. 2740 * 2741 * This cleanup happens bottom up, thus the node should either 2742 * be the lowest node in the cache or a detached node. 
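 *
 * Editorial note: dropping the node's upper edges may leave a parent with an
 * empty lower list; such a parent is put back on the cache's leaves list and
 * marked lowest again, which is what keeps the cleanup strictly bottom up
 * (see btrfs_backref_release_cache() below for the typical caller).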
2743 */ 2744 void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache, 2745 struct btrfs_backref_node *node) 2746 { 2747 struct btrfs_backref_node *upper; 2748 struct btrfs_backref_edge *edge; 2749 2750 if (!node) 2751 return; 2752 2753 BUG_ON(!node->lowest && !node->detached); 2754 while (!list_empty(&node->upper)) { 2755 edge = list_entry(node->upper.next, struct btrfs_backref_edge, 2756 list[LOWER]); 2757 upper = edge->node[UPPER]; 2758 list_del(&edge->list[LOWER]); 2759 list_del(&edge->list[UPPER]); 2760 btrfs_backref_free_edge(cache, edge); 2761 2762 /* 2763 * Add the node to leaf node list if no other child block 2764 * cached. 2765 */ 2766 if (list_empty(&upper->lower)) { 2767 list_add_tail(&upper->lower, &cache->leaves); 2768 upper->lowest = 1; 2769 } 2770 } 2771 2772 btrfs_backref_drop_node(cache, node); 2773 } 2774 2775 /* 2776 * Release all nodes/edges from current cache 2777 */ 2778 void btrfs_backref_release_cache(struct btrfs_backref_cache *cache) 2779 { 2780 struct btrfs_backref_node *node; 2781 int i; 2782 2783 while (!list_empty(&cache->detached)) { 2784 node = list_entry(cache->detached.next, 2785 struct btrfs_backref_node, list); 2786 btrfs_backref_cleanup_node(cache, node); 2787 } 2788 2789 while (!list_empty(&cache->leaves)) { 2790 node = list_entry(cache->leaves.next, 2791 struct btrfs_backref_node, lower); 2792 btrfs_backref_cleanup_node(cache, node); 2793 } 2794 2795 cache->last_trans = 0; 2796 2797 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 2798 ASSERT(list_empty(&cache->pending[i])); 2799 ASSERT(list_empty(&cache->pending_edge)); 2800 ASSERT(list_empty(&cache->useless_node)); 2801 ASSERT(list_empty(&cache->changed)); 2802 ASSERT(list_empty(&cache->detached)); 2803 ASSERT(RB_EMPTY_ROOT(&cache->rb_root)); 2804 ASSERT(!cache->nr_nodes); 2805 ASSERT(!cache->nr_edges); 2806 } 2807 2808 /* 2809 * Handle direct tree backref 2810 * 2811 * Direct tree backref means, the backref item shows its parent bytenr 2812 * directly. This is for SHARED_BLOCK_REF backref (keyed or inlined). 2813 * 2814 * @ref_key: The converted backref key. 2815 * For keyed backref, it's the item key. 2816 * For inlined backref, objectid is the bytenr, 2817 * type is btrfs_inline_ref_type, offset is 2818 * btrfs_inline_ref_offset. 2819 */ 2820 static int handle_direct_tree_backref(struct btrfs_backref_cache *cache, 2821 struct btrfs_key *ref_key, 2822 struct btrfs_backref_node *cur) 2823 { 2824 struct btrfs_backref_edge *edge; 2825 struct btrfs_backref_node *upper; 2826 struct rb_node *rb_node; 2827 2828 ASSERT(ref_key->type == BTRFS_SHARED_BLOCK_REF_KEY); 2829 2830 /* Only reloc root uses backref pointing to itself */ 2831 if (ref_key->objectid == ref_key->offset) { 2832 struct btrfs_root *root; 2833 2834 cur->is_reloc_root = 1; 2835 /* Only reloc backref cache cares about a specific root */ 2836 if (cache->is_reloc) { 2837 root = find_reloc_root(cache->fs_info, cur->bytenr); 2838 if (!root) 2839 return -ENOENT; 2840 cur->root = root; 2841 } else { 2842 /* 2843 * For generic purpose backref cache, reloc root node 2844 * is useless. 
2845 */ 2846 list_add(&cur->list, &cache->useless_node); 2847 } 2848 return 0; 2849 } 2850 2851 edge = btrfs_backref_alloc_edge(cache); 2852 if (!edge) 2853 return -ENOMEM; 2854 2855 rb_node = rb_simple_search(&cache->rb_root, ref_key->offset); 2856 if (!rb_node) { 2857 /* Parent node not yet cached */ 2858 upper = btrfs_backref_alloc_node(cache, ref_key->offset, 2859 cur->level + 1); 2860 if (!upper) { 2861 btrfs_backref_free_edge(cache, edge); 2862 return -ENOMEM; 2863 } 2864 2865 /* 2866 * Backrefs for the upper level block isn't cached, add the 2867 * block to pending list 2868 */ 2869 list_add_tail(&edge->list[UPPER], &cache->pending_edge); 2870 } else { 2871 /* Parent node already cached */ 2872 upper = rb_entry(rb_node, struct btrfs_backref_node, rb_node); 2873 ASSERT(upper->checked); 2874 INIT_LIST_HEAD(&edge->list[UPPER]); 2875 } 2876 btrfs_backref_link_edge(edge, cur, upper, LINK_LOWER); 2877 return 0; 2878 } 2879 2880 /* 2881 * Handle indirect tree backref 2882 * 2883 * Indirect tree backref means, we only know which tree the node belongs to. 2884 * We still need to do a tree search to find out the parents. This is for 2885 * TREE_BLOCK_REF backref (keyed or inlined). 2886 * 2887 * @ref_key: The same as @ref_key in handle_direct_tree_backref() 2888 * @tree_key: The first key of this tree block. 2889 * @path: A clean (released) path, to avoid allocating path every time 2890 * the function get called. 2891 */ 2892 static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache, 2893 struct btrfs_path *path, 2894 struct btrfs_key *ref_key, 2895 struct btrfs_key *tree_key, 2896 struct btrfs_backref_node *cur) 2897 { 2898 struct btrfs_fs_info *fs_info = cache->fs_info; 2899 struct btrfs_backref_node *upper; 2900 struct btrfs_backref_node *lower; 2901 struct btrfs_backref_edge *edge; 2902 struct extent_buffer *eb; 2903 struct btrfs_root *root; 2904 struct rb_node *rb_node; 2905 int level; 2906 bool need_check = true; 2907 int ret; 2908 2909 root = btrfs_get_fs_root(fs_info, ref_key->offset, false); 2910 if (IS_ERR(root)) 2911 return PTR_ERR(root); 2912 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) 2913 cur->cowonly = 1; 2914 2915 if (btrfs_root_level(&root->root_item) == cur->level) { 2916 /* Tree root */ 2917 ASSERT(btrfs_root_bytenr(&root->root_item) == cur->bytenr); 2918 /* 2919 * For reloc backref cache, we may ignore reloc root. But for 2920 * general purpose backref cache, we can't rely on 2921 * btrfs_should_ignore_reloc_root() as it may conflict with 2922 * current running relocation and lead to missing root. 2923 * 2924 * For general purpose backref cache, reloc root detection is 2925 * completely relying on direct backref (key->offset is parent 2926 * bytenr), thus only do such check for reloc cache. 
2927 */ 2928 if (btrfs_should_ignore_reloc_root(root) && cache->is_reloc) { 2929 btrfs_put_root(root); 2930 list_add(&cur->list, &cache->useless_node); 2931 } else { 2932 cur->root = root; 2933 } 2934 return 0; 2935 } 2936 2937 level = cur->level + 1; 2938 2939 /* Search the tree to find parent blocks referring to the block */ 2940 path->search_commit_root = 1; 2941 path->skip_locking = 1; 2942 path->lowest_level = level; 2943 ret = btrfs_search_slot(NULL, root, tree_key, path, 0, 0); 2944 path->lowest_level = 0; 2945 if (ret < 0) { 2946 btrfs_put_root(root); 2947 return ret; 2948 } 2949 if (ret > 0 && path->slots[level] > 0) 2950 path->slots[level]--; 2951 2952 eb = path->nodes[level]; 2953 if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) { 2954 btrfs_err(fs_info, 2955 "couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)", 2956 cur->bytenr, level - 1, root->root_key.objectid, 2957 tree_key->objectid, tree_key->type, tree_key->offset); 2958 btrfs_put_root(root); 2959 ret = -ENOENT; 2960 goto out; 2961 } 2962 lower = cur; 2963 2964 /* Add all nodes and edges in the path */ 2965 for (; level < BTRFS_MAX_LEVEL; level++) { 2966 if (!path->nodes[level]) { 2967 ASSERT(btrfs_root_bytenr(&root->root_item) == 2968 lower->bytenr); 2969 /* Same as previous should_ignore_reloc_root() call */ 2970 if (btrfs_should_ignore_reloc_root(root) && 2971 cache->is_reloc) { 2972 btrfs_put_root(root); 2973 list_add(&lower->list, &cache->useless_node); 2974 } else { 2975 lower->root = root; 2976 } 2977 break; 2978 } 2979 2980 edge = btrfs_backref_alloc_edge(cache); 2981 if (!edge) { 2982 btrfs_put_root(root); 2983 ret = -ENOMEM; 2984 goto out; 2985 } 2986 2987 eb = path->nodes[level]; 2988 rb_node = rb_simple_search(&cache->rb_root, eb->start); 2989 if (!rb_node) { 2990 upper = btrfs_backref_alloc_node(cache, eb->start, 2991 lower->level + 1); 2992 if (!upper) { 2993 btrfs_put_root(root); 2994 btrfs_backref_free_edge(cache, edge); 2995 ret = -ENOMEM; 2996 goto out; 2997 } 2998 upper->owner = btrfs_header_owner(eb); 2999 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) 3000 upper->cowonly = 1; 3001 3002 /* 3003 * If we know the block isn't shared we can avoid 3004 * checking its backrefs. 3005 */ 3006 if (btrfs_block_can_be_shared(root, eb)) 3007 upper->checked = 0; 3008 else 3009 upper->checked = 1; 3010 3011 /* 3012 * Add the block to pending list if we need to check its 3013 * backrefs, we only do this once while walking up a 3014 * tree as we will catch anything else later on. 3015 */ 3016 if (!upper->checked && need_check) { 3017 need_check = false; 3018 list_add_tail(&edge->list[UPPER], 3019 &cache->pending_edge); 3020 } else { 3021 if (upper->checked) 3022 need_check = true; 3023 INIT_LIST_HEAD(&edge->list[UPPER]); 3024 } 3025 } else { 3026 upper = rb_entry(rb_node, struct btrfs_backref_node, 3027 rb_node); 3028 ASSERT(upper->checked); 3029 INIT_LIST_HEAD(&edge->list[UPPER]); 3030 if (!upper->owner) 3031 upper->owner = btrfs_header_owner(eb); 3032 } 3033 btrfs_backref_link_edge(edge, lower, upper, LINK_LOWER); 3034 3035 if (rb_node) { 3036 btrfs_put_root(root); 3037 break; 3038 } 3039 lower = upper; 3040 upper = NULL; 3041 } 3042 out: 3043 btrfs_release_path(path); 3044 return ret; 3045 } 3046 3047 /* 3048 * Add backref node @cur into @cache. 3049 * 3050 * NOTE: Even if the function returned 0, @cur is not yet cached as its upper 3051 * links aren't yet bi-directional. Needs to finish such links. 3052 * Use btrfs_backref_finish_upper_links() to finish such linkage. 
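 *
 * Editorial sketch, not part of the original source, condensed from the
 * typical caller (error handling and the rest of the setup trimmed; node,
 * cache, path, iter and node_key are assumed to be set up already):
 *
 *	struct btrfs_backref_edge *edge;
 *	struct btrfs_backref_node *cur = node;
 *	int ret;
 *
 *	do {
 *		ret = btrfs_backref_add_tree_node(cache, path, iter, node_key,
 *						  cur);
 *		if (ret < 0)
 *			goto error;
 *		edge = list_first_entry_or_null(&cache->pending_edge,
 *				struct btrfs_backref_edge, list[UPPER]);
 *		if (edge) {
 *			list_del_init(&edge->list[UPPER]);
 *			cur = edge->node[UPPER];
 *		}
 *	} while (edge);
 *	ret = btrfs_backref_finish_upper_links(cache, node);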
3053 * 3054 * @path: Released path for indirect tree backref lookup 3055 * @iter: Released backref iter for extent tree search 3056 * @node_key: The first key of the tree block 3057 */ 3058 int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache, 3059 struct btrfs_path *path, 3060 struct btrfs_backref_iter *iter, 3061 struct btrfs_key *node_key, 3062 struct btrfs_backref_node *cur) 3063 { 3064 struct btrfs_fs_info *fs_info = cache->fs_info; 3065 struct btrfs_backref_edge *edge; 3066 struct btrfs_backref_node *exist; 3067 int ret; 3068 3069 ret = btrfs_backref_iter_start(iter, cur->bytenr); 3070 if (ret < 0) 3071 return ret; 3072 /* 3073 * We skip the first btrfs_tree_block_info, as we don't use the key 3074 * stored in it, but fetch it from the tree block 3075 */ 3076 if (btrfs_backref_has_tree_block_info(iter)) { 3077 ret = btrfs_backref_iter_next(iter); 3078 if (ret < 0) 3079 goto out; 3080 /* No extra backref? This means the tree block is corrupted */ 3081 if (ret > 0) { 3082 ret = -EUCLEAN; 3083 goto out; 3084 } 3085 } 3086 WARN_ON(cur->checked); 3087 if (!list_empty(&cur->upper)) { 3088 /* 3089 * The backref was added previously when processing backref of 3090 * type BTRFS_TREE_BLOCK_REF_KEY 3091 */ 3092 ASSERT(list_is_singular(&cur->upper)); 3093 edge = list_entry(cur->upper.next, struct btrfs_backref_edge, 3094 list[LOWER]); 3095 ASSERT(list_empty(&edge->list[UPPER])); 3096 exist = edge->node[UPPER]; 3097 /* 3098 * Add the upper level block to pending list if we need check 3099 * its backrefs 3100 */ 3101 if (!exist->checked) 3102 list_add_tail(&edge->list[UPPER], &cache->pending_edge); 3103 } else { 3104 exist = NULL; 3105 } 3106 3107 for (; ret == 0; ret = btrfs_backref_iter_next(iter)) { 3108 struct extent_buffer *eb; 3109 struct btrfs_key key; 3110 int type; 3111 3112 cond_resched(); 3113 eb = btrfs_backref_get_eb(iter); 3114 3115 key.objectid = iter->bytenr; 3116 if (btrfs_backref_iter_is_inline_ref(iter)) { 3117 struct btrfs_extent_inline_ref *iref; 3118 3119 /* Update key for inline backref */ 3120 iref = (struct btrfs_extent_inline_ref *) 3121 ((unsigned long)iter->cur_ptr); 3122 type = btrfs_get_extent_inline_ref_type(eb, iref, 3123 BTRFS_REF_TYPE_BLOCK); 3124 if (type == BTRFS_REF_TYPE_INVALID) { 3125 ret = -EUCLEAN; 3126 goto out; 3127 } 3128 key.type = type; 3129 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 3130 } else { 3131 key.type = iter->cur_key.type; 3132 key.offset = iter->cur_key.offset; 3133 } 3134 3135 /* 3136 * Parent node found and matches current inline ref, no need to 3137 * rebuild this node for this inline ref 3138 */ 3139 if (exist && 3140 ((key.type == BTRFS_TREE_BLOCK_REF_KEY && 3141 exist->owner == key.offset) || 3142 (key.type == BTRFS_SHARED_BLOCK_REF_KEY && 3143 exist->bytenr == key.offset))) { 3144 exist = NULL; 3145 continue; 3146 } 3147 3148 /* SHARED_BLOCK_REF means key.offset is the parent bytenr */ 3149 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { 3150 ret = handle_direct_tree_backref(cache, &key, cur); 3151 if (ret < 0) 3152 goto out; 3153 continue; 3154 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) { 3155 ret = -EINVAL; 3156 btrfs_print_v0_err(fs_info); 3157 btrfs_handle_fs_error(fs_info, ret, NULL); 3158 goto out; 3159 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { 3160 continue; 3161 } 3162 3163 /* 3164 * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset 3165 * means the root objectid. We need to search the tree to get 3166 * its parent bytenr. 
3167 */ 3168 ret = handle_indirect_tree_backref(cache, path, &key, node_key, 3169 cur); 3170 if (ret < 0) 3171 goto out; 3172 } 3173 ret = 0; 3174 cur->checked = 1; 3175 WARN_ON(exist); 3176 out: 3177 btrfs_backref_iter_release(iter); 3178 return ret; 3179 } 3180 3181 /* 3182 * Finish the upwards linkage created by btrfs_backref_add_tree_node() 3183 */ 3184 int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache, 3185 struct btrfs_backref_node *start) 3186 { 3187 struct list_head *useless_node = &cache->useless_node; 3188 struct btrfs_backref_edge *edge; 3189 struct rb_node *rb_node; 3190 LIST_HEAD(pending_edge); 3191 3192 ASSERT(start->checked); 3193 3194 /* Insert this node to cache if it's not COW-only */ 3195 if (!start->cowonly) { 3196 rb_node = rb_simple_insert(&cache->rb_root, start->bytenr, 3197 &start->rb_node); 3198 if (rb_node) 3199 btrfs_backref_panic(cache->fs_info, start->bytenr, 3200 -EEXIST); 3201 list_add_tail(&start->lower, &cache->leaves); 3202 } 3203 3204 /* 3205 * Use breadth first search to iterate all related edges. 3206 * 3207 * The starting points are all the edges of this node 3208 */ 3209 list_for_each_entry(edge, &start->upper, list[LOWER]) 3210 list_add_tail(&edge->list[UPPER], &pending_edge); 3211 3212 while (!list_empty(&pending_edge)) { 3213 struct btrfs_backref_node *upper; 3214 struct btrfs_backref_node *lower; 3215 3216 edge = list_first_entry(&pending_edge, 3217 struct btrfs_backref_edge, list[UPPER]); 3218 list_del_init(&edge->list[UPPER]); 3219 upper = edge->node[UPPER]; 3220 lower = edge->node[LOWER]; 3221 3222 /* Parent is detached, no need to keep any edges */ 3223 if (upper->detached) { 3224 list_del(&edge->list[LOWER]); 3225 btrfs_backref_free_edge(cache, edge); 3226 3227 /* Lower node is orphan, queue for cleanup */ 3228 if (list_empty(&lower->upper)) 3229 list_add(&lower->list, useless_node); 3230 continue; 3231 } 3232 3233 /* 3234 * All new nodes added in current build_backref_tree() haven't 3235 * been linked to the cache rb tree. 3236 * So if we have upper->rb_node populated, this means a cache 3237 * hit. We only need to link the edge, as @upper and all its 3238 * parents have already been linked. 
3239 */ 3240 if (!RB_EMPTY_NODE(&upper->rb_node)) { 3241 if (upper->lowest) { 3242 list_del_init(&upper->lower); 3243 upper->lowest = 0; 3244 } 3245 3246 list_add_tail(&edge->list[UPPER], &upper->lower); 3247 continue; 3248 } 3249 3250 /* Sanity check, we shouldn't have any unchecked nodes */ 3251 if (!upper->checked) { 3252 ASSERT(0); 3253 return -EUCLEAN; 3254 } 3255 3256 /* Sanity check, COW-only node has non-COW-only parent */ 3257 if (start->cowonly != upper->cowonly) { 3258 ASSERT(0); 3259 return -EUCLEAN; 3260 } 3261 3262 /* Only cache non-COW-only (subvolume trees) tree blocks */ 3263 if (!upper->cowonly) { 3264 rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr, 3265 &upper->rb_node); 3266 if (rb_node) { 3267 btrfs_backref_panic(cache->fs_info, 3268 upper->bytenr, -EEXIST); 3269 return -EUCLEAN; 3270 } 3271 } 3272 3273 list_add_tail(&edge->list[UPPER], &upper->lower); 3274 3275 /* 3276 * Also queue all the parent edges of this uncached node 3277 * to finish the upper linkage 3278 */ 3279 list_for_each_entry(edge, &upper->upper, list[LOWER]) 3280 list_add_tail(&edge->list[UPPER], &pending_edge); 3281 } 3282 return 0; 3283 } 3284 3285 void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache, 3286 struct btrfs_backref_node *node) 3287 { 3288 struct btrfs_backref_node *lower; 3289 struct btrfs_backref_node *upper; 3290 struct btrfs_backref_edge *edge; 3291 3292 while (!list_empty(&cache->useless_node)) { 3293 lower = list_first_entry(&cache->useless_node, 3294 struct btrfs_backref_node, list); 3295 list_del_init(&lower->list); 3296 } 3297 while (!list_empty(&cache->pending_edge)) { 3298 edge = list_first_entry(&cache->pending_edge, 3299 struct btrfs_backref_edge, list[UPPER]); 3300 list_del(&edge->list[UPPER]); 3301 list_del(&edge->list[LOWER]); 3302 lower = edge->node[LOWER]; 3303 upper = edge->node[UPPER]; 3304 btrfs_backref_free_edge(cache, edge); 3305 3306 /* 3307 * Lower is no longer linked to any upper backref nodes and 3308 * isn't in the cache, we can free it ourselves. 3309 */ 3310 if (list_empty(&lower->upper) && 3311 RB_EMPTY_NODE(&lower->rb_node)) 3312 list_add(&lower->list, &cache->useless_node); 3313 3314 if (!RB_EMPTY_NODE(&upper->rb_node)) 3315 continue; 3316 3317 /* Add this guy's upper edges to the list to process */ 3318 list_for_each_entry(edge, &upper->upper, list[LOWER]) 3319 list_add_tail(&edge->list[UPPER], 3320 &cache->pending_edge); 3321 if (list_empty(&upper->upper)) 3322 list_add(&upper->list, &cache->useless_node); 3323 } 3324 3325 while (!list_empty(&cache->useless_node)) { 3326 lower = list_first_entry(&cache->useless_node, 3327 struct btrfs_backref_node, list); 3328 list_del_init(&lower->list); 3329 if (lower == node) 3330 node = NULL; 3331 btrfs_backref_drop_node(cache, lower); 3332 } 3333 3334 btrfs_backref_cleanup_node(cache, node); 3335 ASSERT(list_empty(&cache->useless_node) && 3336 list_empty(&cache->pending_edge)); 3337 } 3338