/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/vmalloc.h>
#include "ctree.h"
#include "disk-io.h"
#include "backref.h"
#include "ulist.h"
#include "transaction.h"
#include "delayed-ref.h"
#include "locking.h"

struct extent_inode_elem {
	u64 inum;
	u64 offset;
	struct extent_inode_elem *next;
};

static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
				struct btrfs_file_extent_item *fi,
				u64 extent_item_pos,
				struct extent_inode_elem **eie)
{
	u64 offset = 0;
	struct extent_inode_elem *e;

	if (!btrfs_file_extent_compression(eb, fi) &&
	    !btrfs_file_extent_encryption(eb, fi) &&
	    !btrfs_file_extent_other_encoding(eb, fi)) {
		u64 data_offset;
		u64 data_len;

		data_offset = btrfs_file_extent_offset(eb, fi);
		data_len = btrfs_file_extent_num_bytes(eb, fi);

		if (extent_item_pos < data_offset ||
		    extent_item_pos >= data_offset + data_len)
			return 1;
		offset = extent_item_pos - data_offset;
	}

	e = kmalloc(sizeof(*e), GFP_NOFS);
	if (!e)
		return -ENOMEM;

	e->next = *eie;
	e->inum = key->objectid;
	e->offset = key->offset + offset;
	*eie = e;

	return 0;
}
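
/*
 * Illustrative example (sketch, values hypothetical): for a regular file
 * extent item with btrfs_file_extent_offset() == 8192 and
 * btrfs_file_extent_num_bytes() == 16384, an extent_item_pos of 12288 falls
 * inside the referenced range, so check_extent_in_eb() records the file
 * position key->offset + (12288 - 8192) for this inode.
 */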

static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte,
				u64 extent_item_pos,
				struct extent_inode_elem **eie)
{
	u64 disk_byte;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int slot;
	int nritems;
	int extent_type;
	int ret;

	/*
	 * from the shared data ref, we only have the leaf but we need the
	 * key. thus, we must look into all items and see whether we find one
	 * (or more) with a reference to our extent item.
	 */
	nritems = btrfs_header_nritems(eb);
	for (slot = 0; slot < nritems; ++slot) {
		btrfs_item_key_to_cpu(eb, &key, slot);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(eb, fi);
		if (extent_type == BTRFS_FILE_EXTENT_INLINE)
			continue;
		/* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
		disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
		if (disk_byte != wanted_disk_byte)
			continue;

		ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie);
		if (ret < 0)
			return ret;
	}

	return 0;
}

/*
 * this structure records all encountered refs on the way up to the root
 */
struct __prelim_ref {
	struct list_head list;
	u64 root_id;
	struct btrfs_key key_for_search;
	int level;
	int count;
	struct extent_inode_elem *inode_list;
	u64 parent;
	u64 wanted_disk_byte;
};

static struct kmem_cache *btrfs_prelim_ref_cache;

int __init btrfs_prelim_ref_init(void)
{
	btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
					sizeof(struct __prelim_ref),
					0,
					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
					NULL);
	if (!btrfs_prelim_ref_cache)
		return -ENOMEM;
	return 0;
}

void btrfs_prelim_ref_exit(void)
{
	if (btrfs_prelim_ref_cache)
		kmem_cache_destroy(btrfs_prelim_ref_cache);
}

/*
 * the rules for all callers of this function are:
 * - obtaining the parent is the goal
 * - if you add a key, you must know that it is a correct key
 * - if you cannot add the parent or a correct key, then we will look into the
 *   block later to set a correct key
 *
 * delayed refs
 * ============
 *        backref type | shared | indirect | shared | indirect
 * information         |   tree |     tree |   data |     data
 * --------------------+--------+----------+--------+----------
 *      parent logical |    y   |     -    |    -   |     -
 *      key to resolve |    -   |     y    |    y   |     y
 *  tree block logical |    -   |     -    |    -   |     -
 *  root for resolving |    y   |     y    |    y   |     y
 *
 * - column 1:       we have the parent -> done
 * - column 2, 3, 4: we use the key to find the parent
 *
 * on disk refs (inline or keyed)
 * ==============================
 *        backref type | shared | indirect | shared | indirect
 * information         |   tree |     tree |   data |     data
 * --------------------+--------+----------+--------+----------
 *      parent logical |    y   |     -    |    y   |     -
 *      key to resolve |    -   |     -    |    -   |     y
 *  tree block logical |    y   |     y    |    y   |     y
 *  root for resolving |    -   |     y    |    y   |     y
 *
 * - column 1, 3: we have the parent -> done
 * - column 2:    we take the first key from the block to find the parent
 *                (see __add_missing_keys)
 * - column 4:    we use the key to find the parent
 *
 * additional information that's available but not required to find the parent
 * block might help in merging entries to gain some speed.
 */
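
/*
 * Worked example (sketch, numbers hypothetical): an indirect data backref
 * EXTENT_DATA_REF { root = 257, objectid = 261, offset = 0, count = 1 }
 * found for an extent at bytenr X becomes the preliminary ref
 *
 *	root_id          = 257
 *	key_for_search   = (261 EXTENT_DATA 0)
 *	level            = 0
 *	count            = 1
 *	parent           = 0	(not yet resolved)
 *	wanted_disk_byte = X
 *
 * which __resolve_indirect_refs() later converts into one or more direct
 * refs with ->parent set to the logical address of a leaf.
 */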

static int __add_prelim_ref(struct list_head *head, u64 root_id,
			    struct btrfs_key *key, int level,
			    u64 parent, u64 wanted_disk_byte, int count,
			    gfp_t gfp_mask)
{
	struct __prelim_ref *ref;

	if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
		return 0;

	ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
	if (!ref)
		return -ENOMEM;

	ref->root_id = root_id;
	if (key)
		ref->key_for_search = *key;
	else
		memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));

	ref->inode_list = NULL;
	ref->level = level;
	ref->count = count;
	ref->parent = parent;
	ref->wanted_disk_byte = wanted_disk_byte;
	list_add_tail(&ref->list, head);

	return 0;
}

static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
			   struct ulist *parents, struct __prelim_ref *ref,
			   int level, u64 time_seq, const u64 *extent_item_pos)
{
	int ret = 0;
	int slot;
	struct extent_buffer *eb;
	struct btrfs_key key;
	struct btrfs_key *key_for_search = &ref->key_for_search;
	struct btrfs_file_extent_item *fi;
	struct extent_inode_elem *eie = NULL, *old = NULL;
	u64 disk_byte;
	u64 wanted_disk_byte = ref->wanted_disk_byte;
	u64 count = 0;

	if (level != 0) {
		eb = path->nodes[level];
		ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
		if (ret < 0)
			return ret;
		return 0;
	}

	/*
	 * We normally enter this function with the path already pointing to
	 * the first item to check. But sometimes, we may enter it with
	 * slot==nritems. In that case, go to the next leaf before we continue.
	 */
	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
		ret = btrfs_next_old_leaf(root, path, time_seq);

	while (!ret && count < ref->count) {
		eb = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(eb, &key, slot);

		if (key.objectid != key_for_search->objectid ||
		    key.type != BTRFS_EXTENT_DATA_KEY)
			break;

		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
		disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);

		if (disk_byte == wanted_disk_byte) {
			eie = NULL;
			old = NULL;
			count++;
			if (extent_item_pos) {
				ret = check_extent_in_eb(&key, eb, fi,
						*extent_item_pos,
						&eie);
				if (ret < 0)
					break;
			}
			if (ret > 0)
				goto next;
			ret = ulist_add_merge(parents, eb->start,
					      (uintptr_t)eie,
					      (u64 *)&old, GFP_NOFS);
			if (ret < 0)
				break;
			if (!ret && extent_item_pos) {
				while (old->next)
					old = old->next;
				old->next = eie;
			}
		}
next:
		ret = btrfs_next_old_item(root, path, time_seq);
	}

	if (ret > 0)
		ret = 0;
	return ret;
}

/*
 * resolve an indirect backref in the form (root_id, key, level)
 * to a logical address
 */
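
/*
 * Example (sketch, values hypothetical): the indirect ref
 * { root_id = 257, key = (261 EXTENT_DATA 0), level = 0 } is resolved by
 * searching for that key in root 257 as it looked at @time_seq; every leaf
 * found to contain a matching file extent item that points at
 * wanted_disk_byte is added to @parents.
 */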

static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
				  struct btrfs_path *path, u64 time_seq,
				  struct __prelim_ref *ref,
				  struct ulist *parents,
				  const u64 *extent_item_pos)
{
	struct btrfs_root *root;
	struct btrfs_key root_key;
	struct extent_buffer *eb;
	int ret = 0;
	int root_level;
	int level = ref->level;
	int index;

	root_key.objectid = ref->root_id;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = (u64)-1;

	index = srcu_read_lock(&fs_info->subvol_srcu);

	root = btrfs_read_fs_root_no_name(fs_info, &root_key);
	if (IS_ERR(root)) {
		srcu_read_unlock(&fs_info->subvol_srcu, index);
		ret = PTR_ERR(root);
		goto out;
	}

	root_level = btrfs_old_root_level(root, time_seq);

	if (root_level + 1 == level) {
		srcu_read_unlock(&fs_info->subvol_srcu, index);
		goto out;
	}

	path->lowest_level = level;
	ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);

	/* root node has been locked, we can release @subvol_srcu safely here */
	srcu_read_unlock(&fs_info->subvol_srcu, index);

	pr_debug("search slot in root %llu (level %d, ref count %d) returned "
		 "%d for key (%llu %u %llu)\n",
		 ref->root_id, level, ref->count, ret,
		 ref->key_for_search.objectid, ref->key_for_search.type,
		 ref->key_for_search.offset);
	if (ret < 0)
		goto out;

	eb = path->nodes[level];
	while (!eb) {
		if (WARN_ON(!level)) {
			ret = 1;
			goto out;
		}
		level--;
		eb = path->nodes[level];
	}

	ret = add_all_parents(root, path, parents, ref, level, time_seq,
			      extent_item_pos);
out:
	path->lowest_level = 0;
	btrfs_release_path(path);
	return ret;
}

/*
 * resolve all indirect backrefs from the list
 */
static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
				   struct btrfs_path *path, u64 time_seq,
				   struct list_head *head,
				   const u64 *extent_item_pos)
{
	int err;
	int ret = 0;
	struct __prelim_ref *ref;
	struct __prelim_ref *ref_safe;
	struct __prelim_ref *new_ref;
	struct ulist *parents;
	struct ulist_node *node;
	struct ulist_iterator uiter;

	parents = ulist_alloc(GFP_NOFS);
	if (!parents)
		return -ENOMEM;

	/*
	 * _safe allows us to insert directly after the current item without
	 * iterating over the newly inserted items.
	 * we're also allowed to re-assign ref during iteration.
	 */
	list_for_each_entry_safe(ref, ref_safe, head, list) {
		if (ref->parent)	/* already direct */
			continue;
		if (ref->count == 0)
			continue;
		err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
					     parents, extent_item_pos);
		if (err == -ENOMEM)
			goto out;
		if (err)
			continue;

		/* we put the first parent into the ref at hand */
		ULIST_ITER_INIT(&uiter);
		node = ulist_next(parents, &uiter);
		ref->parent = node ? node->val : 0;
		ref->inode_list = node ?
			(struct extent_inode_elem *)(uintptr_t)node->aux : NULL;

		/* additional parents require new refs being added here */
		while ((node = ulist_next(parents, &uiter))) {
			new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
						   GFP_NOFS);
			if (!new_ref) {
				ret = -ENOMEM;
				goto out;
			}
			memcpy(new_ref, ref, sizeof(*ref));
			new_ref->parent = node->val;
			new_ref->inode_list = (struct extent_inode_elem *)
							(uintptr_t)node->aux;
			list_add(&new_ref->list, &ref->list);
		}
		ulist_reinit(parents);
	}
out:
	ulist_free(parents);
	return ret;
}

static inline int ref_for_same_block(struct __prelim_ref *ref1,
				     struct __prelim_ref *ref2)
{
	if (ref1->level != ref2->level)
		return 0;
	if (ref1->root_id != ref2->root_id)
		return 0;
	if (ref1->key_for_search.type != ref2->key_for_search.type)
		return 0;
	if (ref1->key_for_search.objectid != ref2->key_for_search.objectid)
		return 0;
	if (ref1->key_for_search.offset != ref2->key_for_search.offset)
		return 0;
	if (ref1->parent != ref2->parent)
		return 0;

	return 1;
}

/*
 * read tree blocks and add keys where required.
 */
static int __add_missing_keys(struct btrfs_fs_info *fs_info,
			      struct list_head *head)
{
	struct list_head *pos;
	struct extent_buffer *eb;

	list_for_each(pos, head) {
		struct __prelim_ref *ref;
		ref = list_entry(pos, struct __prelim_ref, list);

		if (ref->parent)
			continue;
		if (ref->key_for_search.type)
			continue;
		BUG_ON(!ref->wanted_disk_byte);
		eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
				     fs_info->tree_root->leafsize, 0);
		if (!eb || !extent_buffer_uptodate(eb)) {
			free_extent_buffer(eb);
			return -EIO;
		}
		btrfs_tree_read_lock(eb);
		if (btrfs_header_level(eb) == 0)
			btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
		else
			btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return 0;
}

/*
 * merge two lists of backrefs and adjust counts accordingly
 *
 * mode = 1: merge identical keys, if key is set
 *    FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
 *           additionally, we could even add a key range for the blocks we
 *           looked into to merge even more (-> replace unresolved refs by
 *           those having a parent).
 * mode = 2: merge identical parents
 */
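
/*
 * Example (sketch): two refs that are identical under ref_for_same_block()
 * collapse into one with their counts added, e.g.
 *
 *	{ root 257, key (261 EXTENT_DATA 0), parent 0, count 1 }
 *	{ root 257, key (261 EXTENT_DATA 0), parent 0, count 1 }
 *	  --(mode 1)--> { root 257, key (261 EXTENT_DATA 0), count 2 }
 *
 * and with mode == 2, refs that resolved to the same parent block merge
 * even when they came in via different roots.
 */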

static void __merge_refs(struct list_head *head, int mode)
{
	struct list_head *pos1;

	list_for_each(pos1, head) {
		struct list_head *n2;
		struct list_head *pos2;
		struct __prelim_ref *ref1;

		ref1 = list_entry(pos1, struct __prelim_ref, list);

		for (pos2 = pos1->next, n2 = pos2->next; pos2 != head;
		     pos2 = n2, n2 = pos2->next) {
			struct __prelim_ref *ref2;
			struct __prelim_ref *xchg;
			struct extent_inode_elem *eie;

			ref2 = list_entry(pos2, struct __prelim_ref, list);

			if (mode == 1) {
				if (!ref_for_same_block(ref1, ref2))
					continue;
				if (!ref1->parent && ref2->parent) {
					xchg = ref1;
					ref1 = ref2;
					ref2 = xchg;
				}
			} else {
				if (ref1->parent != ref2->parent)
					continue;
			}

			eie = ref1->inode_list;
			while (eie && eie->next)
				eie = eie->next;
			if (eie)
				eie->next = ref2->inode_list;
			else
				ref1->inode_list = ref2->inode_list;
			ref1->count += ref2->count;

			list_del(&ref2->list);
			kmem_cache_free(btrfs_prelim_ref_cache, ref2);
		}

	}
}

/*
 * add all currently queued delayed refs from this head whose seq nr is
 * smaller than or equal to seq to the list
 */
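
/*
 * Example (sketch): with seq == 100, a queued BTRFS_ADD_DELAYED_REF whose
 * node->seq is 90 contributes +ref_mod to the count below, while one with
 * node->seq == 110 is skipped, so the caller sees the extent's references
 * as they looked at tree mod log sequence 100.
 */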

static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
			      struct list_head *prefs)
{
	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
	struct rb_node *n;
	struct btrfs_key key;
	struct btrfs_key op_key = {0};
	int sgn;
	int ret = 0;

	if (extent_op && extent_op->update_key)
		btrfs_disk_key_to_cpu(&op_key, &extent_op->key);

	spin_lock(&head->lock);
	n = rb_first(&head->ref_root);
	while (n) {
		struct btrfs_delayed_ref_node *node;
		node = rb_entry(n, struct btrfs_delayed_ref_node,
				rb_node);
		n = rb_next(n);
		if (node->seq > seq)
			continue;

		switch (node->action) {
		case BTRFS_ADD_DELAYED_EXTENT:
		case BTRFS_UPDATE_DELAYED_HEAD:
			WARN_ON(1);
			continue;
		case BTRFS_ADD_DELAYED_REF:
			sgn = 1;
			break;
		case BTRFS_DROP_DELAYED_REF:
			sgn = -1;
			break;
		default:
			BUG_ON(1);
		}
		switch (node->type) {
		case BTRFS_TREE_BLOCK_REF_KEY: {
			struct btrfs_delayed_tree_ref *ref;

			ref = btrfs_delayed_node_to_tree_ref(node);
			ret = __add_prelim_ref(prefs, ref->root, &op_key,
					       ref->level + 1, 0, node->bytenr,
					       node->ref_mod * sgn, GFP_ATOMIC);
			break;
		}
		case BTRFS_SHARED_BLOCK_REF_KEY: {
			struct btrfs_delayed_tree_ref *ref;

			ref = btrfs_delayed_node_to_tree_ref(node);
			ret = __add_prelim_ref(prefs, ref->root, NULL,
					       ref->level + 1, ref->parent,
					       node->bytenr,
					       node->ref_mod * sgn, GFP_ATOMIC);
			break;
		}
		case BTRFS_EXTENT_DATA_REF_KEY: {
			struct btrfs_delayed_data_ref *ref;
			ref = btrfs_delayed_node_to_data_ref(node);

			key.objectid = ref->objectid;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = ref->offset;
			ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0,
					       node->bytenr,
					       node->ref_mod * sgn, GFP_ATOMIC);
			break;
		}
		case BTRFS_SHARED_DATA_REF_KEY: {
			struct btrfs_delayed_data_ref *ref;

			ref = btrfs_delayed_node_to_data_ref(node);

			key.objectid = ref->objectid;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = ref->offset;
			ret = __add_prelim_ref(prefs, ref->root, &key, 0,
					       ref->parent, node->bytenr,
					       node->ref_mod * sgn, GFP_ATOMIC);
			break;
		}
		default:
			WARN_ON(1);
		}
		if (ret)
			break;
	}
	spin_unlock(&head->lock);
	return ret;
}

/*
 * add all inline backrefs for bytenr to the list
 */
static int __add_inline_refs(struct btrfs_fs_info *fs_info,
			     struct btrfs_path *path, u64 bytenr,
			     int *info_level, struct list_head *prefs)
{
	int ret = 0;
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	unsigned long ptr;
	unsigned long end;
	struct btrfs_extent_item *ei;
	u64 flags;
	u64 item_size;

	/*
	 * enumerate all inline refs
	 */
	leaf = path->nodes[0];
	slot = path->slots[0];

	item_size = btrfs_item_size_nr(leaf, slot);
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);
	btrfs_item_key_to_cpu(leaf, &found_key, slot);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)ptr;
		*info_level = btrfs_tree_block_level(leaf, info);
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	} else if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
		*info_level = found_key.offset;
	} else {
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
	}

	while (ptr < end) {
		struct btrfs_extent_inline_ref *iref;
		u64 offset;
		int type;

		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);

		switch (type) {
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = __add_prelim_ref(prefs, 0, NULL,
						*info_level + 1, offset,
						bytenr, 1, GFP_NOFS);
			break;
		case BTRFS_SHARED_DATA_REF_KEY: {
			struct btrfs_shared_data_ref *sdref;
			int count;

			sdref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sdref);
			ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
					       bytenr, count, GFP_NOFS);
			break;
		}
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = __add_prelim_ref(prefs, offset, NULL,
					       *info_level + 1, 0,
					       bytenr, 1, GFP_NOFS);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY: {
			struct btrfs_extent_data_ref *dref;
			int count;
			u64 root;

			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			count = btrfs_extent_data_ref_count(leaf, dref);
			key.objectid = btrfs_extent_data_ref_objectid(leaf,
								      dref);
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = btrfs_extent_data_ref_offset(leaf, dref);
			root = btrfs_extent_data_ref_root(leaf, dref);
			ret = __add_prelim_ref(prefs, root, &key, 0, 0,
					       bytenr, count, GFP_NOFS);
			break;
		}
		default:
			WARN_ON(1);
		}
		if (ret)
			return ret;
		ptr += btrfs_extent_inline_ref_size(type);
	}

	return 0;
}
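
/*
 * On-disk layout parsed above (sketch):
 *
 *	(bytenr EXTENT_ITEM num_bytes)  or  (bytenr METADATA_ITEM level)
 *	  struct btrfs_extent_item { refs, generation, flags }
 *	  [struct btrfs_tree_block_info]  only for non-skinny tree blocks
 *	  inline refs, back to back:
 *	    SHARED_BLOCK_REF  offset = logical of the parent block
 *	    TREE_BLOCK_REF    offset = objectid of the owning root
 *	    EXTENT_DATA_REF   { root, objectid, offset, count }
 *	    SHARED_DATA_REF   offset = logical of the parent leaf, + count
 */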

/*
 * add all non-inline backrefs for bytenr to the list
 */
static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
			    struct btrfs_path *path, u64 bytenr,
			    int info_level, struct list_head *prefs)
{
	struct btrfs_root *extent_root = fs_info->extent_root;
	int ret;
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	while (1) {
		ret = btrfs_next_item(extent_root, path);
		if (ret < 0)
			break;
		if (ret) {
			ret = 0;
			break;
		}

		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, slot);

		if (key.objectid != bytenr)
			break;
		if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
			continue;
		if (key.type > BTRFS_SHARED_DATA_REF_KEY)
			break;

		switch (key.type) {
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = __add_prelim_ref(prefs, 0, NULL,
						info_level + 1, key.offset,
						bytenr, 1, GFP_NOFS);
			break;
		case BTRFS_SHARED_DATA_REF_KEY: {
			struct btrfs_shared_data_ref *sdref;
			int count;

			sdref = btrfs_item_ptr(leaf, slot,
					      struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sdref);
			ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
						bytenr, count, GFP_NOFS);
			break;
		}
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = __add_prelim_ref(prefs, key.offset, NULL,
					       info_level + 1, 0,
					       bytenr, 1, GFP_NOFS);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY: {
			struct btrfs_extent_data_ref *dref;
			int count;
			u64 root;

			dref = btrfs_item_ptr(leaf, slot,
					      struct btrfs_extent_data_ref);
			count = btrfs_extent_data_ref_count(leaf, dref);
			key.objectid = btrfs_extent_data_ref_objectid(leaf,
								      dref);
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = btrfs_extent_data_ref_offset(leaf, dref);
			root = btrfs_extent_data_ref_root(leaf, dref);
			ret = __add_prelim_ref(prefs, root, &key, 0, 0,
					       bytenr, count, GFP_NOFS);
			break;
		}
		default:
			WARN_ON(1);
		}
		if (ret)
			return ret;

	}

	return ret;
}

/*
 * this adds all existing backrefs (inline backrefs, backrefs and delayed
 * refs) for the given bytenr to the refs list, merges duplicates and resolves
 * indirect refs to their parent bytenr.
 * When roots are found, they're added to the roots list
 *
 * FIXME some caching might speed things up
 */
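
/*
 * Rough pipeline of find_parent_nodes() below (sketch):
 *
 *	1. collect delayed refs for bytenr, if a transaction was given
 *	2. collect inline and keyed on-disk refs from the extent tree
 *	3. __add_missing_keys(): read blocks for refs that lack a key
 *	4. __merge_refs(.., 1): merge refs that describe the same block
 *	5. __resolve_indirect_refs(): turn (root, key, level) into parents
 *	6. __merge_refs(.., 2): merge refs with identical parents
 *	7. refs without a parent name a root; the rest name parent blocks
 */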

static int find_parent_nodes(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
			     u64 time_seq, struct ulist *refs,
			     struct ulist *roots, const u64 *extent_item_pos)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_delayed_ref_root *delayed_refs = NULL;
	struct btrfs_delayed_ref_head *head;
	int info_level = 0;
	int ret;
	struct list_head prefs_delayed;
	struct list_head prefs;
	struct __prelim_ref *ref;

	INIT_LIST_HEAD(&prefs);
	INIT_LIST_HEAD(&prefs_delayed);

	key.objectid = bytenr;
	key.offset = (u64)-1;
	if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	if (!trans)
		path->search_commit_root = 1;

	/*
	 * grab both a lock on the path and a lock on the delayed ref head.
	 * We need both to get a consistent picture of how the refs look
	 * at a specified point in time
	 */
again:
	head = NULL;

	ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret == 0);

	if (trans) {
		/*
		 * look if there are updates for this ref queued and lock the
		 * head
		 */
		delayed_refs = &trans->transaction->delayed_refs;
		spin_lock(&delayed_refs->lock);
		head = btrfs_find_delayed_ref_head(trans, bytenr);
		if (head) {
			if (!mutex_trylock(&head->mutex)) {
				atomic_inc(&head->node.refs);
				spin_unlock(&delayed_refs->lock);

				btrfs_release_path(path);

				/*
				 * Mutex was contended, block until it's
				 * released and try again
				 */
				mutex_lock(&head->mutex);
				mutex_unlock(&head->mutex);
				btrfs_put_delayed_ref(&head->node);
				goto again;
			}
			spin_unlock(&delayed_refs->lock);
			ret = __add_delayed_refs(head, time_seq,
						 &prefs_delayed);
			mutex_unlock(&head->mutex);
			if (ret)
				goto out;
		} else {
			spin_unlock(&delayed_refs->lock);
		}
	}

	if (path->slots[0]) {
		struct extent_buffer *leaf;
		int slot;

		path->slots[0]--;
		leaf = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid == bytenr &&
		    (key.type == BTRFS_EXTENT_ITEM_KEY ||
		     key.type == BTRFS_METADATA_ITEM_KEY)) {
			ret = __add_inline_refs(fs_info, path, bytenr,
						&info_level, &prefs);
			if (ret)
				goto out;
			ret = __add_keyed_refs(fs_info, path, bytenr,
					       info_level, &prefs);
			if (ret)
				goto out;
		}
	}
	btrfs_release_path(path);

	list_splice_init(&prefs_delayed, &prefs);

	ret = __add_missing_keys(fs_info, &prefs);
	if (ret)
		goto out;

	__merge_refs(&prefs, 1);

	ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
				      extent_item_pos);
	if (ret)
		goto out;

	__merge_refs(&prefs, 2);

	while (!list_empty(&prefs)) {
		ref = list_first_entry(&prefs, struct __prelim_ref, list);
		WARN_ON(ref->count < 0);
		if (ref->count && ref->root_id && ref->parent == 0) {
			/* no parent == root of tree */
			ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
			if (ret < 0)
				goto out;
		}
		if (ref->count && ref->parent) {
			struct extent_inode_elem *eie = NULL;
			if (extent_item_pos && !ref->inode_list) {
				u32 bsz;
				struct extent_buffer *eb;
				bsz = btrfs_level_size(fs_info->extent_root,
						       info_level);
				eb = read_tree_block(fs_info->extent_root,
						     ref->parent, bsz, 0);
				if (!eb || !extent_buffer_uptodate(eb)) {
					free_extent_buffer(eb);
					ret = -EIO;
					goto out;
				}
				ret = find_extent_in_eb(eb, bytenr,
							*extent_item_pos, &eie);
				free_extent_buffer(eb);
				if (ret < 0)
					goto out;
				ref->inode_list = eie;
			}
			ret = ulist_add_merge(refs, ref->parent,
					      (uintptr_t)ref->inode_list,
					      (u64 *)&eie, GFP_NOFS);
			if (ret < 0)
				goto out;
			if (!ret && extent_item_pos) {
				/*
				 * we've recorded that parent, so we must extend
				 * its inode list here
				 */
				BUG_ON(!eie);
				while (eie->next)
					eie = eie->next;
				eie->next = ref->inode_list;
			}
		}
		list_del(&ref->list);
		kmem_cache_free(btrfs_prelim_ref_cache, ref);
	}

out:
	btrfs_free_path(path);
	while (!list_empty(&prefs)) {
		ref = list_first_entry(&prefs, struct __prelim_ref, list);
		list_del(&ref->list);
		kmem_cache_free(btrfs_prelim_ref_cache, ref);
	}
	while (!list_empty(&prefs_delayed)) {
		ref = list_first_entry(&prefs_delayed, struct __prelim_ref,
				       list);
		list_del(&ref->list);
		kmem_cache_free(btrfs_prelim_ref_cache, ref);
	}

	return ret;
}
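
/*
 * Note on the encoding used above (sketch): each node in @refs is keyed by
 * a parent block's logical address, and when @extent_item_pos was given,
 * node->aux holds a pointer to an extent_inode_elem chain cast to u64.
 * free_leaf_list() below is the matching teardown for that encoding.
 */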

static void free_leaf_list(struct ulist *blocks)
{
	struct ulist_node *node = NULL;
	struct extent_inode_elem *eie;
	struct extent_inode_elem *eie_next;
	struct ulist_iterator uiter;

	ULIST_ITER_INIT(&uiter);
	while ((node = ulist_next(blocks, &uiter))) {
		if (!node->aux)
			continue;
		eie = (struct extent_inode_elem *)(uintptr_t)node->aux;
		for (; eie; eie = eie_next) {
			eie_next = eie->next;
			kfree(eie);
		}
		node->aux = 0;
	}

	ulist_free(blocks);
}

/*
 * Finds all leafs with a reference to the specified combination of bytenr and
 * offset. If extent_item_pos is given, each leaf node's aux field will carry
 * a list of the matching extent_inode_elems (which the caller must free, see
 * free_leaf_list). The leafs will be stored in the leafs ulist, which must be
 * freed with ulist_free.
 *
 * returns 0 on success, <0 on error
 */
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
				struct btrfs_fs_info *fs_info, u64 bytenr,
				u64 time_seq, struct ulist **leafs,
				const u64 *extent_item_pos)
{
	struct ulist *tmp;
	int ret;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		return -ENOMEM;
	*leafs = ulist_alloc(GFP_NOFS);
	if (!*leafs) {
		ulist_free(tmp);
		return -ENOMEM;
	}

	ret = find_parent_nodes(trans, fs_info, bytenr,
				time_seq, *leafs, tmp, extent_item_pos);
	ulist_free(tmp);

	if (ret < 0 && ret != -ENOENT) {
		free_leaf_list(*leafs);
		return ret;
	}

	return 0;
}

/*
 * walk all backrefs for a given extent to find all roots that reference this
 * extent. Walking a backref means finding all extents that reference this
 * extent and in turn walk the backrefs of those, too. Naturally this is a
 * recursive process, but here it is implemented in an iterative fashion: We
 * find all referencing extents for the extent in question and put them on a
 * list. In turn, we find all referencing extents for those, further appending
 * to the list. The way we iterate the list allows adding more elements after
 * the current while iterating. The process stops when we reach the end of the
 * list. Found roots are added to the roots list.
 *
 * returns 0 on success, < 0 on error.
 */
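
/*
 * Example caller (illustrative sketch only): resolve all roots for a bytenr
 * against the commit root (no transaction, time_seq == 0) and walk the
 * result:
 *
 *	struct ulist *roots = NULL;
 *	struct ulist_iterator uiter;
 *	struct ulist_node *node;
 *
 *	if (!btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &roots)) {
 *		ULIST_ITER_INIT(&uiter);
 *		while ((node = ulist_next(roots, &uiter)))
 *			pr_debug("root %llu\n", node->val);
 *		ulist_free(roots);
 *	}
 */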

int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 time_seq, struct ulist **roots)
{
	struct ulist *tmp;
	struct ulist_node *node = NULL;
	struct ulist_iterator uiter;
	int ret;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		return -ENOMEM;
	*roots = ulist_alloc(GFP_NOFS);
	if (!*roots) {
		ulist_free(tmp);
		return -ENOMEM;
	}

	ULIST_ITER_INIT(&uiter);
	while (1) {
		ret = find_parent_nodes(trans, fs_info, bytenr,
					time_seq, tmp, *roots, NULL);
		if (ret < 0 && ret != -ENOENT) {
			ulist_free(tmp);
			ulist_free(*roots);
			return ret;
		}
		node = ulist_next(tmp, &uiter);
		if (!node)
			break;
		bytenr = node->val;
	}

	ulist_free(tmp);
	return 0;
}

/*
 * this makes the path point to (inum INODE_ITEM ioff)
 */
int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
			struct btrfs_path *path)
{
	struct btrfs_key key;
	return btrfs_find_item(fs_root, path, inum, ioff,
			BTRFS_INODE_ITEM_KEY, &key);
}

static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
				struct btrfs_path *path,
				struct btrfs_key *found_key)
{
	return btrfs_find_item(fs_root, path, inum, ioff,
			BTRFS_INODE_REF_KEY, found_key);
}

int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
			  u64 start_off, struct btrfs_path *path,
			  struct btrfs_inode_extref **ret_extref,
			  u64 *found_off)
{
	int ret, slot;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_inode_extref *extref;
	struct extent_buffer *leaf;
	unsigned long ptr;

	key.objectid = inode_objectid;
	btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY);
	key.offset = start_off;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;

	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			/*
			 * If the item at offset is not found,
			 * btrfs_search_slot will point us to the slot
			 * where it should be inserted. In our case
			 * that will be the slot directly before the
			 * next BTRFS_INODE_EXTREF_KEY item. In the case
			 * that we're pointing to the last slot in a
			 * leaf, we must move one leaf over.
			 */
			ret = btrfs_next_leaf(root, path);
			if (ret) {
				if (ret >= 1)
					ret = -ENOENT;
				break;
			}
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		/*
		 * Check that we're still looking at an extended ref key for
		 * this particular objectid. If we have different
		 * objectid or type then there are no more to be found
		 * in the tree and we can exit.
		 */
		ret = -ENOENT;
		if (found_key.objectid != inode_objectid)
			break;
		if (btrfs_key_type(&found_key) != BTRFS_INODE_EXTREF_KEY)
			break;

		ret = 0;
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		extref = (struct btrfs_inode_extref *)ptr;
		*ret_extref = extref;
		if (found_off)
			*found_off = found_key.offset;
		break;
	}

	return ret;
}
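
/*
 * Example usage of btrfs_find_one_extref() (sketch): walk all extended refs
 * of an inode by feeding the returned offset back in, incremented by one,
 * the same way iterate_inode_extrefs() below does:
 *
 *	u64 off = 0;
 *	while (!btrfs_find_one_extref(root, inum, off, path, &extref, &off)) {
 *		... process extref while the path is still held ...
 *		btrfs_release_path(path);
 *		off++;
 *	}
 */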

/*
 * this iterates to turn a name (from iref/extref) into a full filesystem path.
 * Elements of the path are separated by '/' and the path is guaranteed to be
 * 0-terminated. the path is only given within the current file system.
 * Therefore, it never starts with a '/'. the caller is responsible for
 * providing "size" bytes in "dest". the dest buffer will be filled backwards.
 * finally, the start point of the resulting string is returned. this pointer
 * is within dest, normally.
 * in case the path buffer would overflow, the pointer is decremented further
 * as if output was written to the buffer, though no more output is actually
 * generated. that way, the caller can determine how much space would be
 * required for the path to fit into the buffer. in that case, the returned
 * value will be smaller than dest. callers must check this!
 */
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
			u32 name_len, unsigned long name_off,
			struct extent_buffer *eb_in, u64 parent,
			char *dest, u32 size)
{
	int slot;
	u64 next_inum;
	int ret;
	s64 bytes_left = ((s64)size) - 1;
	struct extent_buffer *eb = eb_in;
	struct btrfs_key found_key;
	int leave_spinning = path->leave_spinning;
	struct btrfs_inode_ref *iref;

	if (bytes_left >= 0)
		dest[bytes_left] = '\0';

	path->leave_spinning = 1;
	while (1) {
		bytes_left -= name_len;
		if (bytes_left >= 0)
			read_extent_buffer(eb, dest + bytes_left,
					   name_off, name_len);
		if (eb != eb_in) {
			btrfs_tree_read_unlock_blocking(eb);
			free_extent_buffer(eb);
		}
		ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
		if (ret > 0)
			ret = -ENOENT;
		if (ret)
			break;

		next_inum = found_key.offset;

		/* regular exit ahead */
		if (parent == next_inum)
			break;

		slot = path->slots[0];
		eb = path->nodes[0];
		/* make sure we can use eb after releasing the path */
		if (eb != eb_in) {
			atomic_inc(&eb->refs);
			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
		}
		btrfs_release_path(path);
		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);

		name_len = btrfs_inode_ref_name_len(eb, iref);
		name_off = (unsigned long)(iref + 1);

		parent = next_inum;
		--bytes_left;
		if (bytes_left >= 0)
			dest[bytes_left] = '/';
	}

	btrfs_release_path(path);
	path->leave_spinning = leave_spinning;

	if (ret)
		return ERR_PTR(ret);

	return dest + bytes_left;
}

/*
 * this makes the path point to (logical EXTENT_ITEM *)
 * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for
 * tree blocks and <0 on error.
 */
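
/*
 * Example (sketch, values hypothetical): for a data extent described by the
 * item (X EXTENT_ITEM LEN) and a logical address X + off with off < LEN,
 * extent_from_logical() positions @path at that extent item, fills
 * @found_key with (X EXTENT_ITEM LEN) and stores BTRFS_EXTENT_FLAG_DATA in
 * *flags_ret.
 */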

int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
			struct btrfs_path *path, struct btrfs_key *found_key,
			u64 *flags_ret)
{
	int ret;
	u64 flags;
	u64 size = 0;
	u32 item_size;
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;

	if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;
	key.objectid = logical;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		return ret;

	while (1) {
		u32 nritems;
		if (path->slots[0] == 0) {
			btrfs_set_path_blocking(path);
			ret = btrfs_prev_leaf(fs_info->extent_root, path);
			if (ret != 0) {
				if (ret > 0) {
					pr_debug("logical %llu is not within "
						 "any extent\n", logical);
					ret = -ENOENT;
				}
				return ret;
			}
		} else {
			path->slots[0]--;
		}
		nritems = btrfs_header_nritems(path->nodes[0]);
		if (nritems == 0) {
			pr_debug("logical %llu is not within any extent\n",
				 logical);
			return -ENOENT;
		}
		if (path->slots[0] == nritems)
			path->slots[0]--;

		btrfs_item_key_to_cpu(path->nodes[0], found_key,
				      path->slots[0]);
		if (found_key->type == BTRFS_EXTENT_ITEM_KEY ||
		    found_key->type == BTRFS_METADATA_ITEM_KEY)
			break;
	}

	if (found_key->type == BTRFS_METADATA_ITEM_KEY)
		size = fs_info->extent_root->leafsize;
	else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
		size = found_key->offset;

	if (found_key->objectid > logical ||
	    found_key->objectid + size <= logical) {
		pr_debug("logical %llu is not within any extent\n", logical);
		return -ENOENT;
	}

	eb = path->nodes[0];
	item_size = btrfs_item_size_nr(eb, path->slots[0]);
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(eb, ei);

	pr_debug("logical %llu is at position %llu within the extent (%llu "
		 "EXTENT_ITEM %llu) flags %#llx size %u\n",
		 logical, logical - found_key->objectid, found_key->objectid,
		 found_key->offset, flags, item_size);

	WARN_ON(!flags_ret);
	if (flags_ret) {
		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
			*flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK;
		else if (flags & BTRFS_EXTENT_FLAG_DATA)
			*flags_ret = BTRFS_EXTENT_FLAG_DATA;
		else
			BUG_ON(1);
		return 0;
	}

	return -EIO;
}

/*
 * helper function to iterate extent inline refs. ptr must point to a 0 value
 * for the first call and may be modified. it is used to track state.
 * if more refs exist, 0 is returned and the next call to
 * __get_extent_inline_ref must pass the modified ptr parameter to get the
 * next ref. after the last ref was processed, 1 is returned.
 * returns <0 on error
 */
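
/*
 * Typical usage (sketch, mirroring tree_backref_for_extent() below):
 *
 *	unsigned long ptr = 0;
 *	struct btrfs_extent_inline_ref *eiref;
 *	int type;
 *	int ret;
 *
 *	do {
 *		ret = __get_extent_inline_ref(&ptr, eb, ei, item_size,
 *					      &eiref, &type);
 *		if (ret < 0)
 *			break;
 *		... handle one inline ref of the given type ...
 *	} while (ret == 0);
 */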

static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
				   struct btrfs_extent_item *ei, u32 item_size,
				   struct btrfs_extent_inline_ref **out_eiref,
				   int *out_type)
{
	unsigned long end;
	u64 flags;
	struct btrfs_tree_block_info *info;

	if (!*ptr) {
		/* first call */
		flags = btrfs_extent_flags(eb, ei);
		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
			info = (struct btrfs_tree_block_info *)(ei + 1);
			*out_eiref =
				(struct btrfs_extent_inline_ref *)(info + 1);
		} else {
			*out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
		}
		*ptr = (unsigned long)*out_eiref;
		if ((void *)*ptr >= (void *)ei + item_size)
			return -ENOENT;
	}

	end = (unsigned long)ei + item_size;
	*out_eiref = (struct btrfs_extent_inline_ref *)*ptr;
	*out_type = btrfs_extent_inline_ref_type(eb, *out_eiref);

	*ptr += btrfs_extent_inline_ref_size(*out_type);
	WARN_ON(*ptr > end);
	if (*ptr == end)
		return 1; /* last */

	return 0;
}

/*
 * reads the tree block backref for an extent. tree level and root are returned
 * through out_level and out_root. ptr must point to a 0 value for the first
 * call and may be modified (see __get_extent_inline_ref comment).
 * returns 0 if data was provided, 1 if there was no more data to provide or
 * <0 on error.
 */
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
			    struct btrfs_extent_item *ei, u32 item_size,
			    u64 *out_root, u8 *out_level)
{
	int ret;
	int type;
	struct btrfs_tree_block_info *info;
	struct btrfs_extent_inline_ref *eiref;

	if (*ptr == (unsigned long)-1)
		return 1;

	while (1) {
		ret = __get_extent_inline_ref(ptr, eb, ei, item_size,
					      &eiref, &type);
		if (ret < 0)
			return ret;

		if (type == BTRFS_TREE_BLOCK_REF_KEY ||
		    type == BTRFS_SHARED_BLOCK_REF_KEY)
			break;

		if (ret == 1)
			return 1;
	}

	/* we can treat both ref types equally here */
	info = (struct btrfs_tree_block_info *)(ei + 1);
	*out_root = btrfs_extent_inline_ref_offset(eb, eiref);
	*out_level = btrfs_tree_block_level(eb, info);

	if (ret == 1)
		*ptr = (unsigned long)-1;

	return 0;
}

static int iterate_leaf_refs(struct extent_inode_elem *inode_list,
				u64 root, u64 extent_item_objectid,
				iterate_extent_inodes_t *iterate, void *ctx)
{
	struct extent_inode_elem *eie;
	int ret = 0;

	for (eie = inode_list; eie; eie = eie->next) {
		pr_debug("ref for %llu resolved, key (%llu EXTENT_DATA %llu), "
			 "root %llu\n", extent_item_objectid,
			 eie->inum, eie->offset, root);
		ret = iterate(eie->inum, eie->offset, root, ctx);
		if (ret) {
			pr_debug("stopping iteration for %llu due to ret=%d\n",
				 extent_item_objectid, ret);
			break;
		}
	}

	return ret;
}

/*
 * calls iterate() for every inode that references the extent identified by
 * the given parameters.
 * when the iterator function returns a non-zero value, iteration stops.
 */
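
/*
 * Example caller (sketch; record_one_inode() is hypothetical, the real
 * LOGICAL_INO ioctl uses a similar callback):
 *
 *	static int record_one_inode(u64 inum, u64 offset, u64 root, void *ctx)
 *	{
 *		... record the (inum, offset, root) triple in ctx ...
 *		return 0;
 *	}
 *
 *	ret = iterate_extent_inodes(fs_info, extent_bytenr, extent_offset,
 *				    0, record_one_inode, ctx);
 */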

int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
				u64 extent_item_objectid, u64 extent_item_pos,
				int search_commit_root,
				iterate_extent_inodes_t *iterate, void *ctx)
{
	int ret;
	struct btrfs_trans_handle *trans = NULL;
	struct ulist *refs = NULL;
	struct ulist *roots = NULL;
	struct ulist_node *ref_node = NULL;
	struct ulist_node *root_node = NULL;
	struct seq_list tree_mod_seq_elem = {};
	struct ulist_iterator ref_uiter;
	struct ulist_iterator root_uiter;

	pr_debug("resolving all inodes for extent %llu\n",
			extent_item_objectid);

	if (!search_commit_root) {
		trans = btrfs_join_transaction(fs_info->extent_root);
		if (IS_ERR(trans))
			return PTR_ERR(trans);
		btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
	}

	ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
				   tree_mod_seq_elem.seq, &refs,
				   &extent_item_pos);
	if (ret)
		goto out;

	ULIST_ITER_INIT(&ref_uiter);
	while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
		ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
					   tree_mod_seq_elem.seq, &roots);
		if (ret)
			break;
		ULIST_ITER_INIT(&root_uiter);
		while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
			pr_debug("root %llu references leaf %llu, data list "
				 "%#llx\n", root_node->val, ref_node->val,
				 ref_node->aux);
			ret = iterate_leaf_refs((struct extent_inode_elem *)
						(uintptr_t)ref_node->aux,
						root_node->val,
						extent_item_objectid,
						iterate, ctx);
		}
		ulist_free(roots);
	}

	free_leaf_list(refs);
out:
	if (!search_commit_root) {
		btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
		btrfs_end_transaction(trans, fs_info->extent_root);
	}

	return ret;
}

int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
				struct btrfs_path *path,
				iterate_extent_inodes_t *iterate, void *ctx)
{
	int ret;
	u64 extent_item_pos;
	u64 flags = 0;
	struct btrfs_key found_key;
	int search_commit_root = path->search_commit_root;

	ret = extent_from_logical(fs_info, logical, path, &found_key, &flags);
	btrfs_release_path(path);
	if (ret < 0)
		return ret;
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
		return -EINVAL;

	extent_item_pos = logical - found_key.objectid;
	ret = iterate_extent_inodes(fs_info, found_key.objectid,
					extent_item_pos, search_commit_root,
					iterate, ctx);

	return ret;
}

typedef int (iterate_irefs_t)(u64 parent, u32 name_len, unsigned long name_off,
			      struct extent_buffer *eb, void *ctx);

static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
			      struct btrfs_path *path,
			      iterate_irefs_t *iterate, void *ctx)
{
	int ret = 0;
	int slot;
	u32 cur;
	u32 len;
	u32 name_len;
	u64 parent = 0;
	int found = 0;
	struct extent_buffer *eb;
	struct btrfs_item *item;
	struct btrfs_inode_ref *iref;
	struct btrfs_key found_key;

	while (!ret) {
		ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
				     &found_key);
		if (ret < 0)
			break;
		if (ret) {
			ret = found ? 0 : -ENOENT;
			break;
		}
		++found;

		parent = found_key.offset;
		slot = path->slots[0];
		eb = btrfs_clone_extent_buffer(path->nodes[0]);
		if (!eb) {
			ret = -ENOMEM;
			break;
		}
		/* the clone already holds a reference on the buffer */
		btrfs_tree_read_lock(eb);
		btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
		btrfs_release_path(path);

		item = btrfs_item_nr(slot);
		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);

		for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
			name_len = btrfs_inode_ref_name_len(eb, iref);
			/* path must be released before calling iterate()! */
			pr_debug("following ref at offset %u for inode %llu in "
				 "tree %llu\n", cur, found_key.objectid,
				 fs_root->objectid);
			ret = iterate(parent, name_len,
				      (unsigned long)(iref + 1), eb, ctx);
			if (ret)
				break;
			len = sizeof(*iref) + name_len;
			iref = (struct btrfs_inode_ref *)((char *)iref + len);
		}
		btrfs_tree_read_unlock_blocking(eb);
		free_extent_buffer(eb);
	}

	btrfs_release_path(path);

	return ret;
}

static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
				 struct btrfs_path *path,
				 iterate_irefs_t *iterate, void *ctx)
{
	int ret;
	int slot;
	u64 offset = 0;
	u64 parent;
	int found = 0;
	struct extent_buffer *eb;
	struct btrfs_inode_extref *extref;
	u32 item_size;
	u32 cur_offset;
	unsigned long ptr;

	while (1) {
		ret = btrfs_find_one_extref(fs_root, inum, offset, path, &extref,
					    &offset);
		if (ret < 0)
			break;
		if (ret) {
			ret = found ? 0 : -ENOENT;
			break;
		}
		++found;

		slot = path->slots[0];
		eb = btrfs_clone_extent_buffer(path->nodes[0]);
		if (!eb) {
			ret = -ENOMEM;
			break;
		}
		/* the clone already holds a reference on the buffer */
		btrfs_tree_read_lock(eb);
		btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
		btrfs_release_path(path);

		/* use the cloned eb, the path was released above */
		item_size = btrfs_item_size_nr(eb, slot);
		ptr = btrfs_item_ptr_offset(eb, slot);
		cur_offset = 0;

		while (cur_offset < item_size) {
			u32 name_len;

			extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
			parent = btrfs_inode_extref_parent(eb, extref);
			name_len = btrfs_inode_extref_name_len(eb, extref);
			ret = iterate(parent, name_len,
				      (unsigned long)&extref->name, eb, ctx);
			if (ret)
				break;

			cur_offset += btrfs_inode_extref_name_len(eb, extref);
			cur_offset += sizeof(*extref);
		}
		btrfs_tree_read_unlock_blocking(eb);
		free_extent_buffer(eb);

		offset++;
	}

	btrfs_release_path(path);

	return ret;
}

static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
			 struct btrfs_path *path, iterate_irefs_t *iterate,
			 void *ctx)
{
	int ret;
	int found_refs = 0;

	ret = iterate_inode_refs(inum, fs_root, path, iterate, ctx);
	if (!ret)
		++found_refs;
	else if (ret != -ENOENT)
		return ret;

	ret = iterate_inode_extrefs(inum, fs_root, path, iterate, ctx);
	if (ret == -ENOENT && found_refs)
		return 0;

	return ret;
}
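
/*
 * The remaining functions tie the iteration together. Rough usage of the
 * ipath machinery (sketch, mirroring the INO_PATHS ioctl):
 *
 *	ipath = init_ipath(size, fs_root, path);
 *	ret = paths_from_inode(inum, ipath);
 *	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
 *		name = (char *)(unsigned long)ipath->fspath->val[i];
 *	free_ipath(ipath);
 */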

/*
 * returns 0 if the path could be dumped (probably truncated)
 * returns <0 in case of an error
 */
static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
			 struct extent_buffer *eb, void *ctx)
{
	struct inode_fs_paths *ipath = ctx;
	char *fspath;
	char *fspath_min;
	int i = ipath->fspath->elem_cnt;
	const int s_ptr = sizeof(char *);
	u32 bytes_left;

	bytes_left = ipath->fspath->bytes_left > s_ptr ?
			ipath->fspath->bytes_left - s_ptr : 0;

	fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr;
	fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len,
				   name_off, eb, inum, fspath_min, bytes_left);
	if (IS_ERR(fspath))
		return PTR_ERR(fspath);

	if (fspath > fspath_min) {
		ipath->fspath->val[i] = (u64)(unsigned long)fspath;
		++ipath->fspath->elem_cnt;
		ipath->fspath->bytes_left = fspath - fspath_min;
	} else {
		++ipath->fspath->elem_missed;
		ipath->fspath->bytes_missing += fspath_min - fspath;
		ipath->fspath->bytes_left = 0;
	}

	return 0;
}

/*
 * this dumps all file system paths to the inode into the ipath struct,
 * provided it has been created large enough. each path is zero-terminated
 * and accessed from ipath->fspath->val[i].
 * when it returns, there are ipath->fspath->elem_cnt number of paths available
 * in ipath->fspath->val[]. when the allocated space wasn't sufficient, the
 * number of missed paths is recorded in ipath->fspath->elem_missed, otherwise,
 * it's zero. ipath->fspath->bytes_missing holds the number of bytes that would
 * have been needed to return all paths.
 */
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
{
	return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path,
			     inode_to_path, ipath);
}

struct btrfs_data_container *init_data_container(u32 total_bytes)
{
	struct btrfs_data_container *data;
	size_t alloc_bytes;

	alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
	data = vmalloc(alloc_bytes);
	if (!data)
		return ERR_PTR(-ENOMEM);

	if (total_bytes >= sizeof(*data)) {
		data->bytes_left = total_bytes - sizeof(*data);
		data->bytes_missing = 0;
	} else {
		data->bytes_missing = sizeof(*data) - total_bytes;
		data->bytes_left = 0;
	}

	data->elem_cnt = 0;
	data->elem_missed = 0;

	return data;
}

/*
 * allocates space to return multiple file system paths for an inode.
 * total_bytes to allocate are passed, note that space usable for actual path
 * information will be total_bytes - sizeof(struct btrfs_data_container).
 * the returned pointer must be freed with free_ipath() in the end.
 */
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
					struct btrfs_path *path)
{
	struct inode_fs_paths *ifp;
	struct btrfs_data_container *fspath;

	fspath = init_data_container(total_bytes);
	if (IS_ERR(fspath))
		return (void *)fspath;

	ifp = kmalloc(sizeof(*ifp), GFP_NOFS);
	if (!ifp) {
		/* fspath was vmalloc'ed by init_data_container */
		vfree(fspath);
		return ERR_PTR(-ENOMEM);
	}

	ifp->btrfs_path = path;
	ifp->fspath = fspath;
	ifp->fs_root = fs_root;

	return ifp;
}

void free_ipath(struct inode_fs_paths *ipath)
{
	if (!ipath)
		return;
	vfree(ipath->fspath);
	kfree(ipath);
}