/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/vmalloc.h>
#include "ctree.h"
#include "disk-io.h"
#include "backref.h"
#include "ulist.h"
#include "transaction.h"
#include "delayed-ref.h"
#include "locking.h"

struct extent_inode_elem {
	u64 inum;
	u64 offset;
	struct extent_inode_elem *next;
};

static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
				struct btrfs_file_extent_item *fi,
				u64 extent_item_pos,
				struct extent_inode_elem **eie)
{
	u64 offset = 0;
	struct extent_inode_elem *e;

	if (!btrfs_file_extent_compression(eb, fi) &&
	    !btrfs_file_extent_encryption(eb, fi) &&
	    !btrfs_file_extent_other_encoding(eb, fi)) {
		u64 data_offset;
		u64 data_len;

		data_offset = btrfs_file_extent_offset(eb, fi);
		data_len = btrfs_file_extent_num_bytes(eb, fi);

		if (extent_item_pos < data_offset ||
		    extent_item_pos >= data_offset + data_len)
			return 1;
		offset = extent_item_pos - data_offset;
	}

	e = kmalloc(sizeof(*e), GFP_NOFS);
	if (!e)
		return -ENOMEM;

	e->next = *eie;
	e->inum = key->objectid;
	e->offset = key->offset + offset;
	*eie = e;

	return 0;
}
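
/*
 * walk all file extent items of the given leaf and collect (inode, offset)
 * pairs for those that reference wanted_disk_byte
 */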
static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte,
				u64 extent_item_pos,
				struct extent_inode_elem **eie)
{
	u64 disk_byte;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int slot;
	int nritems;
	int extent_type;
	int ret;

	/*
	 * from the shared data ref, we only have the leaf but we need
	 * the key. thus, we must look into all items and see whether we
	 * find one (some) with a reference to our extent item.
	 */
	nritems = btrfs_header_nritems(eb);
	for (slot = 0; slot < nritems; ++slot) {
		btrfs_item_key_to_cpu(eb, &key, slot);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(eb, fi);
		if (extent_type == BTRFS_FILE_EXTENT_INLINE)
			continue;
		/* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
		disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
		if (disk_byte != wanted_disk_byte)
			continue;

		ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie);
		if (ret < 0)
			return ret;
	}

	return 0;
}

/*
 * this structure records all encountered refs on the way up to the root
 */
struct __prelim_ref {
	struct list_head list;
	u64 root_id;
	struct btrfs_key key_for_search;
	int level;
	int count;
	struct extent_inode_elem *inode_list;
	u64 parent;
	u64 wanted_disk_byte;
};

static struct kmem_cache *btrfs_prelim_ref_cache;

int __init btrfs_prelim_ref_init(void)
{
	btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
					sizeof(struct __prelim_ref),
					0,
					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
					NULL);
	if (!btrfs_prelim_ref_cache)
		return -ENOMEM;
	return 0;
}

void btrfs_prelim_ref_exit(void)
{
	if (btrfs_prelim_ref_cache)
		kmem_cache_destroy(btrfs_prelim_ref_cache);
}

/*
 * the rules for all callers of this function are:
 * - obtaining the parent is the goal
 * - if you add a key, you must know that it is a correct key
 * - if you cannot add the parent or a correct key, then we will look into the
 *   block later to set a correct key
 *
 * delayed refs
 * ============
 *        backref type | shared | indirect | shared | indirect
 * information         |   tree |     tree |   data |     data
 * --------------------+--------+----------+--------+----------
 *      parent logical |    y   |     -    |    -   |     -
 *      key to resolve |    -   |     y    |    y   |     y
 *  tree block logical |    -   |     -    |    -   |     -
 *  root for resolving |    y   |     y    |    y   |     y
 *
 * - column 1:       we have the parent -> done
 * - column 2, 3, 4: we use the key to find the parent
 *
 * on disk refs (inline or keyed)
 * ==============================
 *        backref type | shared | indirect | shared | indirect
 * information         |   tree |     tree |   data |     data
 * --------------------+--------+----------+--------+----------
 *      parent logical |    y   |     -    |    y   |     -
 *      key to resolve |    -   |     -    |    -   |     y
 *  tree block logical |    y   |     y    |    y   |     y
 *  root for resolving |    -   |     y    |    y   |     y
 *
 * - column 1, 3: we have the parent -> done
 * - column 2:    we take the first key from the block to find the parent
 *                (see __add_missing_keys)
 * - column 4:    we use the key to find the parent
 *
 * additional information that's available but not required to find the parent
 * block might help in merging entries to gain some speed.
 */
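
/*
 * a ref added without a key gets key_for_search zeroed out below;
 * __add_missing_keys later recognizes such refs by the zero key type and
 * reads the first key from the referenced block (column 2 of the table above)
 */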
static int __add_prelim_ref(struct list_head *head, u64 root_id,
			    struct btrfs_key *key, int level,
			    u64 parent, u64 wanted_disk_byte, int count,
			    gfp_t gfp_mask)
{
	struct __prelim_ref *ref;

	if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
		return 0;

	ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
	if (!ref)
		return -ENOMEM;

	ref->root_id = root_id;
	if (key)
		ref->key_for_search = *key;
	else
		memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));

	ref->inode_list = NULL;
	ref->level = level;
	ref->count = count;
	ref->parent = parent;
	ref->wanted_disk_byte = wanted_disk_byte;
	list_add_tail(&ref->list, head);

	return 0;
}
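
/*
 * for a resolved indirect ref, collect all leafs (or, above level 0, the
 * single node at path->lowest_level) referencing wanted_disk_byte into the
 * parents ulist; when extent_item_pos is given, matching inode/offset pairs
 * are attached to each leaf as ulist aux data
 */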
static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
			   struct ulist *parents, int level,
			   struct btrfs_key *key_for_search, u64 time_seq,
			   u64 wanted_disk_byte,
			   const u64 *extent_item_pos)
{
	int ret = 0;
	int slot;
	struct extent_buffer *eb;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	struct extent_inode_elem *eie = NULL, *old = NULL;
	u64 disk_byte;

	if (level != 0) {
		eb = path->nodes[level];
		ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
		if (ret < 0)
			return ret;
		return 0;
	}

	/*
	 * We normally enter this function with the path already pointing to
	 * the first item to check. But sometimes, we may enter it with
	 * slot==nritems. In that case, go to the next leaf before we continue.
	 */
	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
		ret = btrfs_next_old_leaf(root, path, time_seq);

	while (!ret) {
		eb = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(eb, &key, slot);

		if (key.objectid != key_for_search->objectid ||
		    key.type != BTRFS_EXTENT_DATA_KEY)
			break;

		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
		disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);

		if (disk_byte == wanted_disk_byte) {
			eie = NULL;
			old = NULL;
			if (extent_item_pos) {
				ret = check_extent_in_eb(&key, eb, fi,
						*extent_item_pos,
						&eie);
				if (ret < 0)
					break;
			}
			if (ret > 0)
				goto next;
			ret = ulist_add_merge(parents, eb->start,
					      (uintptr_t)eie,
					      (u64 *)&old, GFP_NOFS);
			if (ret < 0)
				break;
			if (!ret && extent_item_pos) {
				while (old->next)
					old = old->next;
				old->next = eie;
			}
		}
next:
		ret = btrfs_next_old_item(root, path, time_seq);
	}

	if (ret > 0)
		ret = 0;
	return ret;
}

/*
 * resolve an indirect backref in the form (root_id, key, level)
 * to a logical address
 */
static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
				  struct btrfs_path *path, u64 time_seq,
				  struct __prelim_ref *ref,
				  struct ulist *parents,
				  const u64 *extent_item_pos)
{
	struct btrfs_root *root;
	struct btrfs_key root_key;
	struct extent_buffer *eb;
	int ret = 0;
	int root_level;
	int level = ref->level;

	root_key.objectid = ref->root_id;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = (u64)-1;
	root = btrfs_read_fs_root_no_name(fs_info, &root_key);
	if (IS_ERR(root)) {
		ret = PTR_ERR(root);
		goto out;
	}

	root_level = btrfs_old_root_level(root, time_seq);

	if (root_level + 1 == level)
		goto out;

	path->lowest_level = level;
	ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
	pr_debug("search slot in root %llu (level %d, ref count %d) returned "
		 "%d for key (%llu %u %llu)\n",
		 ref->root_id, level, ref->count, ret,
		 ref->key_for_search.objectid, ref->key_for_search.type,
		 ref->key_for_search.offset);
	if (ret < 0)
		goto out;

	eb = path->nodes[level];
	while (!eb) {
		if (WARN_ON(!level)) {
			ret = 1;
			goto out;
		}
		level--;
		eb = path->nodes[level];
	}

	ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
			      time_seq, ref->wanted_disk_byte,
			      extent_item_pos);
out:
	path->lowest_level = 0;
	btrfs_release_path(path);
	return ret;
}

/*
 * resolve all indirect backrefs from the list
 */
static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
				   struct btrfs_path *path, u64 time_seq,
				   struct list_head *head,
				   const u64 *extent_item_pos)
{
	int err;
	int ret = 0;
	struct __prelim_ref *ref;
	struct __prelim_ref *ref_safe;
	struct __prelim_ref *new_ref;
	struct ulist *parents;
	struct ulist_node *node;
	struct ulist_iterator uiter;

	parents = ulist_alloc(GFP_NOFS);
	if (!parents)
		return -ENOMEM;

	/*
	 * _safe allows us to insert directly after the current item without
	 * iterating over the newly inserted items.
	 * we're also allowed to re-assign ref during iteration.
	 */
	list_for_each_entry_safe(ref, ref_safe, head, list) {
		if (ref->parent) /* already direct */
			continue;
		if (ref->count == 0)
			continue;
		err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
					     parents, extent_item_pos);
		if (err == -ENOMEM)
			goto out;
		if (err)
			continue;

		/* we put the first parent into the ref at hand */
		ULIST_ITER_INIT(&uiter);
		node = ulist_next(parents, &uiter);
		ref->parent = node ? node->val : 0;
		ref->inode_list = node ?
			(struct extent_inode_elem *)(uintptr_t)node->aux : NULL;

		/* additional parents require new refs being added here */
		while ((node = ulist_next(parents, &uiter))) {
			new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
						   GFP_NOFS);
			if (!new_ref) {
				ret = -ENOMEM;
				goto out;
			}
			memcpy(new_ref, ref, sizeof(*ref));
			new_ref->parent = node->val;
			new_ref->inode_list = (struct extent_inode_elem *)
							(uintptr_t)node->aux;
			list_add(&new_ref->list, &ref->list);
		}
		ulist_reinit(parents);
	}
out:
	ulist_free(parents);
	return ret;
}
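
/*
 * two refs qualify for merging only if they agree on level, root, search key
 * and parent; used by __merge_refs below
 */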
static inline int ref_for_same_block(struct __prelim_ref *ref1,
				     struct __prelim_ref *ref2)
{
	if (ref1->level != ref2->level)
		return 0;
	if (ref1->root_id != ref2->root_id)
		return 0;
	if (ref1->key_for_search.type != ref2->key_for_search.type)
		return 0;
	if (ref1->key_for_search.objectid != ref2->key_for_search.objectid)
		return 0;
	if (ref1->key_for_search.offset != ref2->key_for_search.offset)
		return 0;
	if (ref1->parent != ref2->parent)
		return 0;

	return 1;
}

/*
 * read tree blocks and add keys where required.
 */
static int __add_missing_keys(struct btrfs_fs_info *fs_info,
			      struct list_head *head)
{
	struct list_head *pos;
	struct extent_buffer *eb;

	list_for_each(pos, head) {
		struct __prelim_ref *ref;
		ref = list_entry(pos, struct __prelim_ref, list);

		if (ref->parent)
			continue;
		if (ref->key_for_search.type)
			continue;
		BUG_ON(!ref->wanted_disk_byte);
		eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
				     fs_info->tree_root->leafsize, 0);
		if (!eb || !extent_buffer_uptodate(eb)) {
			free_extent_buffer(eb);
			return -EIO;
		}
		btrfs_tree_read_lock(eb);
		if (btrfs_header_level(eb) == 0)
			btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
		else
			btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return 0;
}

/*
 * merge two lists of backrefs and adjust counts accordingly
 *
 * mode = 1: merge identical keys, if key is set
 *    FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
 *           additionally, we could even add a key range for the blocks we
 *           looked into to merge even more (-> replace unresolved refs by
 *           those having a parent).
 * mode = 2: merge identical parents
 */
static void __merge_refs(struct list_head *head, int mode)
{
	struct list_head *pos1;

	list_for_each(pos1, head) {
		struct list_head *n2;
		struct list_head *pos2;
		struct __prelim_ref *ref1;

		ref1 = list_entry(pos1, struct __prelim_ref, list);

		for (pos2 = pos1->next, n2 = pos2->next; pos2 != head;
		     pos2 = n2, n2 = pos2->next) {
			struct __prelim_ref *ref2;
			struct __prelim_ref *xchg;
			struct extent_inode_elem *eie;

			ref2 = list_entry(pos2, struct __prelim_ref, list);

			if (mode == 1) {
				if (!ref_for_same_block(ref1, ref2))
					continue;
				if (!ref1->parent && ref2->parent) {
					xchg = ref1;
					ref1 = ref2;
					ref2 = xchg;
				}
			} else {
				if (ref1->parent != ref2->parent)
					continue;
			}

			eie = ref1->inode_list;
			while (eie && eie->next)
				eie = eie->next;
			if (eie)
				eie->next = ref2->inode_list;
			else
				ref1->inode_list = ref2->inode_list;
			ref1->count += ref2->count;

			list_del(&ref2->list);
			kmem_cache_free(btrfs_prelim_ref_cache, ref2);
		}
	}
}
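
/*
 * note on counts: each delayed ref node below contributes
 * node->ref_mod * sgn, so a queued drop enters the list with a negative count
 * and cancels a queued addition once __merge_refs has combined the matching
 * entries
 */
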
/*
 * add all currently queued delayed refs from this head whose seq nr is
 * smaller than or equal to seq to the list
 */
static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
			      struct list_head *prefs)
{
	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
	struct rb_node *n = &head->node.rb_node;
	struct btrfs_key key;
	struct btrfs_key op_key = {0};
	int sgn;
	int ret = 0;

	if (extent_op && extent_op->update_key)
		btrfs_disk_key_to_cpu(&op_key, &extent_op->key);

	while ((n = rb_prev(n))) {
		struct btrfs_delayed_ref_node *node;
		node = rb_entry(n, struct btrfs_delayed_ref_node,
				rb_node);
		if (node->bytenr != head->node.bytenr)
			break;
		WARN_ON(node->is_head);

		if (node->seq > seq)
			continue;

		switch (node->action) {
		case BTRFS_ADD_DELAYED_EXTENT:
		case BTRFS_UPDATE_DELAYED_HEAD:
			WARN_ON(1);
			continue;
		case BTRFS_ADD_DELAYED_REF:
			sgn = 1;
			break;
		case BTRFS_DROP_DELAYED_REF:
			sgn = -1;
			break;
		default:
			BUG_ON(1);
		}
		switch (node->type) {
		case BTRFS_TREE_BLOCK_REF_KEY: {
			struct btrfs_delayed_tree_ref *ref;

			ref = btrfs_delayed_node_to_tree_ref(node);
			ret = __add_prelim_ref(prefs, ref->root, &op_key,
					       ref->level + 1, 0, node->bytenr,
					       node->ref_mod * sgn, GFP_ATOMIC);
			break;
		}
		case BTRFS_SHARED_BLOCK_REF_KEY: {
			struct btrfs_delayed_tree_ref *ref;

			ref = btrfs_delayed_node_to_tree_ref(node);
			ret = __add_prelim_ref(prefs, ref->root, NULL,
					       ref->level + 1, ref->parent,
					       node->bytenr,
					       node->ref_mod * sgn, GFP_ATOMIC);
			break;
		}
		case BTRFS_EXTENT_DATA_REF_KEY: {
			struct btrfs_delayed_data_ref *ref;
			ref = btrfs_delayed_node_to_data_ref(node);

			key.objectid = ref->objectid;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = ref->offset;
			ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0,
					       node->bytenr,
					       node->ref_mod * sgn, GFP_ATOMIC);
			break;
		}
		case BTRFS_SHARED_DATA_REF_KEY: {
			struct btrfs_delayed_data_ref *ref;

			ref = btrfs_delayed_node_to_data_ref(node);

			key.objectid = ref->objectid;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = ref->offset;
			ret = __add_prelim_ref(prefs, ref->root, &key, 0,
					       ref->parent, node->bytenr,
					       node->ref_mod * sgn, GFP_ATOMIC);
			break;
		}
		default:
			WARN_ON(1);
		}
		if (ret)
			return ret;
	}

	return 0;
}
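
/*
 * an extent item with inline refs, as walked below, is laid out like this:
 *
 *	struct btrfs_extent_item
 *	[ struct btrfs_tree_block_info ]   only for tree blocks keyed as
 *					   EXTENT_ITEM, absent for skinny
 *					   METADATA_ITEMs
 *	struct btrfs_extent_inline_ref     repeated up to the item end, each
 *					   followed by its type dependent data
 */
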
/*
 * add all inline backrefs for bytenr to the list
 */
static int __add_inline_refs(struct btrfs_fs_info *fs_info,
			     struct btrfs_path *path, u64 bytenr,
			     int *info_level, struct list_head *prefs)
{
	int ret = 0;
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	unsigned long ptr;
	unsigned long end;
	struct btrfs_extent_item *ei;
	u64 flags;
	u64 item_size;

	/*
	 * enumerate all inline refs
	 */
	leaf = path->nodes[0];
	slot = path->slots[0];

	item_size = btrfs_item_size_nr(leaf, slot);
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);
	btrfs_item_key_to_cpu(leaf, &found_key, slot);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)ptr;
		*info_level = btrfs_tree_block_level(leaf, info);
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	} else if (found_key.type == BTRFS_METADATA_ITEM_KEY) {
		*info_level = found_key.offset;
	} else {
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
	}

	while (ptr < end) {
		struct btrfs_extent_inline_ref *iref;
		u64 offset;
		int type;

		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);

		switch (type) {
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = __add_prelim_ref(prefs, 0, NULL,
						*info_level + 1, offset,
						bytenr, 1, GFP_NOFS);
			break;
		case BTRFS_SHARED_DATA_REF_KEY: {
			struct btrfs_shared_data_ref *sdref;
			int count;

			sdref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sdref);
			ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
					       bytenr, count, GFP_NOFS);
			break;
		}
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = __add_prelim_ref(prefs, offset, NULL,
					       *info_level + 1, 0,
					       bytenr, 1, GFP_NOFS);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY: {
			struct btrfs_extent_data_ref *dref;
			int count;
			u64 root;

			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			count = btrfs_extent_data_ref_count(leaf, dref);
			key.objectid = btrfs_extent_data_ref_objectid(leaf,
								      dref);
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = btrfs_extent_data_ref_offset(leaf, dref);
			root = btrfs_extent_data_ref_root(leaf, dref);
			ret = __add_prelim_ref(prefs, root, &key, 0, 0,
					       bytenr, count, GFP_NOFS);
			break;
		}
		default:
			WARN_ON(1);
		}
		if (ret)
			return ret;
		ptr += btrfs_extent_inline_ref_size(type);
	}

	return 0;
}

/*
 * add all non-inline backrefs for bytenr to the list
 */
static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
			    struct btrfs_path *path, u64 bytenr,
			    int info_level, struct list_head *prefs)
{
	struct btrfs_root *extent_root = fs_info->extent_root;
	int ret;
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	while (1) {
		ret = btrfs_next_item(extent_root, path);
		if (ret < 0)
			break;
		if (ret) {
			ret = 0;
			break;
		}

		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, slot);

		if (key.objectid != bytenr)
			break;
		if (key.type < BTRFS_TREE_BLOCK_REF_KEY)
			continue;
		if (key.type > BTRFS_SHARED_DATA_REF_KEY)
			break;

		switch (key.type) {
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = __add_prelim_ref(prefs, 0, NULL,
						info_level + 1, key.offset,
						bytenr, 1, GFP_NOFS);
			break;
		case BTRFS_SHARED_DATA_REF_KEY: {
			struct btrfs_shared_data_ref *sdref;
			int count;

			sdref = btrfs_item_ptr(leaf, slot,
					      struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sdref);
			ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
						bytenr, count, GFP_NOFS);
			break;
		}
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = __add_prelim_ref(prefs, key.offset, NULL,
					       info_level + 1, 0,
					       bytenr, 1, GFP_NOFS);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY: {
			struct btrfs_extent_data_ref *dref;
			int count;
			u64 root;

			dref = btrfs_item_ptr(leaf, slot,
					      struct btrfs_extent_data_ref);
			count = btrfs_extent_data_ref_count(leaf, dref);
			key.objectid = btrfs_extent_data_ref_objectid(leaf,
								      dref);
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = btrfs_extent_data_ref_offset(leaf, dref);
			root = btrfs_extent_data_ref_root(leaf, dref);
			ret = __add_prelim_ref(prefs, root, &key, 0, 0,
					       bytenr, count, GFP_NOFS);
			break;
		}
		default:
			WARN_ON(1);
		}
		if (ret)
			return ret;
	}

	return ret;
}
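
/*
 * overall flow of find_parent_nodes() below:
 *
 * 1. search the extent tree for bytenr (the commit root if no transaction
 *    was given)
 * 2. if a transaction was given, lock the delayed ref head and queue all
 *    pending delayed refs for bytenr (__add_delayed_refs)
 * 3. collect the on-disk inline and keyed refs (__add_inline_refs,
 *    __add_keyed_refs)
 * 4. fill in missing keys (__add_missing_keys), merge refs by key
 *    (__merge_refs mode 1), resolve indirect refs to parents
 *    (__resolve_indirect_refs) and merge again by parent (mode 2)
 */
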
/*
 * this adds all existing backrefs (inline backrefs, backrefs and delayed
 * refs) for the given bytenr to the refs list, merges duplicates and resolves
 * indirect refs to their parent bytenr.
 * When roots are found, they're added to the roots list
 *
 * FIXME some caching might speed things up
 */
static int find_parent_nodes(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info, u64 bytenr,
			     u64 time_seq, struct ulist *refs,
			     struct ulist *roots, const u64 *extent_item_pos)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_delayed_ref_root *delayed_refs = NULL;
	struct btrfs_delayed_ref_head *head;
	int info_level = 0;
	int ret;
	struct list_head prefs_delayed;
	struct list_head prefs;
	struct __prelim_ref *ref;

	INIT_LIST_HEAD(&prefs);
	INIT_LIST_HEAD(&prefs_delayed);

	key.objectid = bytenr;
	key.offset = (u64)-1;
	if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	if (!trans)
		path->search_commit_root = 1;

	/*
	 * grab both a lock on the path and a lock on the delayed ref head.
	 * We need both to get a consistent picture of how the refs look
	 * at a specified point in time
	 */
again:
	head = NULL;

	ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret == 0);

	if (trans) {
		/*
		 * look if there are updates for this ref queued and lock the
		 * head
		 */
		delayed_refs = &trans->transaction->delayed_refs;
		spin_lock(&delayed_refs->lock);
		head = btrfs_find_delayed_ref_head(trans, bytenr);
		if (head) {
			if (!mutex_trylock(&head->mutex)) {
				atomic_inc(&head->node.refs);
				spin_unlock(&delayed_refs->lock);

				btrfs_release_path(path);

				/*
				 * Mutex was contended, block until it's
				 * released and try again
				 */
				mutex_lock(&head->mutex);
				mutex_unlock(&head->mutex);
				btrfs_put_delayed_ref(&head->node);
				goto again;
			}
			ret = __add_delayed_refs(head, time_seq,
						 &prefs_delayed);
			mutex_unlock(&head->mutex);
			if (ret) {
				spin_unlock(&delayed_refs->lock);
				goto out;
			}
		}
		spin_unlock(&delayed_refs->lock);
	}

	if (path->slots[0]) {
		struct extent_buffer *leaf;
		int slot;

		path->slots[0]--;
		leaf = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid == bytenr &&
		    (key.type == BTRFS_EXTENT_ITEM_KEY ||
		     key.type == BTRFS_METADATA_ITEM_KEY)) {
			ret = __add_inline_refs(fs_info, path, bytenr,
						&info_level, &prefs);
			if (ret)
				goto out;
			ret = __add_keyed_refs(fs_info, path, bytenr,
					       info_level, &prefs);
			if (ret)
				goto out;
		}
	}
	btrfs_release_path(path);

	list_splice_init(&prefs_delayed, &prefs);

	ret = __add_missing_keys(fs_info, &prefs);
	if (ret)
		goto out;

	__merge_refs(&prefs, 1);

	ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
				      extent_item_pos);
	if (ret)
		goto out;

	__merge_refs(&prefs, 2);
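
	/*
	 * prefs now holds the fully merged result: entries without a parent
	 * denote a root of a tree, entries with a parent denote a tree block
	 * (and optionally carry an inode list)
	 */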
	while (!list_empty(&prefs)) {
		ref = list_first_entry(&prefs, struct __prelim_ref, list);
		WARN_ON(ref->count < 0);
		if (ref->count && ref->root_id && ref->parent == 0) {
			/* no parent == root of tree */
			ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
			if (ret < 0)
				goto out;
		}
		if (ref->count && ref->parent) {
			struct extent_inode_elem *eie = NULL;
			if (extent_item_pos && !ref->inode_list) {
				u32 bsz;
				struct extent_buffer *eb;
				bsz = btrfs_level_size(fs_info->extent_root,
							info_level);
				eb = read_tree_block(fs_info->extent_root,
							   ref->parent, bsz, 0);
				if (!eb || !extent_buffer_uptodate(eb)) {
					free_extent_buffer(eb);
					ret = -EIO;
					goto out;
				}
				ret = find_extent_in_eb(eb, bytenr,
							*extent_item_pos, &eie);
				free_extent_buffer(eb);
				if (ret < 0)
					goto out;
				ref->inode_list = eie;
			}
			ret = ulist_add_merge(refs, ref->parent,
					      (uintptr_t)ref->inode_list,
					      (u64 *)&eie, GFP_NOFS);
			if (ret < 0)
				goto out;
			if (!ret && extent_item_pos) {
				/*
				 * we've recorded that parent, so we must extend
				 * its inode list here
				 */
				BUG_ON(!eie);
				while (eie->next)
					eie = eie->next;
				eie->next = ref->inode_list;
			}
		}
		list_del(&ref->list);
		kmem_cache_free(btrfs_prelim_ref_cache, ref);
	}

out:
	btrfs_free_path(path);
	while (!list_empty(&prefs)) {
		ref = list_first_entry(&prefs, struct __prelim_ref, list);
		list_del(&ref->list);
		kmem_cache_free(btrfs_prelim_ref_cache, ref);
	}
	while (!list_empty(&prefs_delayed)) {
		ref = list_first_entry(&prefs_delayed, struct __prelim_ref,
				       list);
		list_del(&ref->list);
		kmem_cache_free(btrfs_prelim_ref_cache, ref);
	}

	return ret;
}
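
/*
 * free a ulist of leafs as filled in by find_parent_nodes, including the
 * inode lists attached to the aux field of each node
 */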
static void free_leaf_list(struct ulist *blocks)
{
	struct ulist_node *node = NULL;
	struct extent_inode_elem *eie;
	struct extent_inode_elem *eie_next;
	struct ulist_iterator uiter;

	ULIST_ITER_INIT(&uiter);
	while ((node = ulist_next(blocks, &uiter))) {
		if (!node->aux)
			continue;
		eie = (struct extent_inode_elem *)(uintptr_t)node->aux;
		for (; eie; eie = eie_next) {
			eie_next = eie->next;
			kfree(eie);
		}
		node->aux = 0;
	}

	ulist_free(blocks);
}

/*
 * Finds all leafs with a reference to the specified combination of bytenr and
 * offset. The leafs will be stored in the leafs ulist; when extent_item_pos
 * is given, each leaf node carries a list of matching inode/offset pairs in
 * its aux field. The ulist must be freed with free_leaf_list, which also
 * frees those lists.
 *
 * returns 0 on success, <0 on error
 */
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
				struct btrfs_fs_info *fs_info, u64 bytenr,
				u64 time_seq, struct ulist **leafs,
				const u64 *extent_item_pos)
{
	struct ulist *tmp;
	int ret;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		return -ENOMEM;
	*leafs = ulist_alloc(GFP_NOFS);
	if (!*leafs) {
		ulist_free(tmp);
		return -ENOMEM;
	}

	ret = find_parent_nodes(trans, fs_info, bytenr,
				time_seq, *leafs, tmp, extent_item_pos);
	ulist_free(tmp);

	if (ret < 0 && ret != -ENOENT) {
		free_leaf_list(*leafs);
		return ret;
	}

	return 0;
}

/*
 * walk all backrefs for a given extent to find all roots that reference this
 * extent. Walking a backref means finding all extents that reference this
 * extent and in turn walk the backrefs of those, too. Naturally this is a
 * recursive process, but here it is implemented in an iterative fashion: We
 * find all referencing extents for the extent in question and put them on a
 * list. In turn, we find all referencing extents for those, further appending
 * to the list. The way we iterate the list allows adding more elements after
 * the current while iterating. The process stops when we reach the end of the
 * list. Found roots are added to the roots list.
 *
 * returns 0 on success, < 0 on error.
 */
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 bytenr,
			 u64 time_seq, struct ulist **roots)
{
	struct ulist *tmp;
	struct ulist_node *node = NULL;
	struct ulist_iterator uiter;
	int ret;

	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		return -ENOMEM;
	*roots = ulist_alloc(GFP_NOFS);
	if (!*roots) {
		ulist_free(tmp);
		return -ENOMEM;
	}

	ULIST_ITER_INIT(&uiter);
	while (1) {
		ret = find_parent_nodes(trans, fs_info, bytenr,
					time_seq, tmp, *roots, NULL);
		if (ret < 0 && ret != -ENOENT) {
			ulist_free(tmp);
			ulist_free(*roots);
			return ret;
		}
		node = ulist_next(tmp, &uiter);
		if (!node)
			break;
		bytenr = node->val;
	}

	ulist_free(tmp);
	return 0;
}

static int __inode_info(u64 inum, u64 ioff, u8 key_type,
			struct btrfs_root *fs_root, struct btrfs_path *path,
			struct btrfs_key *found_key)
{
	int ret;
	struct btrfs_key key;
	struct extent_buffer *eb;

	key.type = key_type;
	key.objectid = inum;
	key.offset = ioff;

	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
	if (ret < 0)
		return ret;

	eb = path->nodes[0];
	if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
		ret = btrfs_next_leaf(fs_root, path);
		if (ret)
			return ret;
		eb = path->nodes[0];
	}

	btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
	if (found_key->type != key.type || found_key->objectid != key.objectid)
		return 1;

	return 0;
}

/*
 * this makes the path point to (inum INODE_ITEM ioff)
 */
int inode_item_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
			struct btrfs_path *path)
{
	struct btrfs_key key;
	return __inode_info(inum, ioff, BTRFS_INODE_ITEM_KEY, fs_root, path,
				&key);
}

static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
				struct btrfs_path *path,
				struct btrfs_key *found_key)
{
	return __inode_info(inum, ioff, BTRFS_INODE_REF_KEY, fs_root, path,
				found_key);
}
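
/*
 * find the first INODE_EXTREF item for inode inode_objectid with a key offset
 * of at least start_off; on success, *ret_extref points into the leaf held by
 * path and *found_off, when non-NULL, receives the item's key offset
 */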
int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
			  u64 start_off, struct btrfs_path *path,
			  struct btrfs_inode_extref **ret_extref,
			  u64 *found_off)
{
	int ret, slot;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_inode_extref *extref;
	struct extent_buffer *leaf;
	unsigned long ptr;

	key.objectid = inode_objectid;
	btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY);
	key.offset = start_off;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;

	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			/*
			 * If the item at offset is not found,
			 * btrfs_search_slot will point us to the slot
			 * where it should be inserted. In our case
			 * that will be the slot directly before the
			 * next INODE_EXTREF item. In the case that
			 * we're pointing to the last slot in a leaf,
			 * we must move one leaf over.
			 */
			ret = btrfs_next_leaf(root, path);
			if (ret) {
				if (ret >= 1)
					ret = -ENOENT;
				break;
			}
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		/*
		 * Check that we're still looking at an extended ref key for
		 * this particular objectid. If we have different
		 * objectid or type then there are no more to be found
		 * in the tree and we can exit.
		 */
		ret = -ENOENT;
		if (found_key.objectid != inode_objectid)
			break;
		if (btrfs_key_type(&found_key) != BTRFS_INODE_EXTREF_KEY)
			break;

		ret = 0;
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		extref = (struct btrfs_inode_extref *)ptr;
		*ret_extref = extref;
		if (found_off)
			*found_off = found_key.offset;
		break;
	}

	return ret;
}
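
/*
 * illustration of the backward fill in btrfs_ref_to_path() below, with
 * made-up names: resolving the name "b" of a file in directory "a" into an
 * 8 byte buffer yields
 *
 *	dest: [ ?  ?  ?  ? 'a' '/' 'b' \0 ]
 *
 * and dest + 4 is returned
 */
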
/*
 * this iterates to turn a name (from iref/extref) into a full filesystem path.
 * Elements of the path are separated by '/' and the path is guaranteed to be
 * 0-terminated. the path is only given within the current file system.
 * Therefore, it never starts with a '/'. the caller is responsible for
 * providing "size" bytes in "dest". the dest buffer will be filled backwards.
 * finally, the start point of the resulting string is returned. this pointer
 * is within dest, normally.
 * in case the path buffer would overflow, the pointer is decremented further
 * as if output was written to the buffer, though no more output is actually
 * generated. that way, the caller can determine how much space would be
 * required for the path to fit into the buffer. in that case, the returned
 * value will be smaller than dest. callers must check this!
 */
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
			u32 name_len, unsigned long name_off,
			struct extent_buffer *eb_in, u64 parent,
			char *dest, u32 size)
{
	int slot;
	u64 next_inum;
	int ret;
	s64 bytes_left = ((s64)size) - 1;
	struct extent_buffer *eb = eb_in;
	struct btrfs_key found_key;
	int leave_spinning = path->leave_spinning;
	struct btrfs_inode_ref *iref;

	if (bytes_left >= 0)
		dest[bytes_left] = '\0';

	path->leave_spinning = 1;
	while (1) {
		bytes_left -= name_len;
		if (bytes_left >= 0)
			read_extent_buffer(eb, dest + bytes_left,
					   name_off, name_len);
		if (eb != eb_in) {
			btrfs_tree_read_unlock_blocking(eb);
			free_extent_buffer(eb);
		}
		ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
		if (ret > 0)
			ret = -ENOENT;
		if (ret)
			break;

		next_inum = found_key.offset;

		/* regular exit ahead */
		if (parent == next_inum)
			break;

		slot = path->slots[0];
		eb = path->nodes[0];
		/* make sure we can use eb after releasing the path */
		if (eb != eb_in) {
			atomic_inc(&eb->refs);
			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
		}
		btrfs_release_path(path);
		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);

		name_len = btrfs_inode_ref_name_len(eb, iref);
		name_off = (unsigned long)(iref + 1);

		parent = next_inum;
		--bytes_left;
		if (bytes_left >= 0)
			dest[bytes_left] = '/';
	}

	btrfs_release_path(path);
	path->leave_spinning = leave_spinning;

	if (ret)
		return ERR_PTR(ret);

	return dest + bytes_left;
}

/*
 * this makes the path point to (logical EXTENT_ITEM *)
 * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for
 * tree blocks and <0 on error.
 */
int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
			struct btrfs_path *path, struct btrfs_key *found_key,
			u64 *flags_ret)
{
	int ret;
	u64 flags;
	u64 size = 0;
	u32 item_size;
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;

	if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
		key.type = BTRFS_METADATA_ITEM_KEY;
	else
		key.type = BTRFS_EXTENT_ITEM_KEY;
	key.objectid = logical;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		return ret;
	ret = btrfs_previous_item(fs_info->extent_root, path,
				  0, BTRFS_EXTENT_ITEM_KEY);
	if (ret < 0)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
	if (found_key->type == BTRFS_METADATA_ITEM_KEY)
		size = fs_info->extent_root->leafsize;
	else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
		size = found_key->offset;

	if ((found_key->type != BTRFS_EXTENT_ITEM_KEY &&
	     found_key->type != BTRFS_METADATA_ITEM_KEY) ||
	    found_key->objectid > logical ||
	    found_key->objectid + size <= logical) {
		pr_debug("logical %llu is not within any extent\n", logical);
		return -ENOENT;
	}

	eb = path->nodes[0];
	item_size = btrfs_item_size_nr(eb, path->slots[0]);
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(eb, ei);

	pr_debug("logical %llu is at position %llu within the extent (%llu "
		 "EXTENT_ITEM %llu) flags %#llx size %u\n",
		 logical, logical - found_key->objectid, found_key->objectid,
		 found_key->offset, flags, item_size);

	WARN_ON(!flags_ret);
	if (flags_ret) {
		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
			*flags_ret = BTRFS_EXTENT_FLAG_TREE_BLOCK;
		else if (flags & BTRFS_EXTENT_FLAG_DATA)
			*flags_ret = BTRFS_EXTENT_FLAG_DATA;
		else
			BUG_ON(1);
		return 0;
	}

	return -EIO;
}

/*
 * helper function to iterate extent inline refs. ptr must point to a 0 value
 * for the first call and may be modified. it is used to track state.
 * if more refs exist, 0 is returned and the next call to
 * __get_extent_inline_ref must pass the modified ptr parameter to get the
 * next ref. after the last ref was processed, 1 is returned.
 * returns <0 on error
 */
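
/*
 * typical use of the iterator below (cf. tree_backref_for_extent):
 *
 *	unsigned long ptr = 0;
 *	do {
 *		ret = __get_extent_inline_ref(&ptr, eb, ei, item_size,
 *					      &eiref, &type);
 *		if (ret < 0)
 *			return ret;
 *		... use eiref and type; they are valid for ret 0 and 1 ...
 *	} while (ret == 0);
 */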
static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
				   struct btrfs_extent_item *ei, u32 item_size,
				   struct btrfs_extent_inline_ref **out_eiref,
				   int *out_type)
{
	unsigned long end;
	u64 flags;
	struct btrfs_tree_block_info *info;

	if (!*ptr) {
		/* first call */
		flags = btrfs_extent_flags(eb, ei);
		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
			info = (struct btrfs_tree_block_info *)(ei + 1);
			*out_eiref =
				(struct btrfs_extent_inline_ref *)(info + 1);
		} else {
			*out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
		}
		*ptr = (unsigned long)*out_eiref;
		if ((void *)*ptr >= (void *)ei + item_size)
			return -ENOENT;
	}

	end = (unsigned long)ei + item_size;
	*out_eiref = (struct btrfs_extent_inline_ref *)*ptr;
	*out_type = btrfs_extent_inline_ref_type(eb, *out_eiref);

	*ptr += btrfs_extent_inline_ref_size(*out_type);
	WARN_ON(*ptr > end);
	if (*ptr == end)
		return 1; /* last */

	return 0;
}

/*
 * reads the tree block backref for an extent. tree level and root are returned
 * through out_level and out_root. ptr must point to a 0 value for the first
 * call and may be modified (see __get_extent_inline_ref comment).
 * returns 0 if data was provided, 1 if there was no more data to provide or
 * <0 on error.
 */
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
			    struct btrfs_extent_item *ei, u32 item_size,
			    u64 *out_root, u8 *out_level)
{
	int ret;
	int type;
	struct btrfs_tree_block_info *info;
	struct btrfs_extent_inline_ref *eiref;

	if (*ptr == (unsigned long)-1)
		return 1;

	while (1) {
		ret = __get_extent_inline_ref(ptr, eb, ei, item_size,
					      &eiref, &type);
		if (ret < 0)
			return ret;

		if (type == BTRFS_TREE_BLOCK_REF_KEY ||
		    type == BTRFS_SHARED_BLOCK_REF_KEY)
			break;

		if (ret == 1)
			return 1;
	}

	/* we can treat both ref types equally here */
	info = (struct btrfs_tree_block_info *)(ei + 1);
	*out_root = btrfs_extent_inline_ref_offset(eb, eiref);
	*out_level = btrfs_tree_block_level(eb, info);

	if (ret == 1)
		*ptr = (unsigned long)-1;

	return 0;
}

static int iterate_leaf_refs(struct extent_inode_elem *inode_list,
				u64 root, u64 extent_item_objectid,
				iterate_extent_inodes_t *iterate, void *ctx)
{
	struct extent_inode_elem *eie;
	int ret = 0;

	for (eie = inode_list; eie; eie = eie->next) {
		pr_debug("ref for %llu resolved, key (%llu EXTENT_DATA %llu), "
			 "root %llu\n", extent_item_objectid,
			 eie->inum, eie->offset, root);
		ret = iterate(eie->inum, eie->offset, root, ctx);
		if (ret) {
			pr_debug("stopping iteration for %llu due to ret=%d\n",
				 extent_item_objectid, ret);
			break;
		}
	}

	return ret;
}
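
/*
 * note for iterate_extent_inodes() below: unless search_commit_root is set,
 * a transaction is joined and a tree mod log sequence element is held for the
 * duration of the walk, so that all backref lookups resolve consistently at
 * that point in time
 */
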
/*
 * calls iterate() for every inode that references the extent identified by
 * the given parameters.
 * when the iterator function returns a non-zero value, iteration stops.
 */
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
				u64 extent_item_objectid, u64 extent_item_pos,
				int search_commit_root,
				iterate_extent_inodes_t *iterate, void *ctx)
{
	int ret;
	struct btrfs_trans_handle *trans = NULL;
	struct ulist *refs = NULL;
	struct ulist *roots = NULL;
	struct ulist_node *ref_node = NULL;
	struct ulist_node *root_node = NULL;
	struct seq_list tree_mod_seq_elem = {};
	struct ulist_iterator ref_uiter;
	struct ulist_iterator root_uiter;

	pr_debug("resolving all inodes for extent %llu\n",
			extent_item_objectid);

	if (!search_commit_root) {
		trans = btrfs_join_transaction(fs_info->extent_root);
		if (IS_ERR(trans))
			return PTR_ERR(trans);
		btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
	}

	ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
				   tree_mod_seq_elem.seq, &refs,
				   &extent_item_pos);
	if (ret)
		goto out;

	ULIST_ITER_INIT(&ref_uiter);
	while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
		ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
					   tree_mod_seq_elem.seq, &roots);
		if (ret)
			break;
		ULIST_ITER_INIT(&root_uiter);
		while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
			pr_debug("root %llu references leaf %llu, data list "
				 "%#llx\n", root_node->val, ref_node->val,
				 ref_node->aux);
			ret = iterate_leaf_refs((struct extent_inode_elem *)
						(uintptr_t)ref_node->aux,
						root_node->val,
						extent_item_objectid,
						iterate, ctx);
		}
		ulist_free(roots);
	}

	free_leaf_list(refs);
out:
	if (!search_commit_root) {
		btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
		btrfs_end_transaction(trans, fs_info->extent_root);
	}

	return ret;
}

int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
				struct btrfs_path *path,
				iterate_extent_inodes_t *iterate, void *ctx)
{
	int ret;
	u64 extent_item_pos;
	u64 flags = 0;
	struct btrfs_key found_key;
	int search_commit_root = path->search_commit_root;

	ret = extent_from_logical(fs_info, logical, path, &found_key, &flags);
	btrfs_release_path(path);
	if (ret < 0)
		return ret;
	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
		return -EINVAL;

	extent_item_pos = logical - found_key.objectid;
	ret = iterate_extent_inodes(fs_info, found_key.objectid,
					extent_item_pos, search_commit_root,
					iterate, ctx);

	return ret;
}

typedef int (iterate_irefs_t)(u64 parent, u32 name_len, unsigned long name_off,
			      struct extent_buffer *eb, void *ctx);
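
/*
 * iterate over all INODE_REF items of inode inum, handing the parent inum and
 * each contained name to iterate(); the path is released before each callback
 */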
static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
			      struct btrfs_path *path,
			      iterate_irefs_t *iterate, void *ctx)
{
	int ret = 0;
	int slot;
	u32 cur;
	u32 len;
	u32 name_len;
	u64 parent = 0;
	int found = 0;
	struct extent_buffer *eb;
	struct btrfs_item *item;
	struct btrfs_inode_ref *iref;
	struct btrfs_key found_key;

	while (!ret) {
		path->leave_spinning = 1;
		ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
				     &found_key);
		if (ret < 0)
			break;
		if (ret) {
			ret = found ? 0 : -ENOENT;
			break;
		}
		++found;

		parent = found_key.offset;
		slot = path->slots[0];
		eb = path->nodes[0];
		/* make sure we can use eb after releasing the path */
		atomic_inc(&eb->refs);
		btrfs_tree_read_lock(eb);
		btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
		btrfs_release_path(path);

		item = btrfs_item_nr(slot);
		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);

		for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
			name_len = btrfs_inode_ref_name_len(eb, iref);
			/* path must be released before calling iterate()! */
			pr_debug("following ref at offset %u for inode %llu in "
				 "tree %llu\n", cur, found_key.objectid,
				 fs_root->objectid);
			ret = iterate(parent, name_len,
				      (unsigned long)(iref + 1), eb, ctx);
			if (ret)
				break;
			len = sizeof(*iref) + name_len;
			iref = (struct btrfs_inode_ref *)((char *)iref + len);
		}
		btrfs_tree_read_unlock_blocking(eb);
		free_extent_buffer(eb);
	}

	btrfs_release_path(path);

	return ret;
}

static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
				 struct btrfs_path *path,
				 iterate_irefs_t *iterate, void *ctx)
{
	int ret;
	int slot;
	u64 offset = 0;
	u64 parent;
	int found = 0;
	struct extent_buffer *eb;
	struct btrfs_inode_extref *extref;
	u32 item_size;
	u32 cur_offset;
	unsigned long ptr;

	while (1) {
		ret = btrfs_find_one_extref(fs_root, inum, offset, path, &extref,
					    &offset);
		if (ret < 0)
			break;
		if (ret) {
			ret = found ? 0 : -ENOENT;
			break;
		}
		++found;

		slot = path->slots[0];
		eb = path->nodes[0];
		/* make sure we can use eb after releasing the path */
		atomic_inc(&eb->refs);

		btrfs_tree_read_lock(eb);
		btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
		/*
		 * the path is invalid after this; read the item through the
		 * eb/slot pair saved above (the extra reference keeps eb
		 * alive), not through path->nodes[0]
		 */
		btrfs_release_path(path);

		item_size = btrfs_item_size_nr(eb, slot);
		ptr = btrfs_item_ptr_offset(eb, slot);
		cur_offset = 0;

		while (cur_offset < item_size) {
			u32 name_len;

			extref = (struct btrfs_inode_extref *)(ptr + cur_offset);
			parent = btrfs_inode_extref_parent(eb, extref);
			name_len = btrfs_inode_extref_name_len(eb, extref);
			ret = iterate(parent, name_len,
				      (unsigned long)&extref->name, eb, ctx);
			if (ret)
				break;

			cur_offset += name_len;
			cur_offset += sizeof(*extref);
		}
		btrfs_tree_read_unlock_blocking(eb);
		free_extent_buffer(eb);

		offset++;
	}

	btrfs_release_path(path);

	return ret;
}

static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
			 struct btrfs_path *path, iterate_irefs_t *iterate,
			 void *ctx)
{
	int ret;
	int found_refs = 0;

	ret = iterate_inode_refs(inum, fs_root, path, iterate, ctx);
	if (!ret)
		++found_refs;
	else if (ret != -ENOENT)
		return ret;

	ret = iterate_inode_extrefs(inum, fs_root, path, iterate, ctx);
	if (ret == -ENOENT && found_refs)
		return 0;

	return ret;
}
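
/*
 * inode_to_path() below is the iterate_irefs_t callback behind
 * paths_from_inode(): each name handed in is expanded to a full path and
 * stored in the ipath data container passed via ctx
 */
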
/*
 * returns 0 if the path could be dumped (probably truncated)
 * returns <0 in case of an error
 */
static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off,
			 struct extent_buffer *eb, void *ctx)
{
	struct inode_fs_paths *ipath = ctx;
	char *fspath;
	char *fspath_min;
	int i = ipath->fspath->elem_cnt;
	const int s_ptr = sizeof(char *);
	u32 bytes_left;

	bytes_left = ipath->fspath->bytes_left > s_ptr ?
					ipath->fspath->bytes_left - s_ptr : 0;

	fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr;
	fspath = btrfs_ref_to_path(ipath->fs_root, ipath->btrfs_path, name_len,
				   name_off, eb, inum, fspath_min, bytes_left);
	if (IS_ERR(fspath))
		return PTR_ERR(fspath);

	if (fspath > fspath_min) {
		ipath->fspath->val[i] = (u64)(unsigned long)fspath;
		++ipath->fspath->elem_cnt;
		ipath->fspath->bytes_left = fspath - fspath_min;
	} else {
		++ipath->fspath->elem_missed;
		ipath->fspath->bytes_missing += fspath_min - fspath;
		ipath->fspath->bytes_left = 0;
	}

	return 0;
}

/*
 * this dumps all file system paths to the inode into the ipath struct,
 * provided it has been created large enough. each path is zero-terminated
 * and accessed from ipath->fspath->val[i].
 * when it returns, there are ipath->fspath->elem_cnt number of paths available
 * in ipath->fspath->val[]. when the allocated space wasn't sufficient, the
 * number of missed paths is recorded in ipath->fspath->elem_missed, otherwise,
 * it's zero. ipath->fspath->bytes_missing holds the number of bytes that would
 * have been needed to return all paths.
 */
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath)
{
	return iterate_irefs(inum, ipath->fs_root, ipath->btrfs_path,
			     inode_to_path, ipath);
}

struct btrfs_data_container *init_data_container(u32 total_bytes)
{
	struct btrfs_data_container *data;
	size_t alloc_bytes;

	alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
	data = vmalloc(alloc_bytes);
	if (!data)
		return ERR_PTR(-ENOMEM);

	if (total_bytes >= sizeof(*data)) {
		data->bytes_left = total_bytes - sizeof(*data);
		data->bytes_missing = 0;
	} else {
		data->bytes_missing = sizeof(*data) - total_bytes;
		data->bytes_left = 0;
	}

	data->elem_cnt = 0;
	data->elem_missed = 0;

	return data;
}

/*
 * allocates space to return multiple file system paths for an inode.
 * total_bytes to allocate are passed, note that space usable for actual path
 * information will be total_bytes - sizeof(struct inode_fs_paths).
 * the returned pointer must be freed with free_ipath() in the end.
 */
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
					struct btrfs_path *path)
{
	struct inode_fs_paths *ifp;
	struct btrfs_data_container *fspath;

	fspath = init_data_container(total_bytes);
	if (IS_ERR(fspath))
		return (void *)fspath;

	ifp = kmalloc(sizeof(*ifp), GFP_NOFS);
	if (!ifp) {
		/* fspath was vmalloc'ed by init_data_container */
		vfree(fspath);
		return ERR_PTR(-ENOMEM);
	}

	ifp->btrfs_path = path;
	ifp->fspath = fspath;
	ifp->fs_root = fs_root;

	return ifp;
}

void free_ipath(struct inode_fs_paths *ipath)
{
	if (!ipath)
		return;
	vfree(ipath->fspath);
	kfree(ipath);
}