1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2014 Facebook. All rights reserved. 4 */ 5 6 #include <linux/sched.h> 7 #include <linux/stacktrace.h> 8 #include "messages.h" 9 #include "ctree.h" 10 #include "disk-io.h" 11 #include "locking.h" 12 #include "delayed-ref.h" 13 #include "ref-verify.h" 14 #include "fs.h" 15 #include "accessors.h" 16 17 /* 18 * Used to keep track the roots and number of refs each root has for a given 19 * bytenr. This just tracks the number of direct references, no shared 20 * references. 21 */ 22 struct root_entry { 23 u64 root_objectid; 24 u64 num_refs; 25 struct rb_node node; 26 }; 27 28 /* 29 * These are meant to represent what should exist in the extent tree, these can 30 * be used to verify the extent tree is consistent as these should all match 31 * what the extent tree says. 32 */ 33 struct ref_entry { 34 u64 root_objectid; 35 u64 parent; 36 u64 owner; 37 u64 offset; 38 u64 num_refs; 39 struct rb_node node; 40 }; 41 42 #define MAX_TRACE 16 43 44 /* 45 * Whenever we add/remove a reference we record the action. The action maps 46 * back to the delayed ref action. We hold the ref we are changing in the 47 * action so we can account for the history properly, and we record the root we 48 * were called with since it could be different from ref_root. We also store 49 * stack traces because that's how I roll. 50 */ 51 struct ref_action { 52 int action; 53 u64 root; 54 struct ref_entry ref; 55 struct list_head list; 56 unsigned long trace[MAX_TRACE]; 57 unsigned int trace_len; 58 }; 59 60 /* 61 * One of these for every block we reference, it holds the roots and references 62 * to it as well as all of the ref actions that have occurred to it. We never 63 * free it until we unmount the file system in order to make sure re-allocations 64 * are happening properly. 65 */ 66 struct block_entry { 67 u64 bytenr; 68 u64 len; 69 u64 num_refs; 70 int metadata; 71 int from_disk; 72 struct rb_root roots; 73 struct rb_root refs; 74 struct rb_node node; 75 struct list_head actions; 76 }; 77 78 static struct block_entry *insert_block_entry(struct rb_root *root, 79 struct block_entry *be) 80 { 81 struct rb_node **p = &root->rb_node; 82 struct rb_node *parent_node = NULL; 83 struct block_entry *entry; 84 85 while (*p) { 86 parent_node = *p; 87 entry = rb_entry(parent_node, struct block_entry, node); 88 if (entry->bytenr > be->bytenr) 89 p = &(*p)->rb_left; 90 else if (entry->bytenr < be->bytenr) 91 p = &(*p)->rb_right; 92 else 93 return entry; 94 } 95 96 rb_link_node(&be->node, parent_node, p); 97 rb_insert_color(&be->node, root); 98 return NULL; 99 } 100 101 static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr) 102 { 103 struct rb_node *n; 104 struct block_entry *entry = NULL; 105 106 n = root->rb_node; 107 while (n) { 108 entry = rb_entry(n, struct block_entry, node); 109 if (entry->bytenr < bytenr) 110 n = n->rb_right; 111 else if (entry->bytenr > bytenr) 112 n = n->rb_left; 113 else 114 return entry; 115 } 116 return NULL; 117 } 118 119 static struct root_entry *insert_root_entry(struct rb_root *root, 120 struct root_entry *re) 121 { 122 struct rb_node **p = &root->rb_node; 123 struct rb_node *parent_node = NULL; 124 struct root_entry *entry; 125 126 while (*p) { 127 parent_node = *p; 128 entry = rb_entry(parent_node, struct root_entry, node); 129 if (entry->root_objectid > re->root_objectid) 130 p = &(*p)->rb_left; 131 else if (entry->root_objectid < re->root_objectid) 132 p = &(*p)->rb_right; 133 else 134 return entry; 135 } 136 137 rb_link_node(&re->node, parent_node, p); 138 rb_insert_color(&re->node, root); 139 return NULL; 140 141 } 142 143 static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2) 144 { 145 if (ref1->root_objectid < ref2->root_objectid) 146 return -1; 147 if (ref1->root_objectid > ref2->root_objectid) 148 return 1; 149 if (ref1->parent < ref2->parent) 150 return -1; 151 if (ref1->parent > ref2->parent) 152 return 1; 153 if (ref1->owner < ref2->owner) 154 return -1; 155 if (ref1->owner > ref2->owner) 156 return 1; 157 if (ref1->offset < ref2->offset) 158 return -1; 159 if (ref1->offset > ref2->offset) 160 return 1; 161 return 0; 162 } 163 164 static struct ref_entry *insert_ref_entry(struct rb_root *root, 165 struct ref_entry *ref) 166 { 167 struct rb_node **p = &root->rb_node; 168 struct rb_node *parent_node = NULL; 169 struct ref_entry *entry; 170 int cmp; 171 172 while (*p) { 173 parent_node = *p; 174 entry = rb_entry(parent_node, struct ref_entry, node); 175 cmp = comp_refs(entry, ref); 176 if (cmp > 0) 177 p = &(*p)->rb_left; 178 else if (cmp < 0) 179 p = &(*p)->rb_right; 180 else 181 return entry; 182 } 183 184 rb_link_node(&ref->node, parent_node, p); 185 rb_insert_color(&ref->node, root); 186 return NULL; 187 188 } 189 190 static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid) 191 { 192 struct rb_node *n; 193 struct root_entry *entry = NULL; 194 195 n = root->rb_node; 196 while (n) { 197 entry = rb_entry(n, struct root_entry, node); 198 if (entry->root_objectid < objectid) 199 n = n->rb_right; 200 else if (entry->root_objectid > objectid) 201 n = n->rb_left; 202 else 203 return entry; 204 } 205 return NULL; 206 } 207 208 #ifdef CONFIG_STACKTRACE 209 static void __save_stack_trace(struct ref_action *ra) 210 { 211 ra->trace_len = stack_trace_save(ra->trace, MAX_TRACE, 2); 212 } 213 214 static void __print_stack_trace(struct btrfs_fs_info *fs_info, 215 struct ref_action *ra) 216 { 217 if (ra->trace_len == 0) { 218 btrfs_err(fs_info, " ref-verify: no stacktrace"); 219 return; 220 } 221 stack_trace_print(ra->trace, ra->trace_len, 2); 222 } 223 #else 224 static inline void __save_stack_trace(struct ref_action *ra) 225 { 226 } 227 228 static inline void __print_stack_trace(struct btrfs_fs_info *fs_info, 229 struct ref_action *ra) 230 { 231 btrfs_err(fs_info, " ref-verify: no stacktrace support"); 232 } 233 #endif 234 235 static void free_block_entry(struct block_entry *be) 236 { 237 struct root_entry *re; 238 struct ref_entry *ref; 239 struct ref_action *ra; 240 struct rb_node *n; 241 242 while ((n = rb_first(&be->roots))) { 243 re = rb_entry(n, struct root_entry, node); 244 rb_erase(&re->node, &be->roots); 245 kfree(re); 246 } 247 248 while((n = rb_first(&be->refs))) { 249 ref = rb_entry(n, struct ref_entry, node); 250 rb_erase(&ref->node, &be->refs); 251 kfree(ref); 252 } 253 254 while (!list_empty(&be->actions)) { 255 ra = list_first_entry(&be->actions, struct ref_action, 256 list); 257 list_del(&ra->list); 258 kfree(ra); 259 } 260 kfree(be); 261 } 262 263 static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info, 264 u64 bytenr, u64 len, 265 u64 root_objectid) 266 { 267 struct block_entry *be = NULL, *exist; 268 struct root_entry *re = NULL; 269 270 re = kzalloc(sizeof(struct root_entry), GFP_NOFS); 271 be = kzalloc(sizeof(struct block_entry), GFP_NOFS); 272 if (!be || !re) { 273 kfree(re); 274 kfree(be); 275 return ERR_PTR(-ENOMEM); 276 } 277 be->bytenr = bytenr; 278 be->len = len; 279 280 re->root_objectid = root_objectid; 281 re->num_refs = 0; 282 283 spin_lock(&fs_info->ref_verify_lock); 284 exist = insert_block_entry(&fs_info->block_tree, be); 285 if (exist) { 286 if (root_objectid) { 287 struct root_entry *exist_re; 288 289 exist_re = insert_root_entry(&exist->roots, re); 290 if (exist_re) 291 kfree(re); 292 } else { 293 kfree(re); 294 } 295 kfree(be); 296 return exist; 297 } 298 299 be->num_refs = 0; 300 be->metadata = 0; 301 be->from_disk = 0; 302 be->roots = RB_ROOT; 303 be->refs = RB_ROOT; 304 INIT_LIST_HEAD(&be->actions); 305 if (root_objectid) 306 insert_root_entry(&be->roots, re); 307 else 308 kfree(re); 309 return be; 310 } 311 312 static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root, 313 u64 parent, u64 bytenr, int level) 314 { 315 struct block_entry *be; 316 struct root_entry *re; 317 struct ref_entry *ref = NULL, *exist; 318 319 ref = kmalloc(sizeof(struct ref_entry), GFP_NOFS); 320 if (!ref) 321 return -ENOMEM; 322 323 if (parent) 324 ref->root_objectid = 0; 325 else 326 ref->root_objectid = ref_root; 327 ref->parent = parent; 328 ref->owner = level; 329 ref->offset = 0; 330 ref->num_refs = 1; 331 332 be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root); 333 if (IS_ERR(be)) { 334 kfree(ref); 335 return PTR_ERR(be); 336 } 337 be->num_refs++; 338 be->from_disk = 1; 339 be->metadata = 1; 340 341 if (!parent) { 342 ASSERT(ref_root); 343 re = lookup_root_entry(&be->roots, ref_root); 344 ASSERT(re); 345 re->num_refs++; 346 } 347 exist = insert_ref_entry(&be->refs, ref); 348 if (exist) { 349 exist->num_refs++; 350 kfree(ref); 351 } 352 spin_unlock(&fs_info->ref_verify_lock); 353 354 return 0; 355 } 356 357 static int add_shared_data_ref(struct btrfs_fs_info *fs_info, 358 u64 parent, u32 num_refs, u64 bytenr, 359 u64 num_bytes) 360 { 361 struct block_entry *be; 362 struct ref_entry *ref; 363 364 ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS); 365 if (!ref) 366 return -ENOMEM; 367 be = add_block_entry(fs_info, bytenr, num_bytes, 0); 368 if (IS_ERR(be)) { 369 kfree(ref); 370 return PTR_ERR(be); 371 } 372 be->num_refs += num_refs; 373 374 ref->parent = parent; 375 ref->num_refs = num_refs; 376 if (insert_ref_entry(&be->refs, ref)) { 377 spin_unlock(&fs_info->ref_verify_lock); 378 btrfs_err(fs_info, "existing shared ref when reading from disk?"); 379 kfree(ref); 380 return -EINVAL; 381 } 382 spin_unlock(&fs_info->ref_verify_lock); 383 return 0; 384 } 385 386 static int add_extent_data_ref(struct btrfs_fs_info *fs_info, 387 struct extent_buffer *leaf, 388 struct btrfs_extent_data_ref *dref, 389 u64 bytenr, u64 num_bytes) 390 { 391 struct block_entry *be; 392 struct ref_entry *ref; 393 struct root_entry *re; 394 u64 ref_root = btrfs_extent_data_ref_root(leaf, dref); 395 u64 owner = btrfs_extent_data_ref_objectid(leaf, dref); 396 u64 offset = btrfs_extent_data_ref_offset(leaf, dref); 397 u32 num_refs = btrfs_extent_data_ref_count(leaf, dref); 398 399 ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS); 400 if (!ref) 401 return -ENOMEM; 402 be = add_block_entry(fs_info, bytenr, num_bytes, ref_root); 403 if (IS_ERR(be)) { 404 kfree(ref); 405 return PTR_ERR(be); 406 } 407 be->num_refs += num_refs; 408 409 ref->parent = 0; 410 ref->owner = owner; 411 ref->root_objectid = ref_root; 412 ref->offset = offset; 413 ref->num_refs = num_refs; 414 if (insert_ref_entry(&be->refs, ref)) { 415 spin_unlock(&fs_info->ref_verify_lock); 416 btrfs_err(fs_info, "existing ref when reading from disk?"); 417 kfree(ref); 418 return -EINVAL; 419 } 420 421 re = lookup_root_entry(&be->roots, ref_root); 422 if (!re) { 423 spin_unlock(&fs_info->ref_verify_lock); 424 btrfs_err(fs_info, "missing root in new block entry?"); 425 return -EINVAL; 426 } 427 re->num_refs += num_refs; 428 spin_unlock(&fs_info->ref_verify_lock); 429 return 0; 430 } 431 432 static int process_extent_item(struct btrfs_fs_info *fs_info, 433 struct btrfs_path *path, struct btrfs_key *key, 434 int slot, int *tree_block_level) 435 { 436 struct btrfs_extent_item *ei; 437 struct btrfs_extent_inline_ref *iref; 438 struct btrfs_extent_data_ref *dref; 439 struct btrfs_shared_data_ref *sref; 440 struct extent_buffer *leaf = path->nodes[0]; 441 u32 item_size = btrfs_item_size(leaf, slot); 442 unsigned long end, ptr; 443 u64 offset, flags, count; 444 int type, ret; 445 446 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); 447 flags = btrfs_extent_flags(leaf, ei); 448 449 if ((key->type == BTRFS_EXTENT_ITEM_KEY) && 450 flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 451 struct btrfs_tree_block_info *info; 452 453 info = (struct btrfs_tree_block_info *)(ei + 1); 454 *tree_block_level = btrfs_tree_block_level(leaf, info); 455 iref = (struct btrfs_extent_inline_ref *)(info + 1); 456 } else { 457 if (key->type == BTRFS_METADATA_ITEM_KEY) 458 *tree_block_level = key->offset; 459 iref = (struct btrfs_extent_inline_ref *)(ei + 1); 460 } 461 462 ptr = (unsigned long)iref; 463 end = (unsigned long)ei + item_size; 464 while (ptr < end) { 465 iref = (struct btrfs_extent_inline_ref *)ptr; 466 type = btrfs_extent_inline_ref_type(leaf, iref); 467 offset = btrfs_extent_inline_ref_offset(leaf, iref); 468 switch (type) { 469 case BTRFS_TREE_BLOCK_REF_KEY: 470 ret = add_tree_block(fs_info, offset, 0, key->objectid, 471 *tree_block_level); 472 break; 473 case BTRFS_SHARED_BLOCK_REF_KEY: 474 ret = add_tree_block(fs_info, 0, offset, key->objectid, 475 *tree_block_level); 476 break; 477 case BTRFS_EXTENT_DATA_REF_KEY: 478 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 479 ret = add_extent_data_ref(fs_info, leaf, dref, 480 key->objectid, key->offset); 481 break; 482 case BTRFS_SHARED_DATA_REF_KEY: 483 sref = (struct btrfs_shared_data_ref *)(iref + 1); 484 count = btrfs_shared_data_ref_count(leaf, sref); 485 ret = add_shared_data_ref(fs_info, offset, count, 486 key->objectid, key->offset); 487 break; 488 default: 489 btrfs_err(fs_info, "invalid key type in iref"); 490 ret = -EINVAL; 491 break; 492 } 493 if (ret) 494 break; 495 ptr += btrfs_extent_inline_ref_size(type); 496 } 497 return ret; 498 } 499 500 static int process_leaf(struct btrfs_root *root, 501 struct btrfs_path *path, u64 *bytenr, u64 *num_bytes, 502 int *tree_block_level) 503 { 504 struct btrfs_fs_info *fs_info = root->fs_info; 505 struct extent_buffer *leaf = path->nodes[0]; 506 struct btrfs_extent_data_ref *dref; 507 struct btrfs_shared_data_ref *sref; 508 u32 count; 509 int i = 0, ret = 0; 510 struct btrfs_key key; 511 int nritems = btrfs_header_nritems(leaf); 512 513 for (i = 0; i < nritems; i++) { 514 btrfs_item_key_to_cpu(leaf, &key, i); 515 switch (key.type) { 516 case BTRFS_EXTENT_ITEM_KEY: 517 *num_bytes = key.offset; 518 fallthrough; 519 case BTRFS_METADATA_ITEM_KEY: 520 *bytenr = key.objectid; 521 ret = process_extent_item(fs_info, path, &key, i, 522 tree_block_level); 523 break; 524 case BTRFS_TREE_BLOCK_REF_KEY: 525 ret = add_tree_block(fs_info, key.offset, 0, 526 key.objectid, *tree_block_level); 527 break; 528 case BTRFS_SHARED_BLOCK_REF_KEY: 529 ret = add_tree_block(fs_info, 0, key.offset, 530 key.objectid, *tree_block_level); 531 break; 532 case BTRFS_EXTENT_DATA_REF_KEY: 533 dref = btrfs_item_ptr(leaf, i, 534 struct btrfs_extent_data_ref); 535 ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr, 536 *num_bytes); 537 break; 538 case BTRFS_SHARED_DATA_REF_KEY: 539 sref = btrfs_item_ptr(leaf, i, 540 struct btrfs_shared_data_ref); 541 count = btrfs_shared_data_ref_count(leaf, sref); 542 ret = add_shared_data_ref(fs_info, key.offset, count, 543 *bytenr, *num_bytes); 544 break; 545 default: 546 break; 547 } 548 if (ret) 549 break; 550 } 551 return ret; 552 } 553 554 /* Walk down to the leaf from the given level */ 555 static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, 556 int level, u64 *bytenr, u64 *num_bytes, 557 int *tree_block_level) 558 { 559 struct extent_buffer *eb; 560 int ret = 0; 561 562 while (level >= 0) { 563 if (level) { 564 eb = btrfs_read_node_slot(path->nodes[level], 565 path->slots[level]); 566 if (IS_ERR(eb)) 567 return PTR_ERR(eb); 568 btrfs_tree_read_lock(eb); 569 path->nodes[level-1] = eb; 570 path->slots[level-1] = 0; 571 path->locks[level-1] = BTRFS_READ_LOCK; 572 } else { 573 ret = process_leaf(root, path, bytenr, num_bytes, 574 tree_block_level); 575 if (ret) 576 break; 577 } 578 level--; 579 } 580 return ret; 581 } 582 583 /* Walk up to the next node that needs to be processed */ 584 static int walk_up_tree(struct btrfs_path *path, int *level) 585 { 586 int l; 587 588 for (l = 0; l < BTRFS_MAX_LEVEL; l++) { 589 if (!path->nodes[l]) 590 continue; 591 if (l) { 592 path->slots[l]++; 593 if (path->slots[l] < 594 btrfs_header_nritems(path->nodes[l])) { 595 *level = l; 596 return 0; 597 } 598 } 599 btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]); 600 free_extent_buffer(path->nodes[l]); 601 path->nodes[l] = NULL; 602 path->slots[l] = 0; 603 path->locks[l] = 0; 604 } 605 606 return 1; 607 } 608 609 static void dump_ref_action(struct btrfs_fs_info *fs_info, 610 struct ref_action *ra) 611 { 612 btrfs_err(fs_info, 613 " Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu", 614 ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent, 615 ra->ref.owner, ra->ref.offset, ra->ref.num_refs); 616 __print_stack_trace(fs_info, ra); 617 } 618 619 /* 620 * Dumps all the information from the block entry to printk, it's going to be 621 * awesome. 622 */ 623 static void dump_block_entry(struct btrfs_fs_info *fs_info, 624 struct block_entry *be) 625 { 626 struct ref_entry *ref; 627 struct root_entry *re; 628 struct ref_action *ra; 629 struct rb_node *n; 630 631 btrfs_err(fs_info, 632 "dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d", 633 be->bytenr, be->len, be->num_refs, be->metadata, 634 be->from_disk); 635 636 for (n = rb_first(&be->refs); n; n = rb_next(n)) { 637 ref = rb_entry(n, struct ref_entry, node); 638 btrfs_err(fs_info, 639 " ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu", 640 ref->root_objectid, ref->parent, ref->owner, 641 ref->offset, ref->num_refs); 642 } 643 644 for (n = rb_first(&be->roots); n; n = rb_next(n)) { 645 re = rb_entry(n, struct root_entry, node); 646 btrfs_err(fs_info, " root entry %llu, num_refs %llu", 647 re->root_objectid, re->num_refs); 648 } 649 650 list_for_each_entry(ra, &be->actions, list) 651 dump_ref_action(fs_info, ra); 652 } 653 654 /* 655 * btrfs_ref_tree_mod: called when we modify a ref for a bytenr 656 * 657 * This will add an action item to the given bytenr and do sanity checks to make 658 * sure we haven't messed something up. If we are making a new allocation and 659 * this block entry has history we will delete all previous actions as long as 660 * our sanity checks pass as they are no longer needed. 661 */ 662 int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, 663 struct btrfs_ref *generic_ref) 664 { 665 struct ref_entry *ref = NULL, *exist; 666 struct ref_action *ra = NULL; 667 struct block_entry *be = NULL; 668 struct root_entry *re = NULL; 669 int action = generic_ref->action; 670 int ret = 0; 671 bool metadata; 672 u64 bytenr = generic_ref->bytenr; 673 u64 num_bytes = generic_ref->len; 674 u64 parent = generic_ref->parent; 675 u64 ref_root = 0; 676 u64 owner = 0; 677 u64 offset = 0; 678 679 if (!btrfs_test_opt(fs_info, REF_VERIFY)) 680 return 0; 681 682 if (generic_ref->type == BTRFS_REF_METADATA) { 683 if (!parent) 684 ref_root = generic_ref->tree_ref.owning_root; 685 owner = generic_ref->tree_ref.level; 686 } else if (!parent) { 687 ref_root = generic_ref->data_ref.owning_root; 688 owner = generic_ref->data_ref.ino; 689 offset = generic_ref->data_ref.offset; 690 } 691 metadata = owner < BTRFS_FIRST_FREE_OBJECTID; 692 693 ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS); 694 ra = kmalloc(sizeof(struct ref_action), GFP_NOFS); 695 if (!ra || !ref) { 696 kfree(ref); 697 kfree(ra); 698 ret = -ENOMEM; 699 goto out; 700 } 701 702 ref->parent = parent; 703 ref->owner = owner; 704 ref->root_objectid = ref_root; 705 ref->offset = offset; 706 ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1; 707 708 memcpy(&ra->ref, ref, sizeof(struct ref_entry)); 709 /* 710 * Save the extra info from the delayed ref in the ref action to make it 711 * easier to figure out what is happening. The real ref's we add to the 712 * ref tree need to reflect what we save on disk so it matches any 713 * on-disk refs we pre-loaded. 714 */ 715 ra->ref.owner = owner; 716 ra->ref.offset = offset; 717 ra->ref.root_objectid = ref_root; 718 __save_stack_trace(ra); 719 720 INIT_LIST_HEAD(&ra->list); 721 ra->action = action; 722 ra->root = generic_ref->real_root; 723 724 /* 725 * This is an allocation, preallocate the block_entry in case we haven't 726 * used it before. 727 */ 728 ret = -EINVAL; 729 if (action == BTRFS_ADD_DELAYED_EXTENT) { 730 /* 731 * For subvol_create we'll just pass in whatever the parent root 732 * is and the new root objectid, so let's not treat the passed 733 * in root as if it really has a ref for this bytenr. 734 */ 735 be = add_block_entry(fs_info, bytenr, num_bytes, ref_root); 736 if (IS_ERR(be)) { 737 kfree(ref); 738 kfree(ra); 739 ret = PTR_ERR(be); 740 goto out; 741 } 742 be->num_refs++; 743 if (metadata) 744 be->metadata = 1; 745 746 if (be->num_refs != 1) { 747 btrfs_err(fs_info, 748 "re-allocated a block that still has references to it!"); 749 dump_block_entry(fs_info, be); 750 dump_ref_action(fs_info, ra); 751 kfree(ref); 752 kfree(ra); 753 goto out_unlock; 754 } 755 756 while (!list_empty(&be->actions)) { 757 struct ref_action *tmp; 758 759 tmp = list_first_entry(&be->actions, struct ref_action, 760 list); 761 list_del(&tmp->list); 762 kfree(tmp); 763 } 764 } else { 765 struct root_entry *tmp; 766 767 if (!parent) { 768 re = kmalloc(sizeof(struct root_entry), GFP_NOFS); 769 if (!re) { 770 kfree(ref); 771 kfree(ra); 772 ret = -ENOMEM; 773 goto out; 774 } 775 /* 776 * This is the root that is modifying us, so it's the 777 * one we want to lookup below when we modify the 778 * re->num_refs. 779 */ 780 ref_root = generic_ref->real_root; 781 re->root_objectid = generic_ref->real_root; 782 re->num_refs = 0; 783 } 784 785 spin_lock(&fs_info->ref_verify_lock); 786 be = lookup_block_entry(&fs_info->block_tree, bytenr); 787 if (!be) { 788 btrfs_err(fs_info, 789 "trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!", 790 action, bytenr, num_bytes); 791 dump_ref_action(fs_info, ra); 792 kfree(ref); 793 kfree(ra); 794 kfree(re); 795 goto out_unlock; 796 } else if (be->num_refs == 0) { 797 btrfs_err(fs_info, 798 "trying to do action %d for a bytenr that has 0 total references", 799 action); 800 dump_block_entry(fs_info, be); 801 dump_ref_action(fs_info, ra); 802 kfree(ref); 803 kfree(ra); 804 kfree(re); 805 goto out_unlock; 806 } 807 808 if (!parent) { 809 tmp = insert_root_entry(&be->roots, re); 810 if (tmp) { 811 kfree(re); 812 re = tmp; 813 } 814 } 815 } 816 817 exist = insert_ref_entry(&be->refs, ref); 818 if (exist) { 819 if (action == BTRFS_DROP_DELAYED_REF) { 820 if (exist->num_refs == 0) { 821 btrfs_err(fs_info, 822 "dropping a ref for a existing root that doesn't have a ref on the block"); 823 dump_block_entry(fs_info, be); 824 dump_ref_action(fs_info, ra); 825 kfree(ref); 826 kfree(ra); 827 goto out_unlock; 828 } 829 exist->num_refs--; 830 if (exist->num_refs == 0) { 831 rb_erase(&exist->node, &be->refs); 832 kfree(exist); 833 } 834 } else if (!be->metadata) { 835 exist->num_refs++; 836 } else { 837 btrfs_err(fs_info, 838 "attempting to add another ref for an existing ref on a tree block"); 839 dump_block_entry(fs_info, be); 840 dump_ref_action(fs_info, ra); 841 kfree(ref); 842 kfree(ra); 843 goto out_unlock; 844 } 845 kfree(ref); 846 } else { 847 if (action == BTRFS_DROP_DELAYED_REF) { 848 btrfs_err(fs_info, 849 "dropping a ref for a root that doesn't have a ref on the block"); 850 dump_block_entry(fs_info, be); 851 dump_ref_action(fs_info, ra); 852 rb_erase(&ref->node, &be->refs); 853 kfree(ref); 854 kfree(ra); 855 goto out_unlock; 856 } 857 } 858 859 if (!parent && !re) { 860 re = lookup_root_entry(&be->roots, ref_root); 861 if (!re) { 862 /* 863 * This shouldn't happen because we will add our re 864 * above when we lookup the be with !parent, but just in 865 * case catch this case so we don't panic because I 866 * didn't think of some other corner case. 867 */ 868 btrfs_err(fs_info, "failed to find root %llu for %llu", 869 generic_ref->real_root, be->bytenr); 870 dump_block_entry(fs_info, be); 871 dump_ref_action(fs_info, ra); 872 kfree(ra); 873 goto out_unlock; 874 } 875 } 876 if (action == BTRFS_DROP_DELAYED_REF) { 877 if (re) 878 re->num_refs--; 879 be->num_refs--; 880 } else if (action == BTRFS_ADD_DELAYED_REF) { 881 be->num_refs++; 882 if (re) 883 re->num_refs++; 884 } 885 list_add_tail(&ra->list, &be->actions); 886 ret = 0; 887 out_unlock: 888 spin_unlock(&fs_info->ref_verify_lock); 889 out: 890 if (ret) { 891 btrfs_free_ref_cache(fs_info); 892 btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); 893 } 894 return ret; 895 } 896 897 /* Free up the ref cache */ 898 void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info) 899 { 900 struct block_entry *be; 901 struct rb_node *n; 902 903 if (!btrfs_test_opt(fs_info, REF_VERIFY)) 904 return; 905 906 spin_lock(&fs_info->ref_verify_lock); 907 while ((n = rb_first(&fs_info->block_tree))) { 908 be = rb_entry(n, struct block_entry, node); 909 rb_erase(&be->node, &fs_info->block_tree); 910 free_block_entry(be); 911 cond_resched_lock(&fs_info->ref_verify_lock); 912 } 913 spin_unlock(&fs_info->ref_verify_lock); 914 } 915 916 void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start, 917 u64 len) 918 { 919 struct block_entry *be = NULL, *entry; 920 struct rb_node *n; 921 922 if (!btrfs_test_opt(fs_info, REF_VERIFY)) 923 return; 924 925 spin_lock(&fs_info->ref_verify_lock); 926 n = fs_info->block_tree.rb_node; 927 while (n) { 928 entry = rb_entry(n, struct block_entry, node); 929 if (entry->bytenr < start) { 930 n = n->rb_right; 931 } else if (entry->bytenr > start) { 932 n = n->rb_left; 933 } else { 934 be = entry; 935 break; 936 } 937 /* We want to get as close to start as possible */ 938 if (be == NULL || 939 (entry->bytenr < start && be->bytenr > start) || 940 (entry->bytenr < start && entry->bytenr > be->bytenr)) 941 be = entry; 942 } 943 944 /* 945 * Could have an empty block group, maybe have something to check for 946 * this case to verify we were actually empty? 947 */ 948 if (!be) { 949 spin_unlock(&fs_info->ref_verify_lock); 950 return; 951 } 952 953 n = &be->node; 954 while (n) { 955 be = rb_entry(n, struct block_entry, node); 956 n = rb_next(n); 957 if (be->bytenr < start && be->bytenr + be->len > start) { 958 btrfs_err(fs_info, 959 "block entry overlaps a block group [%llu,%llu]!", 960 start, len); 961 dump_block_entry(fs_info, be); 962 continue; 963 } 964 if (be->bytenr < start) 965 continue; 966 if (be->bytenr >= start + len) 967 break; 968 if (be->bytenr + be->len > start + len) { 969 btrfs_err(fs_info, 970 "block entry overlaps a block group [%llu,%llu]!", 971 start, len); 972 dump_block_entry(fs_info, be); 973 } 974 rb_erase(&be->node, &fs_info->block_tree); 975 free_block_entry(be); 976 } 977 spin_unlock(&fs_info->ref_verify_lock); 978 } 979 980 /* Walk down all roots and build the ref tree, meant to be called at mount */ 981 int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) 982 { 983 struct btrfs_root *extent_root; 984 struct btrfs_path *path; 985 struct extent_buffer *eb; 986 int tree_block_level = 0; 987 u64 bytenr = 0, num_bytes = 0; 988 int ret, level; 989 990 if (!btrfs_test_opt(fs_info, REF_VERIFY)) 991 return 0; 992 993 path = btrfs_alloc_path(); 994 if (!path) 995 return -ENOMEM; 996 997 extent_root = btrfs_extent_root(fs_info, 0); 998 eb = btrfs_read_lock_root_node(extent_root); 999 level = btrfs_header_level(eb); 1000 path->nodes[level] = eb; 1001 path->slots[level] = 0; 1002 path->locks[level] = BTRFS_READ_LOCK; 1003 1004 while (1) { 1005 /* 1006 * We have to keep track of the bytenr/num_bytes we last hit 1007 * because we could have run out of space for an inline ref, and 1008 * would have had to added a ref key item which may appear on a 1009 * different leaf from the original extent item. 1010 */ 1011 ret = walk_down_tree(extent_root, path, level, 1012 &bytenr, &num_bytes, &tree_block_level); 1013 if (ret) 1014 break; 1015 ret = walk_up_tree(path, &level); 1016 if (ret < 0) 1017 break; 1018 if (ret > 0) { 1019 ret = 0; 1020 break; 1021 } 1022 } 1023 if (ret) { 1024 btrfs_free_ref_cache(fs_info); 1025 btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); 1026 } 1027 btrfs_free_path(path); 1028 return ret; 1029 } 1030