/*
 * Copyright (C) 2014 Facebook.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/sched.h>
#include <linux/stacktrace.h>
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "delayed-ref.h"
#include "ref-verify.h"

/*
 * Used to keep track of the roots and the number of refs each root has for a
 * given bytenr.  This just tracks the number of direct references, not shared
 * references.
 */
struct root_entry {
	u64 root_objectid;
	u64 num_refs;
	struct rb_node node;
};

/*
 * These are meant to represent what should exist in the extent tree, and they
 * can be used to verify that the extent tree is consistent, as these should
 * all match what the extent tree says.
 */
struct ref_entry {
	u64 root_objectid;
	u64 parent;
	u64 owner;
	u64 offset;
	u64 num_refs;
	struct rb_node node;
};

#define MAX_TRACE	16

/*
 * Whenever we add/remove a reference we record the action.  The action maps
 * back to the delayed ref action.  We hold the ref we are changing in the
 * action so we can account for the history properly, and we record the root
 * we were called with since it could be different from ref_root.  We also
 * store stack traces because that's how I roll.
 */
struct ref_action {
	int action;
	u64 root;
	struct ref_entry ref;
	struct list_head list;
	unsigned long trace[MAX_TRACE];
	unsigned int trace_len;
};

/*
 * One of these for every block we reference.  It holds the roots and
 * references to it as well as all of the ref actions that have occurred to
 * it.  We never free it until we unmount the file system, in order to make
 * sure re-allocations are happening properly.
 */
struct block_entry {
	u64 bytenr;
	u64 len;
	u64 num_refs;
	int metadata;
	int from_disk;
	struct rb_root roots;
	struct rb_root refs;
	struct rb_node node;
	struct list_head actions;
};
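
/*
 * For example, a tree block at bytenr B that is referenced directly by roots
 * 1 and 5 would be tracked by a single block_entry (len == nodesize,
 * num_refs == 2, metadata == 1) whose ->roots tree holds a root_entry for
 * each of the two roots and whose ->refs tree holds the two matching
 * ref_entry nodes (parent == 0, owner == level).
 */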

static struct block_entry *insert_block_entry(struct rb_root *root,
					      struct block_entry *be)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct block_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct block_entry, node);
		if (entry->bytenr > be->bytenr)
			p = &(*p)->rb_left;
		else if (entry->bytenr < be->bytenr)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&be->node, parent_node, p);
	rb_insert_color(&be->node, root);
	return NULL;
}

static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr)
{
	struct rb_node *n;
	struct block_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < bytenr)
			n = n->rb_right;
		else if (entry->bytenr > bytenr)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

static struct root_entry *insert_root_entry(struct rb_root *root,
					    struct root_entry *re)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct root_entry *entry;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct root_entry, node);
		if (entry->root_objectid > re->root_objectid)
			p = &(*p)->rb_left;
		else if (entry->root_objectid < re->root_objectid)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&re->node, parent_node, p);
	rb_insert_color(&re->node, root);
	return NULL;
}

static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2)
{
	if (ref1->root_objectid < ref2->root_objectid)
		return -1;
	if (ref1->root_objectid > ref2->root_objectid)
		return 1;
	if (ref1->parent < ref2->parent)
		return -1;
	if (ref1->parent > ref2->parent)
		return 1;
	if (ref1->owner < ref2->owner)
		return -1;
	if (ref1->owner > ref2->owner)
		return 1;
	if (ref1->offset < ref2->offset)
		return -1;
	if (ref1->offset > ref2->offset)
		return 1;
	return 0;
}

static struct ref_entry *insert_ref_entry(struct rb_root *root,
					  struct ref_entry *ref)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent_node = NULL;
	struct ref_entry *entry;
	int cmp;

	while (*p) {
		parent_node = *p;
		entry = rb_entry(parent_node, struct ref_entry, node);
		cmp = comp_refs(entry, ref);
		if (cmp > 0)
			p = &(*p)->rb_left;
		else if (cmp < 0)
			p = &(*p)->rb_right;
		else
			return entry;
	}

	rb_link_node(&ref->node, parent_node, p);
	rb_insert_color(&ref->node, root);
	return NULL;
}

static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid)
{
	struct rb_node *n;
	struct root_entry *entry = NULL;

	n = root->rb_node;
	while (n) {
		entry = rb_entry(n, struct root_entry, node);
		if (entry->root_objectid < objectid)
			n = n->rb_right;
		else if (entry->root_objectid > objectid)
			n = n->rb_left;
		else
			return entry;
	}
	return NULL;
}

#ifdef CONFIG_STACKTRACE
static void __save_stack_trace(struct ref_action *ra)
{
	struct stack_trace stack_trace;

	stack_trace.max_entries = MAX_TRACE;
	stack_trace.nr_entries = 0;
	stack_trace.entries = ra->trace;
	stack_trace.skip = 2;
	save_stack_trace(&stack_trace);
	ra->trace_len = stack_trace.nr_entries;
}

static void __print_stack_trace(struct btrfs_fs_info *fs_info,
				struct ref_action *ra)
{
	struct stack_trace trace;

	if (ra->trace_len == 0) {
		btrfs_err(fs_info, " ref-verify: no stacktrace");
		return;
	}
	trace.nr_entries = ra->trace_len;
	trace.entries = ra->trace;
	print_stack_trace(&trace, 2);
}
#else
static inline void __save_stack_trace(struct ref_action *ra)
{
}

static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
				       struct ref_action *ra)
{
	btrfs_err(fs_info, " ref-verify: no stacktrace support");
}
#endif

static void free_block_entry(struct block_entry *be)
{
	struct root_entry *re;
	struct ref_entry *ref;
	struct ref_action *ra;
	struct rb_node *n;

	while ((n = rb_first(&be->roots))) {
		re = rb_entry(n, struct root_entry, node);
		rb_erase(&re->node, &be->roots);
		kfree(re);
	}

	while ((n = rb_first(&be->refs))) {
		ref = rb_entry(n, struct ref_entry, node);
		rb_erase(&ref->node, &be->refs);
		kfree(ref);
	}

	while (!list_empty(&be->actions)) {
		ra = list_first_entry(&be->actions, struct ref_action,
				      list);
		list_del(&ra->list);
		kfree(ra);
	}
	kfree(be);
}

/*
 * Look up or create the block_entry for @bytenr.  On success this returns
 * with fs_info->ref_verify_lock held; the caller is responsible for dropping
 * it once it is done modifying the entry.
 */
static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info,
					   u64 bytenr, u64 len,
					   u64 root_objectid)
{
	struct block_entry *be = NULL, *exist;
	struct root_entry *re = NULL;

	re = kzalloc(sizeof(struct root_entry), GFP_KERNEL);
	be = kzalloc(sizeof(struct block_entry), GFP_KERNEL);
	if (!be || !re) {
		kfree(re);
		kfree(be);
		return ERR_PTR(-ENOMEM);
	}
	be->bytenr = bytenr;
	be->len = len;

	re->root_objectid = root_objectid;
	re->num_refs = 0;

	spin_lock(&fs_info->ref_verify_lock);
	exist = insert_block_entry(&fs_info->block_tree, be);
	if (exist) {
		if (root_objectid) {
			struct root_entry *exist_re;

			exist_re = insert_root_entry(&exist->roots, re);
			if (exist_re)
				kfree(re);
		} else {
			kfree(re);
		}
		kfree(be);
		return exist;
	}

	be->num_refs = 0;
	be->metadata = 0;
	be->from_disk = 0;
	be->roots = RB_ROOT;
	be->refs = RB_ROOT;
	INIT_LIST_HEAD(&be->actions);
	if (root_objectid)
		insert_root_entry(&be->roots, re);
	else
		kfree(re);
	return be;
}

static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root,
			  u64 parent, u64 bytenr, int level)
{
	struct block_entry *be;
	struct root_entry *re;
	struct ref_entry *ref = NULL, *exist;

	ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;

	if (parent)
		ref->root_objectid = 0;
	else
		ref->root_objectid = ref_root;
	ref->parent = parent;
	ref->owner = level;
	ref->offset = 0;
	ref->num_refs = 1;

	be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs++;
	be->from_disk = 1;
	be->metadata = 1;

	if (!parent) {
		ASSERT(ref_root);
		re = lookup_root_entry(&be->roots, ref_root);
		ASSERT(re);
		re->num_refs++;
	}
	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		exist->num_refs++;
		kfree(ref);
	}
	spin_unlock(&fs_info->ref_verify_lock);

	return 0;
}

static int add_shared_data_ref(struct btrfs_fs_info *fs_info,
			       u64 parent, u32 num_refs, u64 bytenr,
			       u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, 0);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = parent;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing shared ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

static int add_extent_data_ref(struct btrfs_fs_info *fs_info,
			       struct extent_buffer *leaf,
			       struct btrfs_extent_data_ref *dref,
			       u64 bytenr, u64 num_bytes)
{
	struct block_entry *be;
	struct ref_entry *ref;
	struct root_entry *re;
	u64 ref_root = btrfs_extent_data_ref_root(leaf, dref);
	u64 owner = btrfs_extent_data_ref_objectid(leaf, dref);
	u64 offset = btrfs_extent_data_ref_offset(leaf, dref);
	u32 num_refs = btrfs_extent_data_ref_count(leaf, dref);

	ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL);
	if (!ref)
		return -ENOMEM;
	be = add_block_entry(fs_info, bytenr, num_bytes, ref_root);
	if (IS_ERR(be)) {
		kfree(ref);
		return PTR_ERR(be);
	}
	be->num_refs += num_refs;

	ref->parent = 0;
	ref->owner = owner;
	ref->root_objectid = ref_root;
	ref->offset = offset;
	ref->num_refs = num_refs;
	if (insert_ref_entry(&be->refs, ref)) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "existing ref when reading from disk?");
		kfree(ref);
		return -EINVAL;
	}

	re = lookup_root_entry(&be->roots, ref_root);
	if (!re) {
		spin_unlock(&fs_info->ref_verify_lock);
		btrfs_err(fs_info, "missing root in new block entry?");
		return -EINVAL;
	}
	re->num_refs += num_refs;
	spin_unlock(&fs_info->ref_verify_lock);
	return 0;
}

static int process_extent_item(struct btrfs_fs_info *fs_info,
			       struct btrfs_path *path, struct btrfs_key *key,
			       int slot, int *tree_block_level)
{
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	struct extent_buffer *leaf = path->nodes[0];
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	unsigned long end, ptr;
	u64 offset, flags, count;
	int type, ret = 0;

	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	if ((key->type == BTRFS_EXTENT_ITEM_KEY) &&
	    flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)(ei + 1);
		*tree_block_level = btrfs_tree_block_level(leaf, info);
		iref = (struct btrfs_extent_inline_ref *)(info + 1);
	} else {
		if (key->type == BTRFS_METADATA_ITEM_KEY)
			*tree_block_level = key->offset;
		iref = (struct btrfs_extent_inline_ref *)(ei + 1);
	}

	ptr = (unsigned long)iref;
	end = (unsigned long)ei + item_size;
	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		offset = btrfs_extent_inline_ref_offset(leaf, iref);
		switch (type) {
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, offset, 0, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, offset, key->objectid,
					     *tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			ret = add_extent_data_ref(fs_info, leaf, dref,
						  key->objectid, key->offset);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, offset, count,
						  key->objectid, key->offset);
			break;
		default:
			btrfs_err(fs_info, "invalid key type in iref");
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
		ptr += btrfs_extent_inline_ref_size(type);
	}
	return ret;
}

static int process_leaf(struct btrfs_root *root,
			struct btrfs_path *path, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_extent_data_ref *dref;
	struct btrfs_shared_data_ref *sref;
	u32 count;
	int i = 0, tree_block_level = 0, ret = 0;
	struct btrfs_key key;
	int nritems = btrfs_header_nritems(leaf);

	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		switch (key.type) {
		case BTRFS_EXTENT_ITEM_KEY:
			*num_bytes = key.offset;
			/* fall through */
		case BTRFS_METADATA_ITEM_KEY:
			*bytenr = key.objectid;
			ret = process_extent_item(fs_info, path, &key, i,
						  &tree_block_level);
			break;
		case BTRFS_TREE_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, key.offset, 0,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_SHARED_BLOCK_REF_KEY:
			ret = add_tree_block(fs_info, 0, key.offset,
					     key.objectid, tree_block_level);
			break;
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = btrfs_item_ptr(leaf, i,
					      struct btrfs_extent_data_ref);
			ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr,
						  *num_bytes);
			break;
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = btrfs_item_ptr(leaf, i,
					      struct btrfs_shared_data_ref);
			count = btrfs_shared_data_ref_count(leaf, sref);
			ret = add_shared_data_ref(fs_info, key.offset, count,
						  *bytenr, *num_bytes);
			break;
		default:
			break;
		}
		if (ret)
			break;
	}
	return ret;
}

/* Walk down to the leaf from the given level */
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
			  int level, u64 *bytenr, u64 *num_bytes)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *eb;
	u64 block_bytenr, gen;
	int ret = 0;

	while (level >= 0) {
		if (level) {
			block_bytenr = btrfs_node_blockptr(path->nodes[level],
							   path->slots[level]);
			gen = btrfs_node_ptr_generation(path->nodes[level],
							path->slots[level]);
			eb = read_tree_block(fs_info, block_bytenr, gen);
			if (IS_ERR(eb))
				return PTR_ERR(eb);
			if (!extent_buffer_uptodate(eb)) {
				free_extent_buffer(eb);
				return -EIO;
			}
			btrfs_tree_read_lock(eb);
			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
			path->nodes[level-1] = eb;
			path->slots[level-1] = 0;
			path->locks[level-1] = BTRFS_READ_LOCK_BLOCKING;
		} else {
			ret = process_leaf(root, path, bytenr, num_bytes);
			if (ret)
				break;
		}
		level--;
	}
	return ret;
}

/* Walk up to the next node that needs to be processed */
static int walk_up_tree(struct btrfs_path *path, int *level)
{
	int l;

	for (l = 0; l < BTRFS_MAX_LEVEL; l++) {
		if (!path->nodes[l])
			continue;
		if (l) {
			path->slots[l]++;
			if (path->slots[l] <
			    btrfs_header_nritems(path->nodes[l])) {
				*level = l;
				return 0;
			}
		}
		btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]);
		free_extent_buffer(path->nodes[l]);
		path->nodes[l] = NULL;
		path->slots[l] = 0;
		path->locks[l] = 0;
	}

	return 1;
}

static void dump_ref_action(struct btrfs_fs_info *fs_info,
			    struct ref_action *ra)
{
	btrfs_err(fs_info,
		  " Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
		  ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent,
		  ra->ref.owner, ra->ref.offset, ra->ref.num_refs);
	__print_stack_trace(fs_info, ra);
}

/*
 * Dumps all the information from the block entry to printk; it's going to be
 * awesome.
 */
static void dump_block_entry(struct btrfs_fs_info *fs_info,
			     struct block_entry *be)
{
	struct ref_entry *ref;
	struct root_entry *re;
	struct ref_action *ra;
	struct rb_node *n;

	btrfs_err(fs_info,
		  "dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d",
		  be->bytenr, be->len, be->num_refs, be->metadata,
		  be->from_disk);

	for (n = rb_first(&be->refs); n; n = rb_next(n)) {
		ref = rb_entry(n, struct ref_entry, node);
		btrfs_err(fs_info,
			  " ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu",
			  ref->root_objectid, ref->parent, ref->owner,
			  ref->offset, ref->num_refs);
	}

	for (n = rb_first(&be->roots); n; n = rb_next(n)) {
		re = rb_entry(n, struct root_entry, node);
		btrfs_err(fs_info, " root entry %llu, num_refs %llu",
			  re->root_objectid, re->num_refs);
	}

	list_for_each_entry(ra, &be->actions, list)
		dump_ref_action(fs_info, ra);
}

/*
 * btrfs_ref_tree_mod: called when we modify a ref for a bytenr
 * @root: the root we are making this modification from.
 * @bytenr: the bytenr we are modifying.
 * @num_bytes: number of bytes.
 * @parent: the parent bytenr.
 * @ref_root: the original root owner of the bytenr.
 * @owner: level in the case of metadata, inode in the case of data.
 * @offset: 0 for metadata, file offset for data.
 * @action: the action that we are doing; this is the same as the delayed ref
 *	action.
 *
 * This will add an action item to the given bytenr and do sanity checks to
 * make sure we haven't messed something up.  If we are making a new
 * allocation and this block entry has history we will delete all previous
 * actions as long as our sanity checks pass, as they are no longer needed.
 */
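/*
 * Hedged usage sketch (illustrative values, not a specific existing caller):
 * code mirroring a metadata ref addition for a block owned by @root might do
 *
 *	btrfs_ref_tree_mod(root, bytenr, fs_info->nodesize, 0, root->objectid,
 *			   level, 0, BTRFS_ADD_DELAYED_REF);
 *
 * where owner is the block's level and offset is 0, per the rules above.
 */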
int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
		       u64 parent, u64 ref_root, u64 owner, u64 offset,
		       int action)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct ref_entry *ref = NULL, *exist;
	struct ref_action *ra = NULL;
	struct block_entry *be = NULL;
	struct root_entry *re = NULL;
	int ret = 0;
	bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;

	if (!btrfs_test_opt(root->fs_info, REF_VERIFY))
		return 0;

	ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS);
	ra = kmalloc(sizeof(struct ref_action), GFP_NOFS);
	if (!ra || !ref) {
		kfree(ref);
		kfree(ra);
		ret = -ENOMEM;
		goto out;
	}

	if (parent) {
		ref->parent = parent;
	} else {
		ref->root_objectid = ref_root;
		ref->owner = owner;
		ref->offset = offset;
	}
	ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1;

	memcpy(&ra->ref, ref, sizeof(struct ref_entry));
	/*
	 * Save the extra info from the delayed ref in the ref action to make
	 * it easier to figure out what is happening.  The real refs we add to
	 * the ref tree need to reflect what we save on disk so it matches any
	 * on-disk refs we pre-loaded.
	 */
	ra->ref.owner = owner;
	ra->ref.offset = offset;
	ra->ref.root_objectid = ref_root;
	__save_stack_trace(ra);

	INIT_LIST_HEAD(&ra->list);
	ra->action = action;
	ra->root = root->objectid;

	/*
	 * This is an allocation; preallocate the block_entry in case we
	 * haven't used it before.
	 */
	ret = -EINVAL;
	if (action == BTRFS_ADD_DELAYED_EXTENT) {
		/*
		 * For subvol_create we'll just pass in whatever the parent
		 * root is and the new root objectid, so let's not treat the
		 * passed in root as if it really has a ref for this bytenr.
		 */
		be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root);
		if (IS_ERR(be)) {
			kfree(ref);
			kfree(ra);
			ret = PTR_ERR(be);
			goto out;
		}
		be->num_refs++;
		if (metadata)
			be->metadata = 1;

		if (be->num_refs != 1) {
			btrfs_err(fs_info,
				  "re-allocated a block that still has references to it!");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}

		while (!list_empty(&be->actions)) {
			struct ref_action *tmp;

			tmp = list_first_entry(&be->actions, struct ref_action,
					       list);
			list_del(&tmp->list);
			kfree(tmp);
		}
	} else {
		struct root_entry *tmp;

		if (!parent) {
			re = kmalloc(sizeof(struct root_entry), GFP_NOFS);
			if (!re) {
				kfree(ref);
				kfree(ra);
				ret = -ENOMEM;
				goto out;
			}
			/*
			 * This is the root that is modifying us, so it's the
			 * one we want to lookup below when we modify the
			 * re->num_refs.
			 */
			ref_root = root->objectid;
			re->root_objectid = root->objectid;
			re->num_refs = 0;
		}

		spin_lock(&root->fs_info->ref_verify_lock);
		be = lookup_block_entry(&root->fs_info->block_tree, bytenr);
		if (!be) {
			btrfs_err(fs_info,
				  "trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
				  action, (unsigned long long)bytenr,
				  (unsigned long long)num_bytes);
			dump_ref_action(fs_info, ra);
			kfree(re);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}

		if (!parent) {
			tmp = insert_root_entry(&be->roots, re);
			if (tmp) {
				kfree(re);
				re = tmp;
			}
		}
	}

	exist = insert_ref_entry(&be->refs, ref);
	if (exist) {
		if (action == BTRFS_DROP_DELAYED_REF) {
			if (exist->num_refs == 0) {
				btrfs_err(fs_info,
					  "dropping a ref for an existing root that doesn't have a ref on the block");
				dump_block_entry(fs_info, be);
				dump_ref_action(fs_info, ra);
				kfree(ref);
				kfree(ra);
				goto out_unlock;
			}
			exist->num_refs--;
			if (exist->num_refs == 0) {
				rb_erase(&exist->node, &be->refs);
				kfree(exist);
			}
		} else if (!be->metadata) {
			exist->num_refs++;
		} else {
			btrfs_err(fs_info,
				  "attempting to add another ref for an existing ref on a tree block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ref);
			kfree(ra);
			goto out_unlock;
		}
		kfree(ref);
	} else {
		if (action == BTRFS_DROP_DELAYED_REF) {
			btrfs_err(fs_info,
				  "dropping a ref for a root that doesn't have a ref on the block");
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}

	if (!parent && !re) {
		re = lookup_root_entry(&be->roots, ref_root);
		if (!re) {
			/*
			 * This shouldn't happen because we will add our re
			 * above when we lookup the be with !parent, but just
			 * in case catch this case so we don't panic because I
			 * didn't think of some other corner case.
			 */
			btrfs_err(fs_info, "failed to find root %llu for %llu",
				  root->objectid, be->bytenr);
			dump_block_entry(fs_info, be);
			dump_ref_action(fs_info, ra);
			kfree(ra);
			goto out_unlock;
		}
	}
	if (action == BTRFS_DROP_DELAYED_REF) {
		if (re)
			re->num_refs--;
		be->num_refs--;
	} else if (action == BTRFS_ADD_DELAYED_REF) {
		be->num_refs++;
		if (re)
			re->num_refs++;
	}
	list_add_tail(&ra->list, &be->actions);
	ret = 0;
out_unlock:
	spin_unlock(&root->fs_info->ref_verify_lock);
out:
	if (ret)
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
	return ret;
}

/* Free up the ref cache */
void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info)
{
	struct block_entry *be;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	while ((n = rb_first(&fs_info->block_tree))) {
		be = rb_entry(n, struct block_entry, node);
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
		cond_resched_lock(&fs_info->ref_verify_lock);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
			       u64 len)
{
	struct block_entry *be = NULL, *entry;
	struct rb_node *n;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return;

	spin_lock(&fs_info->ref_verify_lock);
	n = fs_info->block_tree.rb_node;
	while (n) {
		entry = rb_entry(n, struct block_entry, node);
		if (entry->bytenr < start) {
			n = n->rb_right;
		} else if (entry->bytenr > start) {
			n = n->rb_left;
		} else {
			be = entry;
			break;
		}
		/* We want to get as close to start as possible */
		if (be == NULL ||
		    (entry->bytenr < start && be->bytenr > start) ||
		    (entry->bytenr < start && entry->bytenr > be->bytenr))
			be = entry;
	}

	/*
	 * Could have an empty block group; maybe have something to check for
	 * this case to verify we were actually empty?
	 */
	if (!be) {
		spin_unlock(&fs_info->ref_verify_lock);
		return;
	}

	n = &be->node;
	while (n) {
		be = rb_entry(n, struct block_entry, node);
		n = rb_next(n);
		if (be->bytenr < start && be->bytenr + be->len > start) {
			btrfs_err(fs_info,
				  "block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
			continue;
		}
		if (be->bytenr < start)
			continue;
		if (be->bytenr >= start + len)
			break;
		if (be->bytenr + be->len > start + len) {
			btrfs_err(fs_info,
				  "block entry overlaps a block group [%llu,%llu]!",
				  start, len);
			dump_block_entry(fs_info, be);
		}
		rb_erase(&be->node, &fs_info->block_tree);
		free_block_entry(be);
	}
	spin_unlock(&fs_info->ref_verify_lock);
}

/* Walk down all roots and build the ref tree, meant to be called at mount */
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_path *path;
	struct extent_buffer *eb;
	u64 bytenr = 0, num_bytes = 0;
	int ret, level;

	if (!btrfs_test_opt(fs_info, REF_VERIFY))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	eb = btrfs_read_lock_root_node(fs_info->extent_root);
	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
	level = btrfs_header_level(eb);
	path->nodes[level] = eb;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_READ_LOCK_BLOCKING;

	while (1) {
		/*
		 * We have to keep track of the bytenr/num_bytes we last hit
		 * because we could have run out of space for an inline ref,
		 * and would have had to add a ref key item which may appear
		 * on a different leaf from the original extent item.
		 */
		ret = walk_down_tree(fs_info->extent_root, path, level,
				     &bytenr, &num_bytes);
		if (ret)
			break;
		ret = walk_up_tree(path, &level);
		if (ret < 0)
			break;
		if (ret > 0) {
			ret = 0;
			break;
		}
	}
	if (ret) {
		btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
		btrfs_free_ref_cache(fs_info);
	}
	btrfs_free_path(path);
	return ret;
}