1 /* 2 * Copyright (C) 2009 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/pagemap.h> 21 #include <linux/writeback.h> 22 #include <linux/blkdev.h> 23 #include <linux/rbtree.h> 24 #include <linux/slab.h> 25 #include "ctree.h" 26 #include "disk-io.h" 27 #include "transaction.h" 28 #include "volumes.h" 29 #include "locking.h" 30 #include "btrfs_inode.h" 31 #include "async-thread.h" 32 #include "free-space-cache.h" 33 #include "inode-map.h" 34 35 /* 36 * backref_node, mapping_node and tree_block start with this 37 */ 38 struct tree_entry { 39 struct rb_node rb_node; 40 u64 bytenr; 41 }; 42 43 /* 44 * present a tree block in the backref cache 45 */ 46 struct backref_node { 47 struct rb_node rb_node; 48 u64 bytenr; 49 50 u64 new_bytenr; 51 /* objectid of tree block owner, can be not uptodate */ 52 u64 owner; 53 /* link to pending, changed or detached list */ 54 struct list_head list; 55 /* list of upper level blocks reference this block */ 56 struct list_head upper; 57 /* list of child blocks in the cache */ 58 struct list_head lower; 59 /* NULL if this node is not tree root */ 60 struct btrfs_root *root; 61 /* extent buffer got by COW the block */ 62 struct extent_buffer *eb; 63 /* level of tree block */ 64 unsigned int level:8; 65 /* is the block in non-reference counted tree 
*/ 66 unsigned int cowonly:1; 67 /* 1 if no child node in the cache */ 68 unsigned int lowest:1; 69 /* is the extent buffer locked */ 70 unsigned int locked:1; 71 /* has the block been processed */ 72 unsigned int processed:1; 73 /* have backrefs of this block been checked */ 74 unsigned int checked:1; 75 /* 76 * 1 if corresponding block has been cowed but some upper 77 * level block pointers may not point to the new location 78 */ 79 unsigned int pending:1; 80 /* 81 * 1 if the backref node isn't connected to any other 82 * backref node. 83 */ 84 unsigned int detached:1; 85 }; 86 87 /* 88 * present a block pointer in the backref cache 89 */ 90 struct backref_edge { 91 struct list_head list[2]; 92 struct backref_node *node[2]; 93 }; 94 95 #define LOWER 0 96 #define UPPER 1 97 #define RELOCATION_RESERVED_NODES 256 98 99 struct backref_cache { 100 /* red black tree of all backref nodes in the cache */ 101 struct rb_root rb_root; 102 /* for passing backref nodes to btrfs_reloc_cow_block */ 103 struct backref_node *path[BTRFS_MAX_LEVEL]; 104 /* 105 * list of blocks that have been cowed but some block 106 * pointers in upper level blocks may not reflect the 107 * new location 108 */ 109 struct list_head pending[BTRFS_MAX_LEVEL]; 110 /* list of backref nodes with no child node */ 111 struct list_head leaves; 112 /* list of blocks that have been cowed in current transaction */ 113 struct list_head changed; 114 /* list of detached backref node. 
*/ 115 struct list_head detached; 116 117 u64 last_trans; 118 119 int nr_nodes; 120 int nr_edges; 121 }; 122 123 /* 124 * map address of tree root to tree 125 */ 126 struct mapping_node { 127 struct rb_node rb_node; 128 u64 bytenr; 129 void *data; 130 }; 131 132 struct mapping_tree { 133 struct rb_root rb_root; 134 spinlock_t lock; 135 }; 136 137 /* 138 * present a tree block to process 139 */ 140 struct tree_block { 141 struct rb_node rb_node; 142 u64 bytenr; 143 struct btrfs_key key; 144 unsigned int level:8; 145 unsigned int key_ready:1; 146 }; 147 148 #define MAX_EXTENTS 128 149 150 struct file_extent_cluster { 151 u64 start; 152 u64 end; 153 u64 boundary[MAX_EXTENTS]; 154 unsigned int nr; 155 }; 156 157 struct reloc_control { 158 /* block group to relocate */ 159 struct btrfs_block_group_cache *block_group; 160 /* extent tree */ 161 struct btrfs_root *extent_root; 162 /* inode for moving data */ 163 struct inode *data_inode; 164 165 struct btrfs_block_rsv *block_rsv; 166 167 struct backref_cache backref_cache; 168 169 struct file_extent_cluster cluster; 170 /* tree blocks have been processed */ 171 struct extent_io_tree processed_blocks; 172 /* map start of tree root to corresponding reloc tree */ 173 struct mapping_tree reloc_root_tree; 174 /* list of reloc trees */ 175 struct list_head reloc_roots; 176 /* size of metadata reservation for merging reloc trees */ 177 u64 merging_rsv_size; 178 /* size of relocated tree nodes */ 179 u64 nodes_relocated; 180 /* reserved size for block group relocation*/ 181 u64 reserved_bytes; 182 183 u64 search_start; 184 u64 extents_found; 185 186 unsigned int stage:8; 187 unsigned int create_reloc_tree:1; 188 unsigned int merge_reloc_tree:1; 189 unsigned int found_file_extent:1; 190 }; 191 192 /* stages of data relocation */ 193 #define MOVE_DATA_EXTENTS 0 194 #define UPDATE_DATA_PTRS 1 195 196 static void remove_backref_node(struct backref_cache *cache, 197 struct backref_node *node); 198 static void 
__mark_block_processed(struct reloc_control *rc, 199 struct backref_node *node); 200 201 static void mapping_tree_init(struct mapping_tree *tree) 202 { 203 tree->rb_root = RB_ROOT; 204 spin_lock_init(&tree->lock); 205 } 206 207 static void backref_cache_init(struct backref_cache *cache) 208 { 209 int i; 210 cache->rb_root = RB_ROOT; 211 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 212 INIT_LIST_HEAD(&cache->pending[i]); 213 INIT_LIST_HEAD(&cache->changed); 214 INIT_LIST_HEAD(&cache->detached); 215 INIT_LIST_HEAD(&cache->leaves); 216 } 217 218 static void backref_cache_cleanup(struct backref_cache *cache) 219 { 220 struct backref_node *node; 221 int i; 222 223 while (!list_empty(&cache->detached)) { 224 node = list_entry(cache->detached.next, 225 struct backref_node, list); 226 remove_backref_node(cache, node); 227 } 228 229 while (!list_empty(&cache->leaves)) { 230 node = list_entry(cache->leaves.next, 231 struct backref_node, lower); 232 remove_backref_node(cache, node); 233 } 234 235 cache->last_trans = 0; 236 237 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 238 ASSERT(list_empty(&cache->pending[i])); 239 ASSERT(list_empty(&cache->changed)); 240 ASSERT(list_empty(&cache->detached)); 241 ASSERT(RB_EMPTY_ROOT(&cache->rb_root)); 242 ASSERT(!cache->nr_nodes); 243 ASSERT(!cache->nr_edges); 244 } 245 246 static struct backref_node *alloc_backref_node(struct backref_cache *cache) 247 { 248 struct backref_node *node; 249 250 node = kzalloc(sizeof(*node), GFP_NOFS); 251 if (node) { 252 INIT_LIST_HEAD(&node->list); 253 INIT_LIST_HEAD(&node->upper); 254 INIT_LIST_HEAD(&node->lower); 255 RB_CLEAR_NODE(&node->rb_node); 256 cache->nr_nodes++; 257 } 258 return node; 259 } 260 261 static void free_backref_node(struct backref_cache *cache, 262 struct backref_node *node) 263 { 264 if (node) { 265 cache->nr_nodes--; 266 kfree(node); 267 } 268 } 269 270 static struct backref_edge *alloc_backref_edge(struct backref_cache *cache) 271 { 272 struct backref_edge *edge; 273 274 edge = 
kzalloc(sizeof(*edge), GFP_NOFS); 275 if (edge) 276 cache->nr_edges++; 277 return edge; 278 } 279 280 static void free_backref_edge(struct backref_cache *cache, 281 struct backref_edge *edge) 282 { 283 if (edge) { 284 cache->nr_edges--; 285 kfree(edge); 286 } 287 } 288 289 static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, 290 struct rb_node *node) 291 { 292 struct rb_node **p = &root->rb_node; 293 struct rb_node *parent = NULL; 294 struct tree_entry *entry; 295 296 while (*p) { 297 parent = *p; 298 entry = rb_entry(parent, struct tree_entry, rb_node); 299 300 if (bytenr < entry->bytenr) 301 p = &(*p)->rb_left; 302 else if (bytenr > entry->bytenr) 303 p = &(*p)->rb_right; 304 else 305 return parent; 306 } 307 308 rb_link_node(node, parent, p); 309 rb_insert_color(node, root); 310 return NULL; 311 } 312 313 static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) 314 { 315 struct rb_node *n = root->rb_node; 316 struct tree_entry *entry; 317 318 while (n) { 319 entry = rb_entry(n, struct tree_entry, rb_node); 320 321 if (bytenr < entry->bytenr) 322 n = n->rb_left; 323 else if (bytenr > entry->bytenr) 324 n = n->rb_right; 325 else 326 return n; 327 } 328 return NULL; 329 } 330 331 static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr) 332 { 333 334 struct btrfs_fs_info *fs_info = NULL; 335 struct backref_node *bnode = rb_entry(rb_node, struct backref_node, 336 rb_node); 337 if (bnode->root) 338 fs_info = bnode->root->fs_info; 339 btrfs_panic(fs_info, errno, "Inconsistency in backref cache " 340 "found at offset %llu", bytenr); 341 } 342 343 /* 344 * walk up backref nodes until reach node presents tree root 345 */ 346 static struct backref_node *walk_up_backref(struct backref_node *node, 347 struct backref_edge *edges[], 348 int *index) 349 { 350 struct backref_edge *edge; 351 int idx = *index; 352 353 while (!list_empty(&node->upper)) { 354 edge = list_entry(node->upper.next, 355 struct backref_edge, list[LOWER]); 
356 edges[idx++] = edge; 357 node = edge->node[UPPER]; 358 } 359 BUG_ON(node->detached); 360 *index = idx; 361 return node; 362 } 363 364 /* 365 * walk down backref nodes to find start of next reference path 366 */ 367 static struct backref_node *walk_down_backref(struct backref_edge *edges[], 368 int *index) 369 { 370 struct backref_edge *edge; 371 struct backref_node *lower; 372 int idx = *index; 373 374 while (idx > 0) { 375 edge = edges[idx - 1]; 376 lower = edge->node[LOWER]; 377 if (list_is_last(&edge->list[LOWER], &lower->upper)) { 378 idx--; 379 continue; 380 } 381 edge = list_entry(edge->list[LOWER].next, 382 struct backref_edge, list[LOWER]); 383 edges[idx - 1] = edge; 384 *index = idx; 385 return edge->node[UPPER]; 386 } 387 *index = 0; 388 return NULL; 389 } 390 391 static void unlock_node_buffer(struct backref_node *node) 392 { 393 if (node->locked) { 394 btrfs_tree_unlock(node->eb); 395 node->locked = 0; 396 } 397 } 398 399 static void drop_node_buffer(struct backref_node *node) 400 { 401 if (node->eb) { 402 unlock_node_buffer(node); 403 free_extent_buffer(node->eb); 404 node->eb = NULL; 405 } 406 } 407 408 static void drop_backref_node(struct backref_cache *tree, 409 struct backref_node *node) 410 { 411 BUG_ON(!list_empty(&node->upper)); 412 413 drop_node_buffer(node); 414 list_del(&node->list); 415 list_del(&node->lower); 416 if (!RB_EMPTY_NODE(&node->rb_node)) 417 rb_erase(&node->rb_node, &tree->rb_root); 418 free_backref_node(tree, node); 419 } 420 421 /* 422 * remove a backref node from the backref cache 423 */ 424 static void remove_backref_node(struct backref_cache *cache, 425 struct backref_node *node) 426 { 427 struct backref_node *upper; 428 struct backref_edge *edge; 429 430 if (!node) 431 return; 432 433 BUG_ON(!node->lowest && !node->detached); 434 while (!list_empty(&node->upper)) { 435 edge = list_entry(node->upper.next, struct backref_edge, 436 list[LOWER]); 437 upper = edge->node[UPPER]; 438 list_del(&edge->list[LOWER]); 439 
list_del(&edge->list[UPPER]); 440 free_backref_edge(cache, edge); 441 442 if (RB_EMPTY_NODE(&upper->rb_node)) { 443 BUG_ON(!list_empty(&node->upper)); 444 drop_backref_node(cache, node); 445 node = upper; 446 node->lowest = 1; 447 continue; 448 } 449 /* 450 * add the node to leaf node list if no other 451 * child block cached. 452 */ 453 if (list_empty(&upper->lower)) { 454 list_add_tail(&upper->lower, &cache->leaves); 455 upper->lowest = 1; 456 } 457 } 458 459 drop_backref_node(cache, node); 460 } 461 462 static void update_backref_node(struct backref_cache *cache, 463 struct backref_node *node, u64 bytenr) 464 { 465 struct rb_node *rb_node; 466 rb_erase(&node->rb_node, &cache->rb_root); 467 node->bytenr = bytenr; 468 rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); 469 if (rb_node) 470 backref_tree_panic(rb_node, -EEXIST, bytenr); 471 } 472 473 /* 474 * update backref cache after a transaction commit 475 */ 476 static int update_backref_cache(struct btrfs_trans_handle *trans, 477 struct backref_cache *cache) 478 { 479 struct backref_node *node; 480 int level = 0; 481 482 if (cache->last_trans == 0) { 483 cache->last_trans = trans->transid; 484 return 0; 485 } 486 487 if (cache->last_trans == trans->transid) 488 return 0; 489 490 /* 491 * detached nodes are used to avoid unnecessary backref 492 * lookup. transaction commit changes the extent tree. 493 * so the detached nodes are no longer useful. 494 */ 495 while (!list_empty(&cache->detached)) { 496 node = list_entry(cache->detached.next, 497 struct backref_node, list); 498 remove_backref_node(cache, node); 499 } 500 501 while (!list_empty(&cache->changed)) { 502 node = list_entry(cache->changed.next, 503 struct backref_node, list); 504 list_del_init(&node->list); 505 BUG_ON(node->pending); 506 update_backref_node(cache, node, node->new_bytenr); 507 } 508 509 /* 510 * some nodes can be left in the pending list if there were 511 * errors during processing the pending nodes. 
512 */ 513 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 514 list_for_each_entry(node, &cache->pending[level], list) { 515 BUG_ON(!node->pending); 516 if (node->bytenr == node->new_bytenr) 517 continue; 518 update_backref_node(cache, node, node->new_bytenr); 519 } 520 } 521 522 cache->last_trans = 0; 523 return 1; 524 } 525 526 527 static int should_ignore_root(struct btrfs_root *root) 528 { 529 struct btrfs_root *reloc_root; 530 531 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) 532 return 0; 533 534 reloc_root = root->reloc_root; 535 if (!reloc_root) 536 return 0; 537 538 if (btrfs_root_last_snapshot(&reloc_root->root_item) == 539 root->fs_info->running_transaction->transid - 1) 540 return 0; 541 /* 542 * if there is reloc tree and it was created in previous 543 * transaction backref lookup can find the reloc tree, 544 * so backref node for the fs tree root is useless for 545 * relocation. 546 */ 547 return 1; 548 } 549 /* 550 * find reloc tree by address of tree root 551 */ 552 static struct btrfs_root *find_reloc_root(struct reloc_control *rc, 553 u64 bytenr) 554 { 555 struct rb_node *rb_node; 556 struct mapping_node *node; 557 struct btrfs_root *root = NULL; 558 559 spin_lock(&rc->reloc_root_tree.lock); 560 rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr); 561 if (rb_node) { 562 node = rb_entry(rb_node, struct mapping_node, rb_node); 563 root = (struct btrfs_root *)node->data; 564 } 565 spin_unlock(&rc->reloc_root_tree.lock); 566 return root; 567 } 568 569 static int is_cowonly_root(u64 root_objectid) 570 { 571 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 572 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 573 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 574 root_objectid == BTRFS_DEV_TREE_OBJECTID || 575 root_objectid == BTRFS_TREE_LOG_OBJECTID || 576 root_objectid == BTRFS_CSUM_TREE_OBJECTID || 577 root_objectid == BTRFS_UUID_TREE_OBJECTID || 578 root_objectid == BTRFS_QUOTA_TREE_OBJECTID || 579 root_objectid == 
BTRFS_FREE_SPACE_TREE_OBJECTID) 580 return 1; 581 return 0; 582 } 583 584 static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, 585 u64 root_objectid) 586 { 587 struct btrfs_key key; 588 589 key.objectid = root_objectid; 590 key.type = BTRFS_ROOT_ITEM_KEY; 591 if (is_cowonly_root(root_objectid)) 592 key.offset = 0; 593 else 594 key.offset = (u64)-1; 595 596 return btrfs_get_fs_root(fs_info, &key, false); 597 } 598 599 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 600 static noinline_for_stack 601 struct btrfs_root *find_tree_root(struct reloc_control *rc, 602 struct extent_buffer *leaf, 603 struct btrfs_extent_ref_v0 *ref0) 604 { 605 struct btrfs_root *root; 606 u64 root_objectid = btrfs_ref_root_v0(leaf, ref0); 607 u64 generation = btrfs_ref_generation_v0(leaf, ref0); 608 609 BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID); 610 611 root = read_fs_root(rc->extent_root->fs_info, root_objectid); 612 BUG_ON(IS_ERR(root)); 613 614 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && 615 generation != btrfs_root_generation(&root->root_item)) 616 return NULL; 617 618 return root; 619 } 620 #endif 621 622 static noinline_for_stack 623 int find_inline_backref(struct extent_buffer *leaf, int slot, 624 unsigned long *ptr, unsigned long *end) 625 { 626 struct btrfs_key key; 627 struct btrfs_extent_item *ei; 628 struct btrfs_tree_block_info *bi; 629 u32 item_size; 630 631 btrfs_item_key_to_cpu(leaf, &key, slot); 632 633 item_size = btrfs_item_size_nr(leaf, slot); 634 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 635 if (item_size < sizeof(*ei)) { 636 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 637 return 1; 638 } 639 #endif 640 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); 641 WARN_ON(!(btrfs_extent_flags(leaf, ei) & 642 BTRFS_EXTENT_FLAG_TREE_BLOCK)); 643 644 if (key.type == BTRFS_EXTENT_ITEM_KEY && 645 item_size <= sizeof(*ei) + sizeof(*bi)) { 646 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); 647 return 1; 648 } 649 if (key.type == 
BTRFS_METADATA_ITEM_KEY && 650 item_size <= sizeof(*ei)) { 651 WARN_ON(item_size < sizeof(*ei)); 652 return 1; 653 } 654 655 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 656 bi = (struct btrfs_tree_block_info *)(ei + 1); 657 *ptr = (unsigned long)(bi + 1); 658 } else { 659 *ptr = (unsigned long)(ei + 1); 660 } 661 *end = (unsigned long)ei + item_size; 662 return 0; 663 } 664 665 /* 666 * build backref tree for a given tree block. root of the backref tree 667 * corresponds the tree block, leaves of the backref tree correspond 668 * roots of b-trees that reference the tree block. 669 * 670 * the basic idea of this function is check backrefs of a given block 671 * to find upper level blocks that reference the block, and then check 672 * backrefs of these upper level blocks recursively. the recursion stop 673 * when tree root is reached or backrefs for the block is cached. 674 * 675 * NOTE: if we find backrefs for a block are cached, we know backrefs 676 * for all upper level blocks that directly/indirectly reference the 677 * block are also cached. 
678 */ 679 static noinline_for_stack 680 struct backref_node *build_backref_tree(struct reloc_control *rc, 681 struct btrfs_key *node_key, 682 int level, u64 bytenr) 683 { 684 struct backref_cache *cache = &rc->backref_cache; 685 struct btrfs_path *path1; 686 struct btrfs_path *path2; 687 struct extent_buffer *eb; 688 struct btrfs_root *root; 689 struct backref_node *cur; 690 struct backref_node *upper; 691 struct backref_node *lower; 692 struct backref_node *node = NULL; 693 struct backref_node *exist = NULL; 694 struct backref_edge *edge; 695 struct rb_node *rb_node; 696 struct btrfs_key key; 697 unsigned long end; 698 unsigned long ptr; 699 LIST_HEAD(list); 700 LIST_HEAD(useless); 701 int cowonly; 702 int ret; 703 int err = 0; 704 bool need_check = true; 705 706 path1 = btrfs_alloc_path(); 707 path2 = btrfs_alloc_path(); 708 if (!path1 || !path2) { 709 err = -ENOMEM; 710 goto out; 711 } 712 path1->reada = READA_FORWARD; 713 path2->reada = READA_FORWARD; 714 715 node = alloc_backref_node(cache); 716 if (!node) { 717 err = -ENOMEM; 718 goto out; 719 } 720 721 node->bytenr = bytenr; 722 node->level = level; 723 node->lowest = 1; 724 cur = node; 725 again: 726 end = 0; 727 ptr = 0; 728 key.objectid = cur->bytenr; 729 key.type = BTRFS_METADATA_ITEM_KEY; 730 key.offset = (u64)-1; 731 732 path1->search_commit_root = 1; 733 path1->skip_locking = 1; 734 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1, 735 0, 0); 736 if (ret < 0) { 737 err = ret; 738 goto out; 739 } 740 ASSERT(ret); 741 ASSERT(path1->slots[0]); 742 743 path1->slots[0]--; 744 745 WARN_ON(cur->checked); 746 if (!list_empty(&cur->upper)) { 747 /* 748 * the backref was added previously when processing 749 * backref of type BTRFS_TREE_BLOCK_REF_KEY 750 */ 751 ASSERT(list_is_singular(&cur->upper)); 752 edge = list_entry(cur->upper.next, struct backref_edge, 753 list[LOWER]); 754 ASSERT(list_empty(&edge->list[UPPER])); 755 exist = edge->node[UPPER]; 756 /* 757 * add the upper level block to pending 
list if we need 758 * check its backrefs 759 */ 760 if (!exist->checked) 761 list_add_tail(&edge->list[UPPER], &list); 762 } else { 763 exist = NULL; 764 } 765 766 while (1) { 767 cond_resched(); 768 eb = path1->nodes[0]; 769 770 if (ptr >= end) { 771 if (path1->slots[0] >= btrfs_header_nritems(eb)) { 772 ret = btrfs_next_leaf(rc->extent_root, path1); 773 if (ret < 0) { 774 err = ret; 775 goto out; 776 } 777 if (ret > 0) 778 break; 779 eb = path1->nodes[0]; 780 } 781 782 btrfs_item_key_to_cpu(eb, &key, path1->slots[0]); 783 if (key.objectid != cur->bytenr) { 784 WARN_ON(exist); 785 break; 786 } 787 788 if (key.type == BTRFS_EXTENT_ITEM_KEY || 789 key.type == BTRFS_METADATA_ITEM_KEY) { 790 ret = find_inline_backref(eb, path1->slots[0], 791 &ptr, &end); 792 if (ret) 793 goto next; 794 } 795 } 796 797 if (ptr < end) { 798 /* update key for inline back ref */ 799 struct btrfs_extent_inline_ref *iref; 800 iref = (struct btrfs_extent_inline_ref *)ptr; 801 key.type = btrfs_extent_inline_ref_type(eb, iref); 802 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 803 WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY && 804 key.type != BTRFS_SHARED_BLOCK_REF_KEY); 805 } 806 807 if (exist && 808 ((key.type == BTRFS_TREE_BLOCK_REF_KEY && 809 exist->owner == key.offset) || 810 (key.type == BTRFS_SHARED_BLOCK_REF_KEY && 811 exist->bytenr == key.offset))) { 812 exist = NULL; 813 goto next; 814 } 815 816 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 817 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || 818 key.type == BTRFS_EXTENT_REF_V0_KEY) { 819 if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 820 struct btrfs_extent_ref_v0 *ref0; 821 ref0 = btrfs_item_ptr(eb, path1->slots[0], 822 struct btrfs_extent_ref_v0); 823 if (key.objectid == key.offset) { 824 root = find_tree_root(rc, eb, ref0); 825 if (root && !should_ignore_root(root)) 826 cur->root = root; 827 else 828 list_add(&cur->list, &useless); 829 break; 830 } 831 if (is_cowonly_root(btrfs_ref_root_v0(eb, 832 ref0))) 833 cur->cowonly = 1; 834 } 835 
#else 836 ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY); 837 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { 838 #endif 839 if (key.objectid == key.offset) { 840 /* 841 * only root blocks of reloc trees use 842 * backref of this type. 843 */ 844 root = find_reloc_root(rc, cur->bytenr); 845 ASSERT(root); 846 cur->root = root; 847 break; 848 } 849 850 edge = alloc_backref_edge(cache); 851 if (!edge) { 852 err = -ENOMEM; 853 goto out; 854 } 855 rb_node = tree_search(&cache->rb_root, key.offset); 856 if (!rb_node) { 857 upper = alloc_backref_node(cache); 858 if (!upper) { 859 free_backref_edge(cache, edge); 860 err = -ENOMEM; 861 goto out; 862 } 863 upper->bytenr = key.offset; 864 upper->level = cur->level + 1; 865 /* 866 * backrefs for the upper level block isn't 867 * cached, add the block to pending list 868 */ 869 list_add_tail(&edge->list[UPPER], &list); 870 } else { 871 upper = rb_entry(rb_node, struct backref_node, 872 rb_node); 873 ASSERT(upper->checked); 874 INIT_LIST_HEAD(&edge->list[UPPER]); 875 } 876 list_add_tail(&edge->list[LOWER], &cur->upper); 877 edge->node[LOWER] = cur; 878 edge->node[UPPER] = upper; 879 880 goto next; 881 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { 882 goto next; 883 } 884 885 /* key.type == BTRFS_TREE_BLOCK_REF_KEY */ 886 root = read_fs_root(rc->extent_root->fs_info, key.offset); 887 if (IS_ERR(root)) { 888 err = PTR_ERR(root); 889 goto out; 890 } 891 892 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) 893 cur->cowonly = 1; 894 895 if (btrfs_root_level(&root->root_item) == cur->level) { 896 /* tree root */ 897 ASSERT(btrfs_root_bytenr(&root->root_item) == 898 cur->bytenr); 899 if (should_ignore_root(root)) 900 list_add(&cur->list, &useless); 901 else 902 cur->root = root; 903 break; 904 } 905 906 level = cur->level + 1; 907 908 /* 909 * searching the tree to find upper level blocks 910 * reference the block. 
911 */ 912 path2->search_commit_root = 1; 913 path2->skip_locking = 1; 914 path2->lowest_level = level; 915 ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0); 916 path2->lowest_level = 0; 917 if (ret < 0) { 918 err = ret; 919 goto out; 920 } 921 if (ret > 0 && path2->slots[level] > 0) 922 path2->slots[level]--; 923 924 eb = path2->nodes[level]; 925 WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) != 926 cur->bytenr); 927 928 lower = cur; 929 need_check = true; 930 for (; level < BTRFS_MAX_LEVEL; level++) { 931 if (!path2->nodes[level]) { 932 ASSERT(btrfs_root_bytenr(&root->root_item) == 933 lower->bytenr); 934 if (should_ignore_root(root)) 935 list_add(&lower->list, &useless); 936 else 937 lower->root = root; 938 break; 939 } 940 941 edge = alloc_backref_edge(cache); 942 if (!edge) { 943 err = -ENOMEM; 944 goto out; 945 } 946 947 eb = path2->nodes[level]; 948 rb_node = tree_search(&cache->rb_root, eb->start); 949 if (!rb_node) { 950 upper = alloc_backref_node(cache); 951 if (!upper) { 952 free_backref_edge(cache, edge); 953 err = -ENOMEM; 954 goto out; 955 } 956 upper->bytenr = eb->start; 957 upper->owner = btrfs_header_owner(eb); 958 upper->level = lower->level + 1; 959 if (!test_bit(BTRFS_ROOT_REF_COWS, 960 &root->state)) 961 upper->cowonly = 1; 962 963 /* 964 * if we know the block isn't shared 965 * we can void checking its backrefs. 966 */ 967 if (btrfs_block_can_be_shared(root, eb)) 968 upper->checked = 0; 969 else 970 upper->checked = 1; 971 972 /* 973 * add the block to pending list if we 974 * need check its backrefs, we only do this once 975 * while walking up a tree as we will catch 976 * anything else later on. 
977 */ 978 if (!upper->checked && need_check) { 979 need_check = false; 980 list_add_tail(&edge->list[UPPER], 981 &list); 982 } else { 983 if (upper->checked) 984 need_check = true; 985 INIT_LIST_HEAD(&edge->list[UPPER]); 986 } 987 } else { 988 upper = rb_entry(rb_node, struct backref_node, 989 rb_node); 990 ASSERT(upper->checked); 991 INIT_LIST_HEAD(&edge->list[UPPER]); 992 if (!upper->owner) 993 upper->owner = btrfs_header_owner(eb); 994 } 995 list_add_tail(&edge->list[LOWER], &lower->upper); 996 edge->node[LOWER] = lower; 997 edge->node[UPPER] = upper; 998 999 if (rb_node) 1000 break; 1001 lower = upper; 1002 upper = NULL; 1003 } 1004 btrfs_release_path(path2); 1005 next: 1006 if (ptr < end) { 1007 ptr += btrfs_extent_inline_ref_size(key.type); 1008 if (ptr >= end) { 1009 WARN_ON(ptr > end); 1010 ptr = 0; 1011 end = 0; 1012 } 1013 } 1014 if (ptr >= end) 1015 path1->slots[0]++; 1016 } 1017 btrfs_release_path(path1); 1018 1019 cur->checked = 1; 1020 WARN_ON(exist); 1021 1022 /* the pending list isn't empty, take the first block to process */ 1023 if (!list_empty(&list)) { 1024 edge = list_entry(list.next, struct backref_edge, list[UPPER]); 1025 list_del_init(&edge->list[UPPER]); 1026 cur = edge->node[UPPER]; 1027 goto again; 1028 } 1029 1030 /* 1031 * everything goes well, connect backref nodes and insert backref nodes 1032 * into the cache. 
1033 */ 1034 ASSERT(node->checked); 1035 cowonly = node->cowonly; 1036 if (!cowonly) { 1037 rb_node = tree_insert(&cache->rb_root, node->bytenr, 1038 &node->rb_node); 1039 if (rb_node) 1040 backref_tree_panic(rb_node, -EEXIST, node->bytenr); 1041 list_add_tail(&node->lower, &cache->leaves); 1042 } 1043 1044 list_for_each_entry(edge, &node->upper, list[LOWER]) 1045 list_add_tail(&edge->list[UPPER], &list); 1046 1047 while (!list_empty(&list)) { 1048 edge = list_entry(list.next, struct backref_edge, list[UPPER]); 1049 list_del_init(&edge->list[UPPER]); 1050 upper = edge->node[UPPER]; 1051 if (upper->detached) { 1052 list_del(&edge->list[LOWER]); 1053 lower = edge->node[LOWER]; 1054 free_backref_edge(cache, edge); 1055 if (list_empty(&lower->upper)) 1056 list_add(&lower->list, &useless); 1057 continue; 1058 } 1059 1060 if (!RB_EMPTY_NODE(&upper->rb_node)) { 1061 if (upper->lowest) { 1062 list_del_init(&upper->lower); 1063 upper->lowest = 0; 1064 } 1065 1066 list_add_tail(&edge->list[UPPER], &upper->lower); 1067 continue; 1068 } 1069 1070 if (!upper->checked) { 1071 /* 1072 * Still want to blow up for developers since this is a 1073 * logic bug. 1074 */ 1075 ASSERT(0); 1076 err = -EINVAL; 1077 goto out; 1078 } 1079 if (cowonly != upper->cowonly) { 1080 ASSERT(0); 1081 err = -EINVAL; 1082 goto out; 1083 } 1084 1085 if (!cowonly) { 1086 rb_node = tree_insert(&cache->rb_root, upper->bytenr, 1087 &upper->rb_node); 1088 if (rb_node) 1089 backref_tree_panic(rb_node, -EEXIST, 1090 upper->bytenr); 1091 } 1092 1093 list_add_tail(&edge->list[UPPER], &upper->lower); 1094 1095 list_for_each_entry(edge, &upper->upper, list[LOWER]) 1096 list_add_tail(&edge->list[UPPER], &list); 1097 } 1098 /* 1099 * process useless backref nodes. backref nodes for tree leaves 1100 * are deleted from the cache. backref nodes for upper level 1101 * tree blocks are left in the cache to avoid unnecessary backref 1102 * lookup. 
1103 */ 1104 while (!list_empty(&useless)) { 1105 upper = list_entry(useless.next, struct backref_node, list); 1106 list_del_init(&upper->list); 1107 ASSERT(list_empty(&upper->upper)); 1108 if (upper == node) 1109 node = NULL; 1110 if (upper->lowest) { 1111 list_del_init(&upper->lower); 1112 upper->lowest = 0; 1113 } 1114 while (!list_empty(&upper->lower)) { 1115 edge = list_entry(upper->lower.next, 1116 struct backref_edge, list[UPPER]); 1117 list_del(&edge->list[UPPER]); 1118 list_del(&edge->list[LOWER]); 1119 lower = edge->node[LOWER]; 1120 free_backref_edge(cache, edge); 1121 1122 if (list_empty(&lower->upper)) 1123 list_add(&lower->list, &useless); 1124 } 1125 __mark_block_processed(rc, upper); 1126 if (upper->level > 0) { 1127 list_add(&upper->list, &cache->detached); 1128 upper->detached = 1; 1129 } else { 1130 rb_erase(&upper->rb_node, &cache->rb_root); 1131 free_backref_node(cache, upper); 1132 } 1133 } 1134 out: 1135 btrfs_free_path(path1); 1136 btrfs_free_path(path2); 1137 if (err) { 1138 while (!list_empty(&useless)) { 1139 lower = list_entry(useless.next, 1140 struct backref_node, list); 1141 list_del_init(&lower->list); 1142 } 1143 while (!list_empty(&list)) { 1144 edge = list_first_entry(&list, struct backref_edge, 1145 list[UPPER]); 1146 list_del(&edge->list[UPPER]); 1147 list_del(&edge->list[LOWER]); 1148 lower = edge->node[LOWER]; 1149 upper = edge->node[UPPER]; 1150 free_backref_edge(cache, edge); 1151 1152 /* 1153 * Lower is no longer linked to any upper backref nodes 1154 * and isn't in the cache, we can free it ourselves. 
1155 */ 1156 if (list_empty(&lower->upper) && 1157 RB_EMPTY_NODE(&lower->rb_node)) 1158 list_add(&lower->list, &useless); 1159 1160 if (!RB_EMPTY_NODE(&upper->rb_node)) 1161 continue; 1162 1163 /* Add this guy's upper edges to the list to process */ 1164 list_for_each_entry(edge, &upper->upper, list[LOWER]) 1165 list_add_tail(&edge->list[UPPER], &list); 1166 if (list_empty(&upper->upper)) 1167 list_add(&upper->list, &useless); 1168 } 1169 1170 while (!list_empty(&useless)) { 1171 lower = list_entry(useless.next, 1172 struct backref_node, list); 1173 list_del_init(&lower->list); 1174 if (lower == node) 1175 node = NULL; 1176 free_backref_node(cache, lower); 1177 } 1178 1179 free_backref_node(cache, node); 1180 return ERR_PTR(err); 1181 } 1182 ASSERT(!node || !node->detached); 1183 return node; 1184 } 1185 1186 /* 1187 * helper to add backref node for the newly created snapshot. 1188 * the backref node is created by cloning backref node that 1189 * corresponds to root of source tree 1190 */ 1191 static int clone_backref_node(struct btrfs_trans_handle *trans, 1192 struct reloc_control *rc, 1193 struct btrfs_root *src, 1194 struct btrfs_root *dest) 1195 { 1196 struct btrfs_root *reloc_root = src->reloc_root; 1197 struct backref_cache *cache = &rc->backref_cache; 1198 struct backref_node *node = NULL; 1199 struct backref_node *new_node; 1200 struct backref_edge *edge; 1201 struct backref_edge *new_edge; 1202 struct rb_node *rb_node; 1203 1204 if (cache->last_trans > 0) 1205 update_backref_cache(trans, cache); 1206 1207 rb_node = tree_search(&cache->rb_root, src->commit_root->start); 1208 if (rb_node) { 1209 node = rb_entry(rb_node, struct backref_node, rb_node); 1210 if (node->detached) 1211 node = NULL; 1212 else 1213 BUG_ON(node->new_bytenr != reloc_root->node->start); 1214 } 1215 1216 if (!node) { 1217 rb_node = tree_search(&cache->rb_root, 1218 reloc_root->commit_root->start); 1219 if (rb_node) { 1220 node = rb_entry(rb_node, struct backref_node, 1221 rb_node); 1222 
BUG_ON(node->detached); 1223 } 1224 } 1225 1226 if (!node) 1227 return 0; 1228 1229 new_node = alloc_backref_node(cache); 1230 if (!new_node) 1231 return -ENOMEM; 1232 1233 new_node->bytenr = dest->node->start; 1234 new_node->level = node->level; 1235 new_node->lowest = node->lowest; 1236 new_node->checked = 1; 1237 new_node->root = dest; 1238 1239 if (!node->lowest) { 1240 list_for_each_entry(edge, &node->lower, list[UPPER]) { 1241 new_edge = alloc_backref_edge(cache); 1242 if (!new_edge) 1243 goto fail; 1244 1245 new_edge->node[UPPER] = new_node; 1246 new_edge->node[LOWER] = edge->node[LOWER]; 1247 list_add_tail(&new_edge->list[UPPER], 1248 &new_node->lower); 1249 } 1250 } else { 1251 list_add_tail(&new_node->lower, &cache->leaves); 1252 } 1253 1254 rb_node = tree_insert(&cache->rb_root, new_node->bytenr, 1255 &new_node->rb_node); 1256 if (rb_node) 1257 backref_tree_panic(rb_node, -EEXIST, new_node->bytenr); 1258 1259 if (!new_node->lowest) { 1260 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { 1261 list_add_tail(&new_edge->list[LOWER], 1262 &new_edge->node[LOWER]->upper); 1263 } 1264 } 1265 return 0; 1266 fail: 1267 while (!list_empty(&new_node->lower)) { 1268 new_edge = list_entry(new_node->lower.next, 1269 struct backref_edge, list[UPPER]); 1270 list_del(&new_edge->list[UPPER]); 1271 free_backref_edge(cache, new_edge); 1272 } 1273 free_backref_node(cache, new_node); 1274 return -ENOMEM; 1275 } 1276 1277 /* 1278 * helper to add 'address of tree root -> reloc tree' mapping 1279 */ 1280 static int __must_check __add_reloc_root(struct btrfs_root *root) 1281 { 1282 struct rb_node *rb_node; 1283 struct mapping_node *node; 1284 struct reloc_control *rc = root->fs_info->reloc_ctl; 1285 1286 node = kmalloc(sizeof(*node), GFP_NOFS); 1287 if (!node) 1288 return -ENOMEM; 1289 1290 node->bytenr = root->node->start; 1291 node->data = root; 1292 1293 spin_lock(&rc->reloc_root_tree.lock); 1294 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 1295 
node->bytenr, &node->rb_node); 1296 spin_unlock(&rc->reloc_root_tree.lock); 1297 if (rb_node) { 1298 btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " 1299 "for start=%llu while inserting into relocation " 1300 "tree", node->bytenr); 1301 kfree(node); 1302 return -EEXIST; 1303 } 1304 1305 list_add_tail(&root->root_list, &rc->reloc_roots); 1306 return 0; 1307 } 1308 1309 /* 1310 * helper to delete the 'address of tree root -> reloc tree' 1311 * mapping 1312 */ 1313 static void __del_reloc_root(struct btrfs_root *root) 1314 { 1315 struct rb_node *rb_node; 1316 struct mapping_node *node = NULL; 1317 struct reloc_control *rc = root->fs_info->reloc_ctl; 1318 1319 spin_lock(&rc->reloc_root_tree.lock); 1320 rb_node = tree_search(&rc->reloc_root_tree.rb_root, 1321 root->node->start); 1322 if (rb_node) { 1323 node = rb_entry(rb_node, struct mapping_node, rb_node); 1324 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); 1325 } 1326 spin_unlock(&rc->reloc_root_tree.lock); 1327 1328 if (!node) 1329 return; 1330 BUG_ON((struct btrfs_root *)node->data != root); 1331 1332 spin_lock(&root->fs_info->trans_lock); 1333 list_del_init(&root->root_list); 1334 spin_unlock(&root->fs_info->trans_lock); 1335 kfree(node); 1336 } 1337 1338 /* 1339 * helper to update the 'address of tree root -> reloc tree' 1340 * mapping 1341 */ 1342 static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) 1343 { 1344 struct rb_node *rb_node; 1345 struct mapping_node *node = NULL; 1346 struct reloc_control *rc = root->fs_info->reloc_ctl; 1347 1348 spin_lock(&rc->reloc_root_tree.lock); 1349 rb_node = tree_search(&rc->reloc_root_tree.rb_root, 1350 root->node->start); 1351 if (rb_node) { 1352 node = rb_entry(rb_node, struct mapping_node, rb_node); 1353 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); 1354 } 1355 spin_unlock(&rc->reloc_root_tree.lock); 1356 1357 if (!node) 1358 return 0; 1359 BUG_ON((struct btrfs_root *)node->data != root); 1360 1361 
spin_lock(&rc->reloc_root_tree.lock); 1362 node->bytenr = new_bytenr; 1363 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 1364 node->bytenr, &node->rb_node); 1365 spin_unlock(&rc->reloc_root_tree.lock); 1366 if (rb_node) 1367 backref_tree_panic(rb_node, -EEXIST, node->bytenr); 1368 return 0; 1369 } 1370 1371 static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, 1372 struct btrfs_root *root, u64 objectid) 1373 { 1374 struct btrfs_root *reloc_root; 1375 struct extent_buffer *eb; 1376 struct btrfs_root_item *root_item; 1377 struct btrfs_key root_key; 1378 u64 last_snap = 0; 1379 int ret; 1380 1381 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 1382 BUG_ON(!root_item); 1383 1384 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; 1385 root_key.type = BTRFS_ROOT_ITEM_KEY; 1386 root_key.offset = objectid; 1387 1388 if (root->root_key.objectid == objectid) { 1389 /* called by btrfs_init_reloc_root */ 1390 ret = btrfs_copy_root(trans, root, root->commit_root, &eb, 1391 BTRFS_TREE_RELOC_OBJECTID); 1392 BUG_ON(ret); 1393 1394 last_snap = btrfs_root_last_snapshot(&root->root_item); 1395 btrfs_set_root_last_snapshot(&root->root_item, 1396 trans->transid - 1); 1397 } else { 1398 /* 1399 * called by btrfs_reloc_post_snapshot_hook. 1400 * the source tree is a reloc tree, all tree blocks 1401 * modified after it was created have RELOC flag 1402 * set in their headers. so it's OK to not update 1403 * the 'last_snapshot'. 
1404 */ 1405 ret = btrfs_copy_root(trans, root, root->node, &eb, 1406 BTRFS_TREE_RELOC_OBJECTID); 1407 BUG_ON(ret); 1408 } 1409 1410 memcpy(root_item, &root->root_item, sizeof(*root_item)); 1411 btrfs_set_root_bytenr(root_item, eb->start); 1412 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 1413 btrfs_set_root_generation(root_item, trans->transid); 1414 1415 if (root->root_key.objectid == objectid) { 1416 btrfs_set_root_refs(root_item, 0); 1417 memset(&root_item->drop_progress, 0, 1418 sizeof(struct btrfs_disk_key)); 1419 root_item->drop_level = 0; 1420 /* 1421 * abuse rtransid, it is safe because it is impossible to 1422 * receive data into a relocation tree. 1423 */ 1424 btrfs_set_root_rtransid(root_item, last_snap); 1425 btrfs_set_root_otransid(root_item, trans->transid); 1426 } 1427 1428 btrfs_tree_unlock(eb); 1429 free_extent_buffer(eb); 1430 1431 ret = btrfs_insert_root(trans, root->fs_info->tree_root, 1432 &root_key, root_item); 1433 BUG_ON(ret); 1434 kfree(root_item); 1435 1436 reloc_root = btrfs_read_fs_root(root->fs_info->tree_root, &root_key); 1437 BUG_ON(IS_ERR(reloc_root)); 1438 reloc_root->last_trans = trans->transid; 1439 return reloc_root; 1440 } 1441 1442 /* 1443 * create reloc tree for a given fs tree. reloc tree is just a 1444 * snapshot of the fs tree with special root objectid. 
1445 */ 1446 int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, 1447 struct btrfs_root *root) 1448 { 1449 struct btrfs_root *reloc_root; 1450 struct reloc_control *rc = root->fs_info->reloc_ctl; 1451 struct btrfs_block_rsv *rsv; 1452 int clear_rsv = 0; 1453 int ret; 1454 1455 if (root->reloc_root) { 1456 reloc_root = root->reloc_root; 1457 reloc_root->last_trans = trans->transid; 1458 return 0; 1459 } 1460 1461 if (!rc || !rc->create_reloc_tree || 1462 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 1463 return 0; 1464 1465 if (!trans->reloc_reserved) { 1466 rsv = trans->block_rsv; 1467 trans->block_rsv = rc->block_rsv; 1468 clear_rsv = 1; 1469 } 1470 reloc_root = create_reloc_root(trans, root, root->root_key.objectid); 1471 if (clear_rsv) 1472 trans->block_rsv = rsv; 1473 1474 ret = __add_reloc_root(reloc_root); 1475 BUG_ON(ret < 0); 1476 root->reloc_root = reloc_root; 1477 return 0; 1478 } 1479 1480 /* 1481 * update root item of reloc tree 1482 */ 1483 int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, 1484 struct btrfs_root *root) 1485 { 1486 struct btrfs_root *reloc_root; 1487 struct btrfs_root_item *root_item; 1488 int ret; 1489 1490 if (!root->reloc_root) 1491 goto out; 1492 1493 reloc_root = root->reloc_root; 1494 root_item = &reloc_root->root_item; 1495 1496 if (root->fs_info->reloc_ctl->merge_reloc_tree && 1497 btrfs_root_refs(root_item) == 0) { 1498 root->reloc_root = NULL; 1499 __del_reloc_root(reloc_root); 1500 } 1501 1502 if (reloc_root->commit_root != reloc_root->node) { 1503 btrfs_set_root_node(root_item, reloc_root->node); 1504 free_extent_buffer(reloc_root->commit_root); 1505 reloc_root->commit_root = btrfs_root_node(reloc_root); 1506 } 1507 1508 ret = btrfs_update_root(trans, root->fs_info->tree_root, 1509 &reloc_root->root_key, root_item); 1510 BUG_ON(ret); 1511 1512 out: 1513 return 0; 1514 } 1515 1516 /* 1517 * helper to find first cached inode with inode number >= objectid 1518 * in a subvolume 1519 */ 1520 static 
struct inode *find_next_inode(struct btrfs_root *root, u64 objectid) 1521 { 1522 struct rb_node *node; 1523 struct rb_node *prev; 1524 struct btrfs_inode *entry; 1525 struct inode *inode; 1526 1527 spin_lock(&root->inode_lock); 1528 again: 1529 node = root->inode_tree.rb_node; 1530 prev = NULL; 1531 while (node) { 1532 prev = node; 1533 entry = rb_entry(node, struct btrfs_inode, rb_node); 1534 1535 if (objectid < btrfs_ino(&entry->vfs_inode)) 1536 node = node->rb_left; 1537 else if (objectid > btrfs_ino(&entry->vfs_inode)) 1538 node = node->rb_right; 1539 else 1540 break; 1541 } 1542 if (!node) { 1543 while (prev) { 1544 entry = rb_entry(prev, struct btrfs_inode, rb_node); 1545 if (objectid <= btrfs_ino(&entry->vfs_inode)) { 1546 node = prev; 1547 break; 1548 } 1549 prev = rb_next(prev); 1550 } 1551 } 1552 while (node) { 1553 entry = rb_entry(node, struct btrfs_inode, rb_node); 1554 inode = igrab(&entry->vfs_inode); 1555 if (inode) { 1556 spin_unlock(&root->inode_lock); 1557 return inode; 1558 } 1559 1560 objectid = btrfs_ino(&entry->vfs_inode) + 1; 1561 if (cond_resched_lock(&root->inode_lock)) 1562 goto again; 1563 1564 node = rb_next(node); 1565 } 1566 spin_unlock(&root->inode_lock); 1567 return NULL; 1568 } 1569 1570 static int in_block_group(u64 bytenr, 1571 struct btrfs_block_group_cache *block_group) 1572 { 1573 if (bytenr >= block_group->key.objectid && 1574 bytenr < block_group->key.objectid + block_group->key.offset) 1575 return 1; 1576 return 0; 1577 } 1578 1579 /* 1580 * get new location of data 1581 */ 1582 static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, 1583 u64 bytenr, u64 num_bytes) 1584 { 1585 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 1586 struct btrfs_path *path; 1587 struct btrfs_file_extent_item *fi; 1588 struct extent_buffer *leaf; 1589 int ret; 1590 1591 path = btrfs_alloc_path(); 1592 if (!path) 1593 return -ENOMEM; 1594 1595 bytenr -= BTRFS_I(reloc_inode)->index_cnt; 1596 ret = 
btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode), 1597 bytenr, 0); 1598 if (ret < 0) 1599 goto out; 1600 if (ret > 0) { 1601 ret = -ENOENT; 1602 goto out; 1603 } 1604 1605 leaf = path->nodes[0]; 1606 fi = btrfs_item_ptr(leaf, path->slots[0], 1607 struct btrfs_file_extent_item); 1608 1609 BUG_ON(btrfs_file_extent_offset(leaf, fi) || 1610 btrfs_file_extent_compression(leaf, fi) || 1611 btrfs_file_extent_encryption(leaf, fi) || 1612 btrfs_file_extent_other_encoding(leaf, fi)); 1613 1614 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { 1615 ret = -EINVAL; 1616 goto out; 1617 } 1618 1619 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1620 ret = 0; 1621 out: 1622 btrfs_free_path(path); 1623 return ret; 1624 } 1625 1626 /* 1627 * update file extent items in the tree leaf to point to 1628 * the new locations. 1629 */ 1630 static noinline_for_stack 1631 int replace_file_extents(struct btrfs_trans_handle *trans, 1632 struct reloc_control *rc, 1633 struct btrfs_root *root, 1634 struct extent_buffer *leaf) 1635 { 1636 struct btrfs_key key; 1637 struct btrfs_file_extent_item *fi; 1638 struct inode *inode = NULL; 1639 u64 parent; 1640 u64 bytenr; 1641 u64 new_bytenr = 0; 1642 u64 num_bytes; 1643 u64 end; 1644 u32 nritems; 1645 u32 i; 1646 int ret = 0; 1647 int first = 1; 1648 int dirty = 0; 1649 1650 if (rc->stage != UPDATE_DATA_PTRS) 1651 return 0; 1652 1653 /* reloc trees always use full backref */ 1654 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 1655 parent = leaf->start; 1656 else 1657 parent = 0; 1658 1659 nritems = btrfs_header_nritems(leaf); 1660 for (i = 0; i < nritems; i++) { 1661 cond_resched(); 1662 btrfs_item_key_to_cpu(leaf, &key, i); 1663 if (key.type != BTRFS_EXTENT_DATA_KEY) 1664 continue; 1665 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 1666 if (btrfs_file_extent_type(leaf, fi) == 1667 BTRFS_FILE_EXTENT_INLINE) 1668 continue; 1669 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1670 
num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 1671 if (bytenr == 0) 1672 continue; 1673 if (!in_block_group(bytenr, rc->block_group)) 1674 continue; 1675 1676 /* 1677 * if we are modifying block in fs tree, wait for readpage 1678 * to complete and drop the extent cache 1679 */ 1680 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 1681 if (first) { 1682 inode = find_next_inode(root, key.objectid); 1683 first = 0; 1684 } else if (inode && btrfs_ino(inode) < key.objectid) { 1685 btrfs_add_delayed_iput(inode); 1686 inode = find_next_inode(root, key.objectid); 1687 } 1688 if (inode && btrfs_ino(inode) == key.objectid) { 1689 end = key.offset + 1690 btrfs_file_extent_num_bytes(leaf, fi); 1691 WARN_ON(!IS_ALIGNED(key.offset, 1692 root->sectorsize)); 1693 WARN_ON(!IS_ALIGNED(end, root->sectorsize)); 1694 end--; 1695 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 1696 key.offset, end); 1697 if (!ret) 1698 continue; 1699 1700 btrfs_drop_extent_cache(inode, key.offset, end, 1701 1); 1702 unlock_extent(&BTRFS_I(inode)->io_tree, 1703 key.offset, end); 1704 } 1705 } 1706 1707 ret = get_new_location(rc->data_inode, &new_bytenr, 1708 bytenr, num_bytes); 1709 if (ret) { 1710 /* 1711 * Don't have to abort since we've not changed anything 1712 * in the file extent yet. 
1713 */ 1714 break; 1715 } 1716 1717 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); 1718 dirty = 1; 1719 1720 key.offset -= btrfs_file_extent_offset(leaf, fi); 1721 ret = btrfs_inc_extent_ref(trans, root, new_bytenr, 1722 num_bytes, parent, 1723 btrfs_header_owner(leaf), 1724 key.objectid, key.offset); 1725 if (ret) { 1726 btrfs_abort_transaction(trans, ret); 1727 break; 1728 } 1729 1730 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1731 parent, btrfs_header_owner(leaf), 1732 key.objectid, key.offset); 1733 if (ret) { 1734 btrfs_abort_transaction(trans, ret); 1735 break; 1736 } 1737 } 1738 if (dirty) 1739 btrfs_mark_buffer_dirty(leaf); 1740 if (inode) 1741 btrfs_add_delayed_iput(inode); 1742 return ret; 1743 } 1744 1745 static noinline_for_stack 1746 int memcmp_node_keys(struct extent_buffer *eb, int slot, 1747 struct btrfs_path *path, int level) 1748 { 1749 struct btrfs_disk_key key1; 1750 struct btrfs_disk_key key2; 1751 btrfs_node_key(eb, &key1, slot); 1752 btrfs_node_key(path->nodes[level], &key2, path->slots[level]); 1753 return memcmp(&key1, &key2, sizeof(key1)); 1754 } 1755 1756 /* 1757 * try to replace tree blocks in fs tree with the new blocks 1758 * in reloc tree. tree blocks haven't been modified since the 1759 * reloc tree was create can be replaced. 1760 * 1761 * if a block was replaced, level of the block + 1 is returned. 1762 * if no block got replaced, 0 is returned. if there are other 1763 * errors, a negative error number is returned. 
1764 */ 1765 static noinline_for_stack 1766 int replace_path(struct btrfs_trans_handle *trans, 1767 struct btrfs_root *dest, struct btrfs_root *src, 1768 struct btrfs_path *path, struct btrfs_key *next_key, 1769 int lowest_level, int max_level) 1770 { 1771 struct extent_buffer *eb; 1772 struct extent_buffer *parent; 1773 struct btrfs_key key; 1774 u64 old_bytenr; 1775 u64 new_bytenr; 1776 u64 old_ptr_gen; 1777 u64 new_ptr_gen; 1778 u64 last_snapshot; 1779 u32 blocksize; 1780 int cow = 0; 1781 int level; 1782 int ret; 1783 int slot; 1784 1785 BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 1786 BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); 1787 1788 last_snapshot = btrfs_root_last_snapshot(&src->root_item); 1789 again: 1790 slot = path->slots[lowest_level]; 1791 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); 1792 1793 eb = btrfs_lock_root_node(dest); 1794 btrfs_set_lock_blocking(eb); 1795 level = btrfs_header_level(eb); 1796 1797 if (level < lowest_level) { 1798 btrfs_tree_unlock(eb); 1799 free_extent_buffer(eb); 1800 return 0; 1801 } 1802 1803 if (cow) { 1804 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); 1805 BUG_ON(ret); 1806 } 1807 btrfs_set_lock_blocking(eb); 1808 1809 if (next_key) { 1810 next_key->objectid = (u64)-1; 1811 next_key->type = (u8)-1; 1812 next_key->offset = (u64)-1; 1813 } 1814 1815 parent = eb; 1816 while (1) { 1817 level = btrfs_header_level(parent); 1818 BUG_ON(level < lowest_level); 1819 1820 ret = btrfs_bin_search(parent, &key, level, &slot); 1821 if (ret && slot > 0) 1822 slot--; 1823 1824 if (next_key && slot + 1 < btrfs_header_nritems(parent)) 1825 btrfs_node_key_to_cpu(parent, next_key, slot + 1); 1826 1827 old_bytenr = btrfs_node_blockptr(parent, slot); 1828 blocksize = dest->nodesize; 1829 old_ptr_gen = btrfs_node_ptr_generation(parent, slot); 1830 1831 if (level <= max_level) { 1832 eb = path->nodes[level]; 1833 new_bytenr = btrfs_node_blockptr(eb, 1834 path->slots[level]); 1835 
new_ptr_gen = btrfs_node_ptr_generation(eb, 1836 path->slots[level]); 1837 } else { 1838 new_bytenr = 0; 1839 new_ptr_gen = 0; 1840 } 1841 1842 if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) { 1843 ret = level; 1844 break; 1845 } 1846 1847 if (new_bytenr == 0 || old_ptr_gen > last_snapshot || 1848 memcmp_node_keys(parent, slot, path, level)) { 1849 if (level <= lowest_level) { 1850 ret = 0; 1851 break; 1852 } 1853 1854 eb = read_tree_block(dest, old_bytenr, old_ptr_gen); 1855 if (IS_ERR(eb)) { 1856 ret = PTR_ERR(eb); 1857 break; 1858 } else if (!extent_buffer_uptodate(eb)) { 1859 ret = -EIO; 1860 free_extent_buffer(eb); 1861 break; 1862 } 1863 btrfs_tree_lock(eb); 1864 if (cow) { 1865 ret = btrfs_cow_block(trans, dest, eb, parent, 1866 slot, &eb); 1867 BUG_ON(ret); 1868 } 1869 btrfs_set_lock_blocking(eb); 1870 1871 btrfs_tree_unlock(parent); 1872 free_extent_buffer(parent); 1873 1874 parent = eb; 1875 continue; 1876 } 1877 1878 if (!cow) { 1879 btrfs_tree_unlock(parent); 1880 free_extent_buffer(parent); 1881 cow = 1; 1882 goto again; 1883 } 1884 1885 btrfs_node_key_to_cpu(path->nodes[level], &key, 1886 path->slots[level]); 1887 btrfs_release_path(path); 1888 1889 path->lowest_level = level; 1890 ret = btrfs_search_slot(trans, src, &key, path, 0, 1); 1891 path->lowest_level = 0; 1892 BUG_ON(ret); 1893 1894 /* 1895 * swap blocks in fs tree and reloc tree. 
1896 */ 1897 btrfs_set_node_blockptr(parent, slot, new_bytenr); 1898 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen); 1899 btrfs_mark_buffer_dirty(parent); 1900 1901 btrfs_set_node_blockptr(path->nodes[level], 1902 path->slots[level], old_bytenr); 1903 btrfs_set_node_ptr_generation(path->nodes[level], 1904 path->slots[level], old_ptr_gen); 1905 btrfs_mark_buffer_dirty(path->nodes[level]); 1906 1907 ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize, 1908 path->nodes[level]->start, 1909 src->root_key.objectid, level - 1, 0); 1910 BUG_ON(ret); 1911 ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize, 1912 0, dest->root_key.objectid, level - 1, 1913 0); 1914 BUG_ON(ret); 1915 1916 ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, 1917 path->nodes[level]->start, 1918 src->root_key.objectid, level - 1, 0); 1919 BUG_ON(ret); 1920 1921 ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, 1922 0, dest->root_key.objectid, level - 1, 1923 0); 1924 BUG_ON(ret); 1925 1926 btrfs_unlock_up_safe(path, 0); 1927 1928 ret = level; 1929 break; 1930 } 1931 btrfs_tree_unlock(parent); 1932 free_extent_buffer(parent); 1933 return ret; 1934 } 1935 1936 /* 1937 * helper to find next relocated block in reloc tree 1938 */ 1939 static noinline_for_stack 1940 int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, 1941 int *level) 1942 { 1943 struct extent_buffer *eb; 1944 int i; 1945 u64 last_snapshot; 1946 u32 nritems; 1947 1948 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 1949 1950 for (i = 0; i < *level; i++) { 1951 free_extent_buffer(path->nodes[i]); 1952 path->nodes[i] = NULL; 1953 } 1954 1955 for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) { 1956 eb = path->nodes[i]; 1957 nritems = btrfs_header_nritems(eb); 1958 while (path->slots[i] + 1 < nritems) { 1959 path->slots[i]++; 1960 if (btrfs_node_ptr_generation(eb, path->slots[i]) <= 1961 last_snapshot) 1962 continue; 1963 1964 *level = i; 1965 return 
0; 1966 } 1967 free_extent_buffer(path->nodes[i]); 1968 path->nodes[i] = NULL; 1969 } 1970 return 1; 1971 } 1972 1973 /* 1974 * walk down reloc tree to find relocated block of lowest level 1975 */ 1976 static noinline_for_stack 1977 int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, 1978 int *level) 1979 { 1980 struct extent_buffer *eb = NULL; 1981 int i; 1982 u64 bytenr; 1983 u64 ptr_gen = 0; 1984 u64 last_snapshot; 1985 u32 nritems; 1986 1987 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 1988 1989 for (i = *level; i > 0; i--) { 1990 eb = path->nodes[i]; 1991 nritems = btrfs_header_nritems(eb); 1992 while (path->slots[i] < nritems) { 1993 ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]); 1994 if (ptr_gen > last_snapshot) 1995 break; 1996 path->slots[i]++; 1997 } 1998 if (path->slots[i] >= nritems) { 1999 if (i == *level) 2000 break; 2001 *level = i + 1; 2002 return 0; 2003 } 2004 if (i == 1) { 2005 *level = i; 2006 return 0; 2007 } 2008 2009 bytenr = btrfs_node_blockptr(eb, path->slots[i]); 2010 eb = read_tree_block(root, bytenr, ptr_gen); 2011 if (IS_ERR(eb)) { 2012 return PTR_ERR(eb); 2013 } else if (!extent_buffer_uptodate(eb)) { 2014 free_extent_buffer(eb); 2015 return -EIO; 2016 } 2017 BUG_ON(btrfs_header_level(eb) != i - 1); 2018 path->nodes[i - 1] = eb; 2019 path->slots[i - 1] = 0; 2020 } 2021 return 1; 2022 } 2023 2024 /* 2025 * invalidate extent cache for file extents whose key in range of 2026 * [min_key, max_key) 2027 */ 2028 static int invalidate_extent_cache(struct btrfs_root *root, 2029 struct btrfs_key *min_key, 2030 struct btrfs_key *max_key) 2031 { 2032 struct inode *inode = NULL; 2033 u64 objectid; 2034 u64 start, end; 2035 u64 ino; 2036 2037 objectid = min_key->objectid; 2038 while (1) { 2039 cond_resched(); 2040 iput(inode); 2041 2042 if (objectid > max_key->objectid) 2043 break; 2044 2045 inode = find_next_inode(root, objectid); 2046 if (!inode) 2047 break; 2048 ino = btrfs_ino(inode); 2049 2050 if 
(ino > max_key->objectid) { 2051 iput(inode); 2052 break; 2053 } 2054 2055 objectid = ino + 1; 2056 if (!S_ISREG(inode->i_mode)) 2057 continue; 2058 2059 if (unlikely(min_key->objectid == ino)) { 2060 if (min_key->type > BTRFS_EXTENT_DATA_KEY) 2061 continue; 2062 if (min_key->type < BTRFS_EXTENT_DATA_KEY) 2063 start = 0; 2064 else { 2065 start = min_key->offset; 2066 WARN_ON(!IS_ALIGNED(start, root->sectorsize)); 2067 } 2068 } else { 2069 start = 0; 2070 } 2071 2072 if (unlikely(max_key->objectid == ino)) { 2073 if (max_key->type < BTRFS_EXTENT_DATA_KEY) 2074 continue; 2075 if (max_key->type > BTRFS_EXTENT_DATA_KEY) { 2076 end = (u64)-1; 2077 } else { 2078 if (max_key->offset == 0) 2079 continue; 2080 end = max_key->offset; 2081 WARN_ON(!IS_ALIGNED(end, root->sectorsize)); 2082 end--; 2083 } 2084 } else { 2085 end = (u64)-1; 2086 } 2087 2088 /* the lock_extent waits for readpage to complete */ 2089 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 2090 btrfs_drop_extent_cache(inode, start, end, 1); 2091 unlock_extent(&BTRFS_I(inode)->io_tree, start, end); 2092 } 2093 return 0; 2094 } 2095 2096 static int find_next_key(struct btrfs_path *path, int level, 2097 struct btrfs_key *key) 2098 2099 { 2100 while (level < BTRFS_MAX_LEVEL) { 2101 if (!path->nodes[level]) 2102 break; 2103 if (path->slots[level] + 1 < 2104 btrfs_header_nritems(path->nodes[level])) { 2105 btrfs_node_key_to_cpu(path->nodes[level], key, 2106 path->slots[level] + 1); 2107 return 0; 2108 } 2109 level++; 2110 } 2111 return 1; 2112 } 2113 2114 /* 2115 * merge the relocated tree blocks in reloc tree with corresponding 2116 * fs tree. 
2117 */ 2118 static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, 2119 struct btrfs_root *root) 2120 { 2121 LIST_HEAD(inode_list); 2122 struct btrfs_key key; 2123 struct btrfs_key next_key; 2124 struct btrfs_trans_handle *trans = NULL; 2125 struct btrfs_root *reloc_root; 2126 struct btrfs_root_item *root_item; 2127 struct btrfs_path *path; 2128 struct extent_buffer *leaf; 2129 int level; 2130 int max_level; 2131 int replaced = 0; 2132 int ret; 2133 int err = 0; 2134 u32 min_reserved; 2135 2136 path = btrfs_alloc_path(); 2137 if (!path) 2138 return -ENOMEM; 2139 path->reada = READA_FORWARD; 2140 2141 reloc_root = root->reloc_root; 2142 root_item = &reloc_root->root_item; 2143 2144 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 2145 level = btrfs_root_level(root_item); 2146 extent_buffer_get(reloc_root->node); 2147 path->nodes[level] = reloc_root->node; 2148 path->slots[level] = 0; 2149 } else { 2150 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 2151 2152 level = root_item->drop_level; 2153 BUG_ON(level == 0); 2154 path->lowest_level = level; 2155 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0); 2156 path->lowest_level = 0; 2157 if (ret < 0) { 2158 btrfs_free_path(path); 2159 return ret; 2160 } 2161 2162 btrfs_node_key_to_cpu(path->nodes[level], &next_key, 2163 path->slots[level]); 2164 WARN_ON(memcmp(&key, &next_key, sizeof(key))); 2165 2166 btrfs_unlock_up_safe(path, 0); 2167 } 2168 2169 min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2170 memset(&next_key, 0, sizeof(next_key)); 2171 2172 while (1) { 2173 ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved, 2174 BTRFS_RESERVE_FLUSH_ALL); 2175 if (ret) { 2176 err = ret; 2177 goto out; 2178 } 2179 trans = btrfs_start_transaction(root, 0); 2180 if (IS_ERR(trans)) { 2181 err = PTR_ERR(trans); 2182 trans = NULL; 2183 goto out; 2184 } 2185 trans->block_rsv = rc->block_rsv; 2186 2187 replaced = 0; 2188 max_level = level; 2189 2190 ret = 
walk_down_reloc_tree(reloc_root, path, &level); 2191 if (ret < 0) { 2192 err = ret; 2193 goto out; 2194 } 2195 if (ret > 0) 2196 break; 2197 2198 if (!find_next_key(path, level, &key) && 2199 btrfs_comp_cpu_keys(&next_key, &key) >= 0) { 2200 ret = 0; 2201 } else { 2202 ret = replace_path(trans, root, reloc_root, path, 2203 &next_key, level, max_level); 2204 } 2205 if (ret < 0) { 2206 err = ret; 2207 goto out; 2208 } 2209 2210 if (ret > 0) { 2211 level = ret; 2212 btrfs_node_key_to_cpu(path->nodes[level], &key, 2213 path->slots[level]); 2214 replaced = 1; 2215 } 2216 2217 ret = walk_up_reloc_tree(reloc_root, path, &level); 2218 if (ret > 0) 2219 break; 2220 2221 BUG_ON(level == 0); 2222 /* 2223 * save the merging progress in the drop_progress. 2224 * this is OK since root refs == 1 in this case. 2225 */ 2226 btrfs_node_key(path->nodes[level], &root_item->drop_progress, 2227 path->slots[level]); 2228 root_item->drop_level = level; 2229 2230 btrfs_end_transaction_throttle(trans, root); 2231 trans = NULL; 2232 2233 btrfs_btree_balance_dirty(root); 2234 2235 if (replaced && rc->stage == UPDATE_DATA_PTRS) 2236 invalidate_extent_cache(root, &key, &next_key); 2237 } 2238 2239 /* 2240 * handle the case only one block in the fs tree need to be 2241 * relocated and the block is tree root. 
2242 */ 2243 leaf = btrfs_lock_root_node(root); 2244 ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf); 2245 btrfs_tree_unlock(leaf); 2246 free_extent_buffer(leaf); 2247 if (ret < 0) 2248 err = ret; 2249 out: 2250 btrfs_free_path(path); 2251 2252 if (err == 0) { 2253 memset(&root_item->drop_progress, 0, 2254 sizeof(root_item->drop_progress)); 2255 root_item->drop_level = 0; 2256 btrfs_set_root_refs(root_item, 0); 2257 btrfs_update_reloc_root(trans, root); 2258 } 2259 2260 if (trans) 2261 btrfs_end_transaction_throttle(trans, root); 2262 2263 btrfs_btree_balance_dirty(root); 2264 2265 if (replaced && rc->stage == UPDATE_DATA_PTRS) 2266 invalidate_extent_cache(root, &key, &next_key); 2267 2268 return err; 2269 } 2270 2271 static noinline_for_stack 2272 int prepare_to_merge(struct reloc_control *rc, int err) 2273 { 2274 struct btrfs_root *root = rc->extent_root; 2275 struct btrfs_root *reloc_root; 2276 struct btrfs_trans_handle *trans; 2277 LIST_HEAD(reloc_roots); 2278 u64 num_bytes = 0; 2279 int ret; 2280 2281 mutex_lock(&root->fs_info->reloc_mutex); 2282 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2283 rc->merging_rsv_size += rc->nodes_relocated * 2; 2284 mutex_unlock(&root->fs_info->reloc_mutex); 2285 2286 again: 2287 if (!err) { 2288 num_bytes = rc->merging_rsv_size; 2289 ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes, 2290 BTRFS_RESERVE_FLUSH_ALL); 2291 if (ret) 2292 err = ret; 2293 } 2294 2295 trans = btrfs_join_transaction(rc->extent_root); 2296 if (IS_ERR(trans)) { 2297 if (!err) 2298 btrfs_block_rsv_release(rc->extent_root, 2299 rc->block_rsv, num_bytes); 2300 return PTR_ERR(trans); 2301 } 2302 2303 if (!err) { 2304 if (num_bytes != rc->merging_rsv_size) { 2305 btrfs_end_transaction(trans, rc->extent_root); 2306 btrfs_block_rsv_release(rc->extent_root, 2307 rc->block_rsv, num_bytes); 2308 goto again; 2309 } 2310 } 2311 2312 rc->merge_reloc_tree = 1; 2313 2314 while (!list_empty(&rc->reloc_roots)) { 2315 reloc_root = 
list_entry(rc->reloc_roots.next, 2316 struct btrfs_root, root_list); 2317 list_del_init(&reloc_root->root_list); 2318 2319 root = read_fs_root(reloc_root->fs_info, 2320 reloc_root->root_key.offset); 2321 BUG_ON(IS_ERR(root)); 2322 BUG_ON(root->reloc_root != reloc_root); 2323 2324 /* 2325 * set reference count to 1, so btrfs_recover_relocation 2326 * knows it should resumes merging 2327 */ 2328 if (!err) 2329 btrfs_set_root_refs(&reloc_root->root_item, 1); 2330 btrfs_update_reloc_root(trans, root); 2331 2332 list_add(&reloc_root->root_list, &reloc_roots); 2333 } 2334 2335 list_splice(&reloc_roots, &rc->reloc_roots); 2336 2337 if (!err) 2338 btrfs_commit_transaction(trans, rc->extent_root); 2339 else 2340 btrfs_end_transaction(trans, rc->extent_root); 2341 return err; 2342 } 2343 2344 static noinline_for_stack 2345 void free_reloc_roots(struct list_head *list) 2346 { 2347 struct btrfs_root *reloc_root; 2348 2349 while (!list_empty(list)) { 2350 reloc_root = list_entry(list->next, struct btrfs_root, 2351 root_list); 2352 __del_reloc_root(reloc_root); 2353 } 2354 } 2355 2356 static noinline_for_stack 2357 void merge_reloc_roots(struct reloc_control *rc) 2358 { 2359 struct btrfs_root *root; 2360 struct btrfs_root *reloc_root; 2361 u64 last_snap; 2362 u64 otransid; 2363 u64 objectid; 2364 LIST_HEAD(reloc_roots); 2365 int found = 0; 2366 int ret = 0; 2367 again: 2368 root = rc->extent_root; 2369 2370 /* 2371 * this serializes us with btrfs_record_root_in_transaction, 2372 * we have to make sure nobody is in the middle of 2373 * adding their roots to the list while we are 2374 * doing this splice 2375 */ 2376 mutex_lock(&root->fs_info->reloc_mutex); 2377 list_splice_init(&rc->reloc_roots, &reloc_roots); 2378 mutex_unlock(&root->fs_info->reloc_mutex); 2379 2380 while (!list_empty(&reloc_roots)) { 2381 found = 1; 2382 reloc_root = list_entry(reloc_roots.next, 2383 struct btrfs_root, root_list); 2384 2385 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 2386 root = 
read_fs_root(reloc_root->fs_info, 2387 reloc_root->root_key.offset); 2388 BUG_ON(IS_ERR(root)); 2389 BUG_ON(root->reloc_root != reloc_root); 2390 2391 ret = merge_reloc_root(rc, root); 2392 if (ret) { 2393 if (list_empty(&reloc_root->root_list)) 2394 list_add_tail(&reloc_root->root_list, 2395 &reloc_roots); 2396 goto out; 2397 } 2398 } else { 2399 list_del_init(&reloc_root->root_list); 2400 } 2401 2402 /* 2403 * we keep the old last snapshot transid in rtranid when we 2404 * created the relocation tree. 2405 */ 2406 last_snap = btrfs_root_rtransid(&reloc_root->root_item); 2407 otransid = btrfs_root_otransid(&reloc_root->root_item); 2408 objectid = reloc_root->root_key.offset; 2409 2410 ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); 2411 if (ret < 0) { 2412 if (list_empty(&reloc_root->root_list)) 2413 list_add_tail(&reloc_root->root_list, 2414 &reloc_roots); 2415 goto out; 2416 } 2417 } 2418 2419 if (found) { 2420 found = 0; 2421 goto again; 2422 } 2423 out: 2424 if (ret) { 2425 btrfs_handle_fs_error(root->fs_info, ret, NULL); 2426 if (!list_empty(&reloc_roots)) 2427 free_reloc_roots(&reloc_roots); 2428 2429 /* new reloc root may be added */ 2430 mutex_lock(&root->fs_info->reloc_mutex); 2431 list_splice_init(&rc->reloc_roots, &reloc_roots); 2432 mutex_unlock(&root->fs_info->reloc_mutex); 2433 if (!list_empty(&reloc_roots)) 2434 free_reloc_roots(&reloc_roots); 2435 } 2436 2437 BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); 2438 } 2439 2440 static void free_block_list(struct rb_root *blocks) 2441 { 2442 struct tree_block *block; 2443 struct rb_node *rb_node; 2444 while ((rb_node = rb_first(blocks))) { 2445 block = rb_entry(rb_node, struct tree_block, rb_node); 2446 rb_erase(rb_node, blocks); 2447 kfree(block); 2448 } 2449 } 2450 2451 static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, 2452 struct btrfs_root *reloc_root) 2453 { 2454 struct btrfs_root *root; 2455 2456 if (reloc_root->last_trans == trans->transid) 2457 return 0; 
/*
 * Pick the relocation tree root to use when COWing @node and record the
 * backref path leading to it.
 *
 * Walks up from @node with walk_up_backref(), storing the edges followed in
 * @edges.  On success the nodes along the chosen path are stored in
 * rc->backref_cache.path[], indexed by level, and the selected root is
 * returned.  Returns NULL when no candidate path yields a usable root.
 */
static noinline_for_stack
struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
				     struct reloc_control *rc,
				     struct backref_node *node,
				     struct backref_edge *edges[])
{
	struct backref_node *next;
	struct btrfs_root *root;
	int index = 0;

	next = node;
	while (1) {
		cond_resched();
		next = walk_up_backref(next, edges, &index);
		root = next->root;
		BUG_ON(!root);
		BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state));

		/* the topmost node already belongs to a reloc tree: use it */
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
			/* NOTE(review): return value is ignored here */
			record_reloc_root_in_trans(trans, root);
			break;
		}

		/* otherwise switch from the fs root to its reloc root */
		btrfs_record_root_in_trans(trans, root);
		root = root->reloc_root;

		/*
		 * The cached node does not yet point at the reloc tree's
		 * root block: retarget it, queue it on the "changed" list
		 * and mark it processed.
		 */
		if (next->new_bytenr != root->node->start) {
			BUG_ON(next->new_bytenr);
			BUG_ON(!list_empty(&next->list));
			next->new_bytenr = root->node->start;
			next->root = root;
			list_add_tail(&next->list,
				      &rc->backref_cache.changed);
			__mark_block_processed(rc, next);
			break;
		}

		/* unexpected state: warn and try the next candidate path */
		WARN_ON(1);
		root = NULL;
		next = walk_down_backref(edges, &index);
		if (!next || next->level <= node->level)
			break;
	}
	if (!root)
		return NULL;

	next = node;
	/* setup backref node path for btrfs_reloc_cow_block */
	while (1) {
		rc->backref_cache.path[next->level] = next;
		if (--index < 0)
			break;
		next = edges[index]->node[UPPER];
	}
	return root;
}
2528 */ 2529 static noinline_for_stack 2530 struct btrfs_root *select_one_root(struct backref_node *node) 2531 { 2532 struct backref_node *next; 2533 struct btrfs_root *root; 2534 struct btrfs_root *fs_root = NULL; 2535 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2536 int index = 0; 2537 2538 next = node; 2539 while (1) { 2540 cond_resched(); 2541 next = walk_up_backref(next, edges, &index); 2542 root = next->root; 2543 BUG_ON(!root); 2544 2545 /* no other choice for non-references counted tree */ 2546 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) 2547 return root; 2548 2549 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) 2550 fs_root = root; 2551 2552 if (next != node) 2553 return NULL; 2554 2555 next = walk_down_backref(edges, &index); 2556 if (!next || next->level <= node->level) 2557 break; 2558 } 2559 2560 if (!fs_root) 2561 return ERR_PTR(-ENOENT); 2562 return fs_root; 2563 } 2564 2565 static noinline_for_stack 2566 u64 calcu_metadata_size(struct reloc_control *rc, 2567 struct backref_node *node, int reserve) 2568 { 2569 struct backref_node *next = node; 2570 struct backref_edge *edge; 2571 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2572 u64 num_bytes = 0; 2573 int index = 0; 2574 2575 BUG_ON(reserve && node->processed); 2576 2577 while (next) { 2578 cond_resched(); 2579 while (1) { 2580 if (next->processed && (reserve || next != node)) 2581 break; 2582 2583 num_bytes += rc->extent_root->nodesize; 2584 2585 if (list_empty(&next->upper)) 2586 break; 2587 2588 edge = list_entry(next->upper.next, 2589 struct backref_edge, list[LOWER]); 2590 edges[index++] = edge; 2591 next = edge->node[UPPER]; 2592 } 2593 next = walk_down_backref(edges, &index); 2594 } 2595 return num_bytes; 2596 } 2597 2598 static int reserve_metadata_space(struct btrfs_trans_handle *trans, 2599 struct reloc_control *rc, 2600 struct backref_node *node) 2601 { 2602 struct btrfs_root *root = rc->extent_root; 2603 u64 num_bytes; 2604 int ret; 2605 u64 tmp; 2606 2607 
num_bytes = calcu_metadata_size(rc, node, 1) * 2; 2608 2609 trans->block_rsv = rc->block_rsv; 2610 rc->reserved_bytes += num_bytes; 2611 2612 /* 2613 * We are under a transaction here so we can only do limited flushing. 2614 * If we get an enospc just kick back -EAGAIN so we know to drop the 2615 * transaction and try to refill when we can flush all the things. 2616 */ 2617 ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes, 2618 BTRFS_RESERVE_FLUSH_LIMIT); 2619 if (ret) { 2620 tmp = rc->extent_root->nodesize * RELOCATION_RESERVED_NODES; 2621 while (tmp <= rc->reserved_bytes) 2622 tmp <<= 1; 2623 /* 2624 * only one thread can access block_rsv at this point, 2625 * so we don't need hold lock to protect block_rsv. 2626 * we expand more reservation size here to allow enough 2627 * space for relocation and we will return eailer in 2628 * enospc case. 2629 */ 2630 rc->block_rsv->size = tmp + rc->extent_root->nodesize * 2631 RELOCATION_RESERVED_NODES; 2632 return -EAGAIN; 2633 } 2634 2635 return 0; 2636 } 2637 2638 /* 2639 * relocate a block tree, and then update pointers in upper level 2640 * blocks that reference the block to point to the new location. 2641 * 2642 * if called by link_to_upper, the block has already been relocated. 2643 * in that case this function just updates pointers. 
2644 */ 2645 static int do_relocation(struct btrfs_trans_handle *trans, 2646 struct reloc_control *rc, 2647 struct backref_node *node, 2648 struct btrfs_key *key, 2649 struct btrfs_path *path, int lowest) 2650 { 2651 struct backref_node *upper; 2652 struct backref_edge *edge; 2653 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2654 struct btrfs_root *root; 2655 struct extent_buffer *eb; 2656 u32 blocksize; 2657 u64 bytenr; 2658 u64 generation; 2659 int slot; 2660 int ret; 2661 int err = 0; 2662 2663 BUG_ON(lowest && node->eb); 2664 2665 path->lowest_level = node->level + 1; 2666 rc->backref_cache.path[node->level] = node; 2667 list_for_each_entry(edge, &node->upper, list[LOWER]) { 2668 cond_resched(); 2669 2670 upper = edge->node[UPPER]; 2671 root = select_reloc_root(trans, rc, upper, edges); 2672 BUG_ON(!root); 2673 2674 if (upper->eb && !upper->locked) { 2675 if (!lowest) { 2676 ret = btrfs_bin_search(upper->eb, key, 2677 upper->level, &slot); 2678 BUG_ON(ret); 2679 bytenr = btrfs_node_blockptr(upper->eb, slot); 2680 if (node->eb->start == bytenr) 2681 goto next; 2682 } 2683 drop_node_buffer(upper); 2684 } 2685 2686 if (!upper->eb) { 2687 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2688 if (ret < 0) { 2689 err = ret; 2690 break; 2691 } 2692 BUG_ON(ret > 0); 2693 2694 if (!upper->eb) { 2695 upper->eb = path->nodes[upper->level]; 2696 path->nodes[upper->level] = NULL; 2697 } else { 2698 BUG_ON(upper->eb != path->nodes[upper->level]); 2699 } 2700 2701 upper->locked = 1; 2702 path->locks[upper->level] = 0; 2703 2704 slot = path->slots[upper->level]; 2705 btrfs_release_path(path); 2706 } else { 2707 ret = btrfs_bin_search(upper->eb, key, upper->level, 2708 &slot); 2709 BUG_ON(ret); 2710 } 2711 2712 bytenr = btrfs_node_blockptr(upper->eb, slot); 2713 if (lowest) { 2714 BUG_ON(bytenr != node->bytenr); 2715 } else { 2716 if (node->eb->start == bytenr) 2717 goto next; 2718 } 2719 2720 blocksize = root->nodesize; 2721 generation = 
btrfs_node_ptr_generation(upper->eb, slot); 2722 eb = read_tree_block(root, bytenr, generation); 2723 if (IS_ERR(eb)) { 2724 err = PTR_ERR(eb); 2725 goto next; 2726 } else if (!extent_buffer_uptodate(eb)) { 2727 free_extent_buffer(eb); 2728 err = -EIO; 2729 goto next; 2730 } 2731 btrfs_tree_lock(eb); 2732 btrfs_set_lock_blocking(eb); 2733 2734 if (!node->eb) { 2735 ret = btrfs_cow_block(trans, root, eb, upper->eb, 2736 slot, &eb); 2737 btrfs_tree_unlock(eb); 2738 free_extent_buffer(eb); 2739 if (ret < 0) { 2740 err = ret; 2741 goto next; 2742 } 2743 BUG_ON(node->eb != eb); 2744 } else { 2745 btrfs_set_node_blockptr(upper->eb, slot, 2746 node->eb->start); 2747 btrfs_set_node_ptr_generation(upper->eb, slot, 2748 trans->transid); 2749 btrfs_mark_buffer_dirty(upper->eb); 2750 2751 ret = btrfs_inc_extent_ref(trans, root, 2752 node->eb->start, blocksize, 2753 upper->eb->start, 2754 btrfs_header_owner(upper->eb), 2755 node->level, 0); 2756 BUG_ON(ret); 2757 2758 ret = btrfs_drop_subtree(trans, root, eb, upper->eb); 2759 BUG_ON(ret); 2760 } 2761 next: 2762 if (!upper->pending) 2763 drop_node_buffer(upper); 2764 else 2765 unlock_node_buffer(upper); 2766 if (err) 2767 break; 2768 } 2769 2770 if (!err && node->pending) { 2771 drop_node_buffer(node); 2772 list_move_tail(&node->list, &rc->backref_cache.changed); 2773 node->pending = 0; 2774 } 2775 2776 path->lowest_level = 0; 2777 BUG_ON(err == -ENOSPC); 2778 return err; 2779 } 2780 2781 static int link_to_upper(struct btrfs_trans_handle *trans, 2782 struct reloc_control *rc, 2783 struct backref_node *node, 2784 struct btrfs_path *path) 2785 { 2786 struct btrfs_key key; 2787 2788 btrfs_node_key_to_cpu(node->eb, &key, 0); 2789 return do_relocation(trans, rc, node, &key, path, 0); 2790 } 2791 2792 static int finish_pending_nodes(struct btrfs_trans_handle *trans, 2793 struct reloc_control *rc, 2794 struct btrfs_path *path, int err) 2795 { 2796 LIST_HEAD(list); 2797 struct backref_cache *cache = &rc->backref_cache; 2798 struct 
backref_node *node; 2799 int level; 2800 int ret; 2801 2802 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2803 while (!list_empty(&cache->pending[level])) { 2804 node = list_entry(cache->pending[level].next, 2805 struct backref_node, list); 2806 list_move_tail(&node->list, &list); 2807 BUG_ON(!node->pending); 2808 2809 if (!err) { 2810 ret = link_to_upper(trans, rc, node, path); 2811 if (ret < 0) 2812 err = ret; 2813 } 2814 } 2815 list_splice_init(&list, &cache->pending[level]); 2816 } 2817 return err; 2818 } 2819 2820 static void mark_block_processed(struct reloc_control *rc, 2821 u64 bytenr, u32 blocksize) 2822 { 2823 set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1, 2824 EXTENT_DIRTY); 2825 } 2826 2827 static void __mark_block_processed(struct reloc_control *rc, 2828 struct backref_node *node) 2829 { 2830 u32 blocksize; 2831 if (node->level == 0 || 2832 in_block_group(node->bytenr, rc->block_group)) { 2833 blocksize = rc->extent_root->nodesize; 2834 mark_block_processed(rc, node->bytenr, blocksize); 2835 } 2836 node->processed = 1; 2837 } 2838 2839 /* 2840 * mark a block and all blocks directly/indirectly reference the block 2841 * as processed. 
2842 */ 2843 static void update_processed_blocks(struct reloc_control *rc, 2844 struct backref_node *node) 2845 { 2846 struct backref_node *next = node; 2847 struct backref_edge *edge; 2848 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2849 int index = 0; 2850 2851 while (next) { 2852 cond_resched(); 2853 while (1) { 2854 if (next->processed) 2855 break; 2856 2857 __mark_block_processed(rc, next); 2858 2859 if (list_empty(&next->upper)) 2860 break; 2861 2862 edge = list_entry(next->upper.next, 2863 struct backref_edge, list[LOWER]); 2864 edges[index++] = edge; 2865 next = edge->node[UPPER]; 2866 } 2867 next = walk_down_backref(edges, &index); 2868 } 2869 } 2870 2871 static int tree_block_processed(u64 bytenr, struct reloc_control *rc) 2872 { 2873 u32 blocksize = rc->extent_root->nodesize; 2874 2875 if (test_range_bit(&rc->processed_blocks, bytenr, 2876 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) 2877 return 1; 2878 return 0; 2879 } 2880 2881 static int get_tree_block_key(struct reloc_control *rc, 2882 struct tree_block *block) 2883 { 2884 struct extent_buffer *eb; 2885 2886 BUG_ON(block->key_ready); 2887 eb = read_tree_block(rc->extent_root, block->bytenr, 2888 block->key.offset); 2889 if (IS_ERR(eb)) { 2890 return PTR_ERR(eb); 2891 } else if (!extent_buffer_uptodate(eb)) { 2892 free_extent_buffer(eb); 2893 return -EIO; 2894 } 2895 WARN_ON(btrfs_header_level(eb) != block->level); 2896 if (block->level == 0) 2897 btrfs_item_key_to_cpu(eb, &block->key, 0); 2898 else 2899 btrfs_node_key_to_cpu(eb, &block->key, 0); 2900 free_extent_buffer(eb); 2901 block->key_ready = 1; 2902 return 0; 2903 } 2904 2905 /* 2906 * helper function to relocate a tree block 2907 */ 2908 static int relocate_tree_block(struct btrfs_trans_handle *trans, 2909 struct reloc_control *rc, 2910 struct backref_node *node, 2911 struct btrfs_key *key, 2912 struct btrfs_path *path) 2913 { 2914 struct btrfs_root *root; 2915 int ret = 0; 2916 2917 if (!node) 2918 return 0; 2919 2920 
BUG_ON(node->processed); 2921 root = select_one_root(node); 2922 if (root == ERR_PTR(-ENOENT)) { 2923 update_processed_blocks(rc, node); 2924 goto out; 2925 } 2926 2927 if (!root || test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { 2928 ret = reserve_metadata_space(trans, rc, node); 2929 if (ret) 2930 goto out; 2931 } 2932 2933 if (root) { 2934 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { 2935 BUG_ON(node->new_bytenr); 2936 BUG_ON(!list_empty(&node->list)); 2937 btrfs_record_root_in_trans(trans, root); 2938 root = root->reloc_root; 2939 node->new_bytenr = root->node->start; 2940 node->root = root; 2941 list_add_tail(&node->list, &rc->backref_cache.changed); 2942 } else { 2943 path->lowest_level = node->level; 2944 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2945 btrfs_release_path(path); 2946 if (ret > 0) 2947 ret = 0; 2948 } 2949 if (!ret) 2950 update_processed_blocks(rc, node); 2951 } else { 2952 ret = do_relocation(trans, rc, node, key, path, 1); 2953 } 2954 out: 2955 if (ret || node->level == 0 || node->cowonly) 2956 remove_backref_node(&rc->backref_cache, node); 2957 return ret; 2958 } 2959 2960 /* 2961 * relocate a list of blocks 2962 */ 2963 static noinline_for_stack 2964 int relocate_tree_blocks(struct btrfs_trans_handle *trans, 2965 struct reloc_control *rc, struct rb_root *blocks) 2966 { 2967 struct backref_node *node; 2968 struct btrfs_path *path; 2969 struct tree_block *block; 2970 struct rb_node *rb_node; 2971 int ret; 2972 int err = 0; 2973 2974 path = btrfs_alloc_path(); 2975 if (!path) { 2976 err = -ENOMEM; 2977 goto out_free_blocks; 2978 } 2979 2980 rb_node = rb_first(blocks); 2981 while (rb_node) { 2982 block = rb_entry(rb_node, struct tree_block, rb_node); 2983 if (!block->key_ready) 2984 readahead_tree_block(rc->extent_root, block->bytenr); 2985 rb_node = rb_next(rb_node); 2986 } 2987 2988 rb_node = rb_first(blocks); 2989 while (rb_node) { 2990 block = rb_entry(rb_node, struct tree_block, rb_node); 2991 if (!block->key_ready) { 
2992 err = get_tree_block_key(rc, block); 2993 if (err) 2994 goto out_free_path; 2995 } 2996 rb_node = rb_next(rb_node); 2997 } 2998 2999 rb_node = rb_first(blocks); 3000 while (rb_node) { 3001 block = rb_entry(rb_node, struct tree_block, rb_node); 3002 3003 node = build_backref_tree(rc, &block->key, 3004 block->level, block->bytenr); 3005 if (IS_ERR(node)) { 3006 err = PTR_ERR(node); 3007 goto out; 3008 } 3009 3010 ret = relocate_tree_block(trans, rc, node, &block->key, 3011 path); 3012 if (ret < 0) { 3013 if (ret != -EAGAIN || rb_node == rb_first(blocks)) 3014 err = ret; 3015 goto out; 3016 } 3017 rb_node = rb_next(rb_node); 3018 } 3019 out: 3020 err = finish_pending_nodes(trans, rc, path, err); 3021 3022 out_free_path: 3023 btrfs_free_path(path); 3024 out_free_blocks: 3025 free_block_list(blocks); 3026 return err; 3027 } 3028 3029 static noinline_for_stack 3030 int prealloc_file_extent_cluster(struct inode *inode, 3031 struct file_extent_cluster *cluster) 3032 { 3033 u64 alloc_hint = 0; 3034 u64 start; 3035 u64 end; 3036 u64 offset = BTRFS_I(inode)->index_cnt; 3037 u64 num_bytes; 3038 int nr = 0; 3039 int ret = 0; 3040 3041 BUG_ON(cluster->start != cluster->boundary[0]); 3042 inode_lock(inode); 3043 3044 ret = btrfs_check_data_free_space(inode, cluster->start, 3045 cluster->end + 1 - cluster->start); 3046 if (ret) 3047 goto out; 3048 3049 while (nr < cluster->nr) { 3050 start = cluster->boundary[nr] - offset; 3051 if (nr + 1 < cluster->nr) 3052 end = cluster->boundary[nr + 1] - 1 - offset; 3053 else 3054 end = cluster->end - offset; 3055 3056 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 3057 num_bytes = end + 1 - start; 3058 ret = btrfs_prealloc_file_range(inode, 0, start, 3059 num_bytes, num_bytes, 3060 end + 1, &alloc_hint); 3061 unlock_extent(&BTRFS_I(inode)->io_tree, start, end); 3062 if (ret) 3063 break; 3064 nr++; 3065 } 3066 btrfs_free_reserved_data_space(inode, cluster->start, 3067 cluster->end + 1 - cluster->start); 3068 out: 3069 
inode_unlock(inode); 3070 return ret; 3071 } 3072 3073 static noinline_for_stack 3074 int setup_extent_mapping(struct inode *inode, u64 start, u64 end, 3075 u64 block_start) 3076 { 3077 struct btrfs_root *root = BTRFS_I(inode)->root; 3078 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 3079 struct extent_map *em; 3080 int ret = 0; 3081 3082 em = alloc_extent_map(); 3083 if (!em) 3084 return -ENOMEM; 3085 3086 em->start = start; 3087 em->len = end + 1 - start; 3088 em->block_len = em->len; 3089 em->block_start = block_start; 3090 em->bdev = root->fs_info->fs_devices->latest_bdev; 3091 set_bit(EXTENT_FLAG_PINNED, &em->flags); 3092 3093 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 3094 while (1) { 3095 write_lock(&em_tree->lock); 3096 ret = add_extent_mapping(em_tree, em, 0); 3097 write_unlock(&em_tree->lock); 3098 if (ret != -EEXIST) { 3099 free_extent_map(em); 3100 break; 3101 } 3102 btrfs_drop_extent_cache(inode, start, end, 0); 3103 } 3104 unlock_extent(&BTRFS_I(inode)->io_tree, start, end); 3105 return ret; 3106 } 3107 3108 static int relocate_file_extent_cluster(struct inode *inode, 3109 struct file_extent_cluster *cluster) 3110 { 3111 u64 page_start; 3112 u64 page_end; 3113 u64 offset = BTRFS_I(inode)->index_cnt; 3114 unsigned long index; 3115 unsigned long last_index; 3116 struct page *page; 3117 struct file_ra_state *ra; 3118 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 3119 int nr = 0; 3120 int ret = 0; 3121 3122 if (!cluster->nr) 3123 return 0; 3124 3125 ra = kzalloc(sizeof(*ra), GFP_NOFS); 3126 if (!ra) 3127 return -ENOMEM; 3128 3129 ret = prealloc_file_extent_cluster(inode, cluster); 3130 if (ret) 3131 goto out; 3132 3133 file_ra_state_init(ra, inode->i_mapping); 3134 3135 ret = setup_extent_mapping(inode, cluster->start - offset, 3136 cluster->end - offset, cluster->start); 3137 if (ret) 3138 goto out; 3139 3140 index = (cluster->start - offset) >> PAGE_SHIFT; 3141 last_index = (cluster->end - offset) >> PAGE_SHIFT; 3142 
while (index <= last_index) { 3143 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_SIZE); 3144 if (ret) 3145 goto out; 3146 3147 page = find_lock_page(inode->i_mapping, index); 3148 if (!page) { 3149 page_cache_sync_readahead(inode->i_mapping, 3150 ra, NULL, index, 3151 last_index + 1 - index); 3152 page = find_or_create_page(inode->i_mapping, index, 3153 mask); 3154 if (!page) { 3155 btrfs_delalloc_release_metadata(inode, 3156 PAGE_SIZE); 3157 ret = -ENOMEM; 3158 goto out; 3159 } 3160 } 3161 3162 if (PageReadahead(page)) { 3163 page_cache_async_readahead(inode->i_mapping, 3164 ra, NULL, page, index, 3165 last_index + 1 - index); 3166 } 3167 3168 if (!PageUptodate(page)) { 3169 btrfs_readpage(NULL, page); 3170 lock_page(page); 3171 if (!PageUptodate(page)) { 3172 unlock_page(page); 3173 put_page(page); 3174 btrfs_delalloc_release_metadata(inode, 3175 PAGE_SIZE); 3176 ret = -EIO; 3177 goto out; 3178 } 3179 } 3180 3181 page_start = page_offset(page); 3182 page_end = page_start + PAGE_SIZE - 1; 3183 3184 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); 3185 3186 set_page_extent_mapped(page); 3187 3188 if (nr < cluster->nr && 3189 page_start + offset == cluster->boundary[nr]) { 3190 set_extent_bits(&BTRFS_I(inode)->io_tree, 3191 page_start, page_end, 3192 EXTENT_BOUNDARY); 3193 nr++; 3194 } 3195 3196 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); 3197 set_page_dirty(page); 3198 3199 unlock_extent(&BTRFS_I(inode)->io_tree, 3200 page_start, page_end); 3201 unlock_page(page); 3202 put_page(page); 3203 3204 index++; 3205 balance_dirty_pages_ratelimited(inode->i_mapping); 3206 btrfs_throttle(BTRFS_I(inode)->root); 3207 } 3208 WARN_ON(nr != cluster->nr); 3209 out: 3210 kfree(ra); 3211 return ret; 3212 } 3213 3214 static noinline_for_stack 3215 int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, 3216 struct file_extent_cluster *cluster) 3217 { 3218 int ret; 3219 3220 if (cluster->nr > 0 && extent_key->objectid != 
cluster->end + 1) { 3221 ret = relocate_file_extent_cluster(inode, cluster); 3222 if (ret) 3223 return ret; 3224 cluster->nr = 0; 3225 } 3226 3227 if (!cluster->nr) 3228 cluster->start = extent_key->objectid; 3229 else 3230 BUG_ON(cluster->nr >= MAX_EXTENTS); 3231 cluster->end = extent_key->objectid + extent_key->offset - 1; 3232 cluster->boundary[cluster->nr] = extent_key->objectid; 3233 cluster->nr++; 3234 3235 if (cluster->nr >= MAX_EXTENTS) { 3236 ret = relocate_file_extent_cluster(inode, cluster); 3237 if (ret) 3238 return ret; 3239 cluster->nr = 0; 3240 } 3241 return 0; 3242 } 3243 3244 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3245 static int get_ref_objectid_v0(struct reloc_control *rc, 3246 struct btrfs_path *path, 3247 struct btrfs_key *extent_key, 3248 u64 *ref_objectid, int *path_change) 3249 { 3250 struct btrfs_key key; 3251 struct extent_buffer *leaf; 3252 struct btrfs_extent_ref_v0 *ref0; 3253 int ret; 3254 int slot; 3255 3256 leaf = path->nodes[0]; 3257 slot = path->slots[0]; 3258 while (1) { 3259 if (slot >= btrfs_header_nritems(leaf)) { 3260 ret = btrfs_next_leaf(rc->extent_root, path); 3261 if (ret < 0) 3262 return ret; 3263 BUG_ON(ret > 0); 3264 leaf = path->nodes[0]; 3265 slot = path->slots[0]; 3266 if (path_change) 3267 *path_change = 1; 3268 } 3269 btrfs_item_key_to_cpu(leaf, &key, slot); 3270 if (key.objectid != extent_key->objectid) 3271 return -ENOENT; 3272 3273 if (key.type != BTRFS_EXTENT_REF_V0_KEY) { 3274 slot++; 3275 continue; 3276 } 3277 ref0 = btrfs_item_ptr(leaf, slot, 3278 struct btrfs_extent_ref_v0); 3279 *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0); 3280 break; 3281 } 3282 return 0; 3283 } 3284 #endif 3285 3286 /* 3287 * helper to add a tree block to the list. 
/*
 * Queue the tree block described by the extent item at @path into @blocks.
 *
 * The level and generation of the block are taken from the extent item
 * (or, for v0 extent items, from the following v0 ref).  @path is released
 * before returning on the normal paths.
 *
 * Returns 0 on success, -ENOMEM if the tree_block cannot be allocated, or
 * the error from get_ref_objectid_v0().
 */
static int add_tree_block(struct reloc_control *rc,
			  struct btrfs_key *extent_key,
			  struct btrfs_path *path,
			  struct rb_root *blocks)
{
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct btrfs_tree_block_info *bi;
	struct tree_block *block;
	struct rb_node *rb_node;
	u32 item_size;
	int level = -1;
	u64 generation;

	eb = path->nodes[0];
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
	    item_size >= sizeof(*ei) + sizeof(*bi)) {
		ei = btrfs_item_ptr(eb, path->slots[0],
				struct btrfs_extent_item);
		if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
			/* the tree_block_info directly follows the item */
			bi = (struct btrfs_tree_block_info *)(ei + 1);
			level = btrfs_tree_block_level(eb, bi);
		} else {
			/* metadata items carry the level in the key offset */
			level = (int)extent_key->offset;
		}
		generation = btrfs_extent_generation(eb, ei);
	} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		u64 ref_owner;
		int ret;

		BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
		ret = get_ref_objectid_v0(rc, path, extent_key,
					  &ref_owner, NULL);
		if (ret < 0)
			return ret;
		/* for a v0 tree block ref the objectid is used as the level */
		BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
		level = (int)ref_owner;
		/* FIXME: get real generation */
		generation = 0;
#else
		BUG();
#endif
	}

	btrfs_release_path(path);

	BUG_ON(level == -1);

	block = kmalloc(sizeof(*block), GFP_NOFS);
	if (!block)
		return -ENOMEM;

	/*
	 * The real key is not known yet (key_ready = 0).  Until then
	 * key.objectid holds the node size and key.offset the generation;
	 * get_tree_block_key() passes key.offset to read_tree_block().
	 */
	block->bytenr = extent_key->objectid;
	block->key.objectid = rc->extent_root->nodesize;
	block->key.offset = generation;
	block->level = level;
	block->key_ready = 0;

	/* a block with this bytenr already in the tree is fatal */
	rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
	if (rb_node)
		backref_tree_panic(rb_node, -EEXIST, block->bytenr);

	return 0;
}
add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY 3360 */ 3361 static int __add_tree_block(struct reloc_control *rc, 3362 u64 bytenr, u32 blocksize, 3363 struct rb_root *blocks) 3364 { 3365 struct btrfs_path *path; 3366 struct btrfs_key key; 3367 int ret; 3368 bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info, 3369 SKINNY_METADATA); 3370 3371 if (tree_block_processed(bytenr, rc)) 3372 return 0; 3373 3374 if (tree_search(blocks, bytenr)) 3375 return 0; 3376 3377 path = btrfs_alloc_path(); 3378 if (!path) 3379 return -ENOMEM; 3380 again: 3381 key.objectid = bytenr; 3382 if (skinny) { 3383 key.type = BTRFS_METADATA_ITEM_KEY; 3384 key.offset = (u64)-1; 3385 } else { 3386 key.type = BTRFS_EXTENT_ITEM_KEY; 3387 key.offset = blocksize; 3388 } 3389 3390 path->search_commit_root = 1; 3391 path->skip_locking = 1; 3392 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); 3393 if (ret < 0) 3394 goto out; 3395 3396 if (ret > 0 && skinny) { 3397 if (path->slots[0]) { 3398 path->slots[0]--; 3399 btrfs_item_key_to_cpu(path->nodes[0], &key, 3400 path->slots[0]); 3401 if (key.objectid == bytenr && 3402 (key.type == BTRFS_METADATA_ITEM_KEY || 3403 (key.type == BTRFS_EXTENT_ITEM_KEY && 3404 key.offset == blocksize))) 3405 ret = 0; 3406 } 3407 3408 if (ret) { 3409 skinny = false; 3410 btrfs_release_path(path); 3411 goto again; 3412 } 3413 } 3414 BUG_ON(ret); 3415 3416 ret = add_tree_block(rc, &key, path, blocks); 3417 out: 3418 btrfs_free_path(path); 3419 return ret; 3420 } 3421 3422 /* 3423 * helper to check if the block use full backrefs for pointers in it 3424 */ 3425 static int block_use_full_backref(struct reloc_control *rc, 3426 struct extent_buffer *eb) 3427 { 3428 u64 flags; 3429 int ret; 3430 3431 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) || 3432 btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) 3433 return 1; 3434 3435 ret = btrfs_lookup_extent_info(NULL, rc->extent_root, 3436 eb->start, btrfs_header_level(eb), 1, 3437 NULL, 
&flags); 3438 BUG_ON(ret); 3439 3440 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) 3441 ret = 1; 3442 else 3443 ret = 0; 3444 return ret; 3445 } 3446 3447 static int delete_block_group_cache(struct btrfs_fs_info *fs_info, 3448 struct btrfs_block_group_cache *block_group, 3449 struct inode *inode, 3450 u64 ino) 3451 { 3452 struct btrfs_key key; 3453 struct btrfs_root *root = fs_info->tree_root; 3454 struct btrfs_trans_handle *trans; 3455 int ret = 0; 3456 3457 if (inode) 3458 goto truncate; 3459 3460 key.objectid = ino; 3461 key.type = BTRFS_INODE_ITEM_KEY; 3462 key.offset = 0; 3463 3464 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 3465 if (IS_ERR(inode) || is_bad_inode(inode)) { 3466 if (!IS_ERR(inode)) 3467 iput(inode); 3468 return -ENOENT; 3469 } 3470 3471 truncate: 3472 ret = btrfs_check_trunc_cache_free_space(root, 3473 &fs_info->global_block_rsv); 3474 if (ret) 3475 goto out; 3476 3477 trans = btrfs_join_transaction(root); 3478 if (IS_ERR(trans)) { 3479 ret = PTR_ERR(trans); 3480 goto out; 3481 } 3482 3483 ret = btrfs_truncate_free_space_cache(root, trans, block_group, inode); 3484 3485 btrfs_end_transaction(trans, root); 3486 btrfs_btree_balance_dirty(root); 3487 out: 3488 iput(inode); 3489 return ret; 3490 } 3491 3492 /* 3493 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY 3494 * this function scans fs tree to find blocks reference the data extent 3495 */ 3496 static int find_data_references(struct reloc_control *rc, 3497 struct btrfs_key *extent_key, 3498 struct extent_buffer *leaf, 3499 struct btrfs_extent_data_ref *ref, 3500 struct rb_root *blocks) 3501 { 3502 struct btrfs_path *path; 3503 struct tree_block *block; 3504 struct btrfs_root *root; 3505 struct btrfs_file_extent_item *fi; 3506 struct rb_node *rb_node; 3507 struct btrfs_key key; 3508 u64 ref_root; 3509 u64 ref_objectid; 3510 u64 ref_offset; 3511 u32 ref_count; 3512 u32 nritems; 3513 int err = 0; 3514 int added = 0; 3515 int counted; 3516 int ret; 3517 3518 ref_root 
= btrfs_extent_data_ref_root(leaf, ref); 3519 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); 3520 ref_offset = btrfs_extent_data_ref_offset(leaf, ref); 3521 ref_count = btrfs_extent_data_ref_count(leaf, ref); 3522 3523 /* 3524 * This is an extent belonging to the free space cache, lets just delete 3525 * it and redo the search. 3526 */ 3527 if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { 3528 ret = delete_block_group_cache(rc->extent_root->fs_info, 3529 rc->block_group, 3530 NULL, ref_objectid); 3531 if (ret != -ENOENT) 3532 return ret; 3533 ret = 0; 3534 } 3535 3536 path = btrfs_alloc_path(); 3537 if (!path) 3538 return -ENOMEM; 3539 path->reada = READA_FORWARD; 3540 3541 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3542 if (IS_ERR(root)) { 3543 err = PTR_ERR(root); 3544 goto out; 3545 } 3546 3547 key.objectid = ref_objectid; 3548 key.type = BTRFS_EXTENT_DATA_KEY; 3549 if (ref_offset > ((u64)-1 << 32)) 3550 key.offset = 0; 3551 else 3552 key.offset = ref_offset; 3553 3554 path->search_commit_root = 1; 3555 path->skip_locking = 1; 3556 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3557 if (ret < 0) { 3558 err = ret; 3559 goto out; 3560 } 3561 3562 leaf = path->nodes[0]; 3563 nritems = btrfs_header_nritems(leaf); 3564 /* 3565 * the references in tree blocks that use full backrefs 3566 * are not counted in 3567 */ 3568 if (block_use_full_backref(rc, leaf)) 3569 counted = 0; 3570 else 3571 counted = 1; 3572 rb_node = tree_search(blocks, leaf->start); 3573 if (rb_node) { 3574 if (counted) 3575 added = 1; 3576 else 3577 path->slots[0] = nritems; 3578 } 3579 3580 while (ref_count > 0) { 3581 while (path->slots[0] >= nritems) { 3582 ret = btrfs_next_leaf(root, path); 3583 if (ret < 0) { 3584 err = ret; 3585 goto out; 3586 } 3587 if (WARN_ON(ret > 0)) 3588 goto out; 3589 3590 leaf = path->nodes[0]; 3591 nritems = btrfs_header_nritems(leaf); 3592 added = 0; 3593 3594 if (block_use_full_backref(rc, leaf)) 3595 counted = 0; 3596 else 3597 counted = 
/*
 * helper to find all tree blocks that reference a given data extent
 *
 * @path must point at the extent item of @extent_key.  The inline refs
 * stored in that item are handled first, then the following items with the
 * same objectid (keyed refs).  Shared data refs go through
 * __add_tree_block(), extent data refs through find_data_references().
 *
 * @path is released before returning.  On error @blocks is emptied with
 * free_block_list() and the negative errno is returned; 0 on success.
 */
static noinline_for_stack
int add_data_references(struct reloc_control *rc,
			struct btrfs_key *extent_key,
			struct btrfs_path *path,
			struct rb_root *blocks)
{
	struct btrfs_key key;
	struct extent_buffer *eb;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_extent_inline_ref *iref;
	unsigned long ptr;
	unsigned long end;
	u32 blocksize = rc->extent_root->nodesize;
	int ret = 0;
	int err = 0;

	eb = path->nodes[0];
	ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
	end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	/* an item of exactly the v0 size skips the inline-ref loop */
	if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
		ptr = end;
	else
#endif
		ptr += sizeof(struct btrfs_extent_item);

	/* walk the inline refs packed after the extent item header */
	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		key.type = btrfs_extent_inline_ref_type(eb, iref);
		if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
			/* the inline ref offset is passed as the block bytenr */
			key.offset = btrfs_extent_inline_ref_offset(eb, iref);
			ret = __add_tree_block(rc, key.offset, blocksize,
					       blocks);
		} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
			/* the data ref overlays the inline ref's offset field */
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			ret = find_data_references(rc, extent_key,
						   eb, dref, blocks);
		} else {
			BUG();
		}
		if (ret) {
			err = ret;
			goto out;
		}
		ptr += btrfs_extent_inline_ref_size(key.type);
	}
	WARN_ON(ptr > end);

	/*
	 * Now the keyed items.  The scan starts at the extent item itself,
	 * which falls into the final "else" branch and is skipped.
	 */
	while (1) {
		cond_resched();
		eb = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(eb)) {
			ret = btrfs_next_leaf(rc->extent_root, path);
			if (ret < 0) {
				err = ret;
				break;
			}
			/* no more leaves */
			if (ret > 0)
				break;
			eb = path->nodes[0];
		}

		btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
		/* left the items belonging to this extent */
		if (key.objectid != extent_key->objectid)
			break;

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
		    key.type == BTRFS_EXTENT_REF_V0_KEY) {
#else
		BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
		if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
#endif
			/* the key offset is passed as the block bytenr */
			ret = __add_tree_block(rc, key.offset, blocksize,
					       blocks);
		} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
			dref = btrfs_item_ptr(eb, path->slots[0],
					      struct btrfs_extent_data_ref);
			ret = find_data_references(rc, extent_key,
						   eb, dref, blocks);
		} else {
			ret = 0;
		}
		if (ret) {
			err = ret;
			break;
		}
		path->slots[0]++;
	}
out:
	btrfs_release_path(path);
	if (err)
		free_block_list(blocks);
	return err;
}
3831 } else { 3832 if (key.type == BTRFS_EXTENT_ITEM_KEY) 3833 rc->search_start = key.objectid + key.offset; 3834 else 3835 rc->search_start = key.objectid + 3836 rc->extent_root->nodesize; 3837 memcpy(extent_key, &key, sizeof(key)); 3838 return 0; 3839 } 3840 } 3841 btrfs_release_path(path); 3842 return ret; 3843 } 3844 3845 static void set_reloc_control(struct reloc_control *rc) 3846 { 3847 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3848 3849 mutex_lock(&fs_info->reloc_mutex); 3850 fs_info->reloc_ctl = rc; 3851 mutex_unlock(&fs_info->reloc_mutex); 3852 } 3853 3854 static void unset_reloc_control(struct reloc_control *rc) 3855 { 3856 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3857 3858 mutex_lock(&fs_info->reloc_mutex); 3859 fs_info->reloc_ctl = NULL; 3860 mutex_unlock(&fs_info->reloc_mutex); 3861 } 3862 3863 static int check_extent_flags(u64 flags) 3864 { 3865 if ((flags & BTRFS_EXTENT_FLAG_DATA) && 3866 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) 3867 return 1; 3868 if (!(flags & BTRFS_EXTENT_FLAG_DATA) && 3869 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) 3870 return 1; 3871 if ((flags & BTRFS_EXTENT_FLAG_DATA) && 3872 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 3873 return 1; 3874 return 0; 3875 } 3876 3877 static noinline_for_stack 3878 int prepare_to_relocate(struct reloc_control *rc) 3879 { 3880 struct btrfs_trans_handle *trans; 3881 int ret; 3882 3883 rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root, 3884 BTRFS_BLOCK_RSV_TEMP); 3885 if (!rc->block_rsv) 3886 return -ENOMEM; 3887 3888 memset(&rc->cluster, 0, sizeof(rc->cluster)); 3889 rc->search_start = rc->block_group->key.objectid; 3890 rc->extents_found = 0; 3891 rc->nodes_relocated = 0; 3892 rc->merging_rsv_size = 0; 3893 rc->reserved_bytes = 0; 3894 rc->block_rsv->size = rc->extent_root->nodesize * 3895 RELOCATION_RESERVED_NODES; 3896 ret = btrfs_block_rsv_refill(rc->extent_root, 3897 rc->block_rsv, rc->block_rsv->size, 3898 BTRFS_RESERVE_FLUSH_ALL); 3899 if (ret) 3900 return ret; 
3901 3902 rc->create_reloc_tree = 1; 3903 set_reloc_control(rc); 3904 3905 trans = btrfs_join_transaction(rc->extent_root); 3906 if (IS_ERR(trans)) { 3907 unset_reloc_control(rc); 3908 /* 3909 * extent tree is not a ref_cow tree and has no reloc_root to 3910 * cleanup. And callers are responsible to free the above 3911 * block rsv. 3912 */ 3913 return PTR_ERR(trans); 3914 } 3915 btrfs_commit_transaction(trans, rc->extent_root); 3916 return 0; 3917 } 3918 3919 static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 3920 { 3921 struct rb_root blocks = RB_ROOT; 3922 struct btrfs_key key; 3923 struct btrfs_trans_handle *trans = NULL; 3924 struct btrfs_path *path; 3925 struct btrfs_extent_item *ei; 3926 u64 flags; 3927 u32 item_size; 3928 int ret; 3929 int err = 0; 3930 int progress = 0; 3931 3932 path = btrfs_alloc_path(); 3933 if (!path) 3934 return -ENOMEM; 3935 path->reada = READA_FORWARD; 3936 3937 ret = prepare_to_relocate(rc); 3938 if (ret) { 3939 err = ret; 3940 goto out_free; 3941 } 3942 3943 while (1) { 3944 rc->reserved_bytes = 0; 3945 ret = btrfs_block_rsv_refill(rc->extent_root, 3946 rc->block_rsv, rc->block_rsv->size, 3947 BTRFS_RESERVE_FLUSH_ALL); 3948 if (ret) { 3949 err = ret; 3950 break; 3951 } 3952 progress++; 3953 trans = btrfs_start_transaction(rc->extent_root, 0); 3954 if (IS_ERR(trans)) { 3955 err = PTR_ERR(trans); 3956 trans = NULL; 3957 break; 3958 } 3959 restart: 3960 if (update_backref_cache(trans, &rc->backref_cache)) { 3961 btrfs_end_transaction(trans, rc->extent_root); 3962 continue; 3963 } 3964 3965 ret = find_next_extent(rc, path, &key); 3966 if (ret < 0) 3967 err = ret; 3968 if (ret != 0) 3969 break; 3970 3971 rc->extents_found++; 3972 3973 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 3974 struct btrfs_extent_item); 3975 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 3976 if (item_size >= sizeof(*ei)) { 3977 flags = btrfs_extent_flags(path->nodes[0], ei); 3978 ret = check_extent_flags(flags); 
3979 BUG_ON(ret); 3980 3981 } else { 3982 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3983 u64 ref_owner; 3984 int path_change = 0; 3985 3986 BUG_ON(item_size != 3987 sizeof(struct btrfs_extent_item_v0)); 3988 ret = get_ref_objectid_v0(rc, path, &key, &ref_owner, 3989 &path_change); 3990 if (ret < 0) { 3991 err = ret; 3992 break; 3993 } 3994 if (ref_owner < BTRFS_FIRST_FREE_OBJECTID) 3995 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK; 3996 else 3997 flags = BTRFS_EXTENT_FLAG_DATA; 3998 3999 if (path_change) { 4000 btrfs_release_path(path); 4001 4002 path->search_commit_root = 1; 4003 path->skip_locking = 1; 4004 ret = btrfs_search_slot(NULL, rc->extent_root, 4005 &key, path, 0, 0); 4006 if (ret < 0) { 4007 err = ret; 4008 break; 4009 } 4010 BUG_ON(ret > 0); 4011 } 4012 #else 4013 BUG(); 4014 #endif 4015 } 4016 4017 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 4018 ret = add_tree_block(rc, &key, path, &blocks); 4019 } else if (rc->stage == UPDATE_DATA_PTRS && 4020 (flags & BTRFS_EXTENT_FLAG_DATA)) { 4021 ret = add_data_references(rc, &key, path, &blocks); 4022 } else { 4023 btrfs_release_path(path); 4024 ret = 0; 4025 } 4026 if (ret < 0) { 4027 err = ret; 4028 break; 4029 } 4030 4031 if (!RB_EMPTY_ROOT(&blocks)) { 4032 ret = relocate_tree_blocks(trans, rc, &blocks); 4033 if (ret < 0) { 4034 /* 4035 * if we fail to relocate tree blocks, force to update 4036 * backref cache when committing transaction. 
4037 */ 4038 rc->backref_cache.last_trans = trans->transid - 1; 4039 4040 if (ret != -EAGAIN) { 4041 err = ret; 4042 break; 4043 } 4044 rc->extents_found--; 4045 rc->search_start = key.objectid; 4046 } 4047 } 4048 4049 btrfs_end_transaction_throttle(trans, rc->extent_root); 4050 btrfs_btree_balance_dirty(rc->extent_root); 4051 trans = NULL; 4052 4053 if (rc->stage == MOVE_DATA_EXTENTS && 4054 (flags & BTRFS_EXTENT_FLAG_DATA)) { 4055 rc->found_file_extent = 1; 4056 ret = relocate_data_extent(rc->data_inode, 4057 &key, &rc->cluster); 4058 if (ret < 0) { 4059 err = ret; 4060 break; 4061 } 4062 } 4063 } 4064 if (trans && progress && err == -ENOSPC) { 4065 ret = btrfs_force_chunk_alloc(trans, rc->extent_root, 4066 rc->block_group->flags); 4067 if (ret == 1) { 4068 err = 0; 4069 progress = 0; 4070 goto restart; 4071 } 4072 } 4073 4074 btrfs_release_path(path); 4075 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY); 4076 4077 if (trans) { 4078 btrfs_end_transaction_throttle(trans, rc->extent_root); 4079 btrfs_btree_balance_dirty(rc->extent_root); 4080 } 4081 4082 if (!err) { 4083 ret = relocate_file_extent_cluster(rc->data_inode, 4084 &rc->cluster); 4085 if (ret < 0) 4086 err = ret; 4087 } 4088 4089 rc->create_reloc_tree = 0; 4090 set_reloc_control(rc); 4091 4092 backref_cache_cleanup(&rc->backref_cache); 4093 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); 4094 4095 err = prepare_to_merge(rc, err); 4096 4097 merge_reloc_roots(rc); 4098 4099 rc->merge_reloc_tree = 0; 4100 unset_reloc_control(rc); 4101 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); 4102 4103 /* get rid of pinned extents */ 4104 trans = btrfs_join_transaction(rc->extent_root); 4105 if (IS_ERR(trans)) 4106 err = PTR_ERR(trans); 4107 else 4108 btrfs_commit_transaction(trans, rc->extent_root); 4109 out_free: 4110 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); 4111 btrfs_free_path(path); 4112 return err; 4113 } 4114 4115 static int 
__insert_orphan_inode(struct btrfs_trans_handle *trans, 4116 struct btrfs_root *root, u64 objectid) 4117 { 4118 struct btrfs_path *path; 4119 struct btrfs_inode_item *item; 4120 struct extent_buffer *leaf; 4121 int ret; 4122 4123 path = btrfs_alloc_path(); 4124 if (!path) 4125 return -ENOMEM; 4126 4127 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 4128 if (ret) 4129 goto out; 4130 4131 leaf = path->nodes[0]; 4132 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 4133 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 4134 btrfs_set_inode_generation(leaf, item, 1); 4135 btrfs_set_inode_size(leaf, item, 0); 4136 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 4137 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | 4138 BTRFS_INODE_PREALLOC); 4139 btrfs_mark_buffer_dirty(leaf); 4140 out: 4141 btrfs_free_path(path); 4142 return ret; 4143 } 4144 4145 /* 4146 * helper to create inode for data relocation. 4147 * the inode is in data relocation tree and its link count is 0 4148 */ 4149 static noinline_for_stack 4150 struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, 4151 struct btrfs_block_group_cache *group) 4152 { 4153 struct inode *inode = NULL; 4154 struct btrfs_trans_handle *trans; 4155 struct btrfs_root *root; 4156 struct btrfs_key key; 4157 u64 objectid; 4158 int err = 0; 4159 4160 root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID); 4161 if (IS_ERR(root)) 4162 return ERR_CAST(root); 4163 4164 trans = btrfs_start_transaction(root, 6); 4165 if (IS_ERR(trans)) 4166 return ERR_CAST(trans); 4167 4168 err = btrfs_find_free_objectid(root, &objectid); 4169 if (err) 4170 goto out; 4171 4172 err = __insert_orphan_inode(trans, root, objectid); 4173 BUG_ON(err); 4174 4175 key.objectid = objectid; 4176 key.type = BTRFS_INODE_ITEM_KEY; 4177 key.offset = 0; 4178 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); 4179 BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); 4180 BTRFS_I(inode)->index_cnt 
= group->key.objectid; 4181 4182 err = btrfs_orphan_add(trans, inode); 4183 out: 4184 btrfs_end_transaction(trans, root); 4185 btrfs_btree_balance_dirty(root); 4186 if (err) { 4187 if (inode) 4188 iput(inode); 4189 inode = ERR_PTR(err); 4190 } 4191 return inode; 4192 } 4193 4194 static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info) 4195 { 4196 struct reloc_control *rc; 4197 4198 rc = kzalloc(sizeof(*rc), GFP_NOFS); 4199 if (!rc) 4200 return NULL; 4201 4202 INIT_LIST_HEAD(&rc->reloc_roots); 4203 backref_cache_init(&rc->backref_cache); 4204 mapping_tree_init(&rc->reloc_root_tree); 4205 extent_io_tree_init(&rc->processed_blocks, 4206 fs_info->btree_inode->i_mapping); 4207 return rc; 4208 } 4209 4210 /* 4211 * function to relocate all extents in a block group. 4212 */ 4213 int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) 4214 { 4215 struct btrfs_fs_info *fs_info = extent_root->fs_info; 4216 struct reloc_control *rc; 4217 struct inode *inode; 4218 struct btrfs_path *path; 4219 int ret; 4220 int rw = 0; 4221 int err = 0; 4222 4223 rc = alloc_reloc_control(fs_info); 4224 if (!rc) 4225 return -ENOMEM; 4226 4227 rc->extent_root = extent_root; 4228 4229 rc->block_group = btrfs_lookup_block_group(fs_info, group_start); 4230 BUG_ON(!rc->block_group); 4231 4232 ret = btrfs_inc_block_group_ro(extent_root, rc->block_group); 4233 if (ret) { 4234 err = ret; 4235 goto out; 4236 } 4237 rw = 1; 4238 4239 path = btrfs_alloc_path(); 4240 if (!path) { 4241 err = -ENOMEM; 4242 goto out; 4243 } 4244 4245 inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group, 4246 path); 4247 btrfs_free_path(path); 4248 4249 if (!IS_ERR(inode)) 4250 ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0); 4251 else 4252 ret = PTR_ERR(inode); 4253 4254 if (ret && ret != -ENOENT) { 4255 err = ret; 4256 goto out; 4257 } 4258 4259 rc->data_inode = create_reloc_inode(fs_info, rc->block_group); 4260 if (IS_ERR(rc->data_inode)) { 
4261 err = PTR_ERR(rc->data_inode); 4262 rc->data_inode = NULL; 4263 goto out; 4264 } 4265 4266 btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", 4267 rc->block_group->key.objectid, rc->block_group->flags); 4268 4269 btrfs_wait_block_group_reservations(rc->block_group); 4270 btrfs_wait_nocow_writers(rc->block_group); 4271 btrfs_wait_ordered_roots(fs_info, -1, 4272 rc->block_group->key.objectid, 4273 rc->block_group->key.offset); 4274 4275 while (1) { 4276 mutex_lock(&fs_info->cleaner_mutex); 4277 ret = relocate_block_group(rc); 4278 mutex_unlock(&fs_info->cleaner_mutex); 4279 if (ret < 0) { 4280 err = ret; 4281 goto out; 4282 } 4283 4284 if (rc->extents_found == 0) 4285 break; 4286 4287 btrfs_info(extent_root->fs_info, "found %llu extents", 4288 rc->extents_found); 4289 4290 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { 4291 ret = btrfs_wait_ordered_range(rc->data_inode, 0, 4292 (u64)-1); 4293 if (ret) { 4294 err = ret; 4295 goto out; 4296 } 4297 invalidate_mapping_pages(rc->data_inode->i_mapping, 4298 0, -1); 4299 rc->stage = UPDATE_DATA_PTRS; 4300 } 4301 } 4302 4303 WARN_ON(rc->block_group->pinned > 0); 4304 WARN_ON(rc->block_group->reserved > 0); 4305 WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); 4306 out: 4307 if (err && rw) 4308 btrfs_dec_block_group_ro(extent_root, rc->block_group); 4309 iput(rc->data_inode); 4310 btrfs_put_block_group(rc->block_group); 4311 kfree(rc); 4312 return err; 4313 } 4314 4315 static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) 4316 { 4317 struct btrfs_trans_handle *trans; 4318 int ret, err; 4319 4320 trans = btrfs_start_transaction(root->fs_info->tree_root, 0); 4321 if (IS_ERR(trans)) 4322 return PTR_ERR(trans); 4323 4324 memset(&root->root_item.drop_progress, 0, 4325 sizeof(root->root_item.drop_progress)); 4326 root->root_item.drop_level = 0; 4327 btrfs_set_root_refs(&root->root_item, 0); 4328 ret = btrfs_update_root(trans, root->fs_info->tree_root, 4329 
&root->root_key, &root->root_item); 4330 4331 err = btrfs_end_transaction(trans, root->fs_info->tree_root); 4332 if (err) 4333 return err; 4334 return ret; 4335 } 4336 4337 /* 4338 * recover relocation interrupted by system crash. 4339 * 4340 * this function resumes merging reloc trees with corresponding fs trees. 4341 * this is important for keeping the sharing of tree blocks 4342 */ 4343 int btrfs_recover_relocation(struct btrfs_root *root) 4344 { 4345 LIST_HEAD(reloc_roots); 4346 struct btrfs_key key; 4347 struct btrfs_root *fs_root; 4348 struct btrfs_root *reloc_root; 4349 struct btrfs_path *path; 4350 struct extent_buffer *leaf; 4351 struct reloc_control *rc = NULL; 4352 struct btrfs_trans_handle *trans; 4353 int ret; 4354 int err = 0; 4355 4356 path = btrfs_alloc_path(); 4357 if (!path) 4358 return -ENOMEM; 4359 path->reada = READA_BACK; 4360 4361 key.objectid = BTRFS_TREE_RELOC_OBJECTID; 4362 key.type = BTRFS_ROOT_ITEM_KEY; 4363 key.offset = (u64)-1; 4364 4365 while (1) { 4366 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, 4367 path, 0, 0); 4368 if (ret < 0) { 4369 err = ret; 4370 goto out; 4371 } 4372 if (ret > 0) { 4373 if (path->slots[0] == 0) 4374 break; 4375 path->slots[0]--; 4376 } 4377 leaf = path->nodes[0]; 4378 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4379 btrfs_release_path(path); 4380 4381 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || 4382 key.type != BTRFS_ROOT_ITEM_KEY) 4383 break; 4384 4385 reloc_root = btrfs_read_fs_root(root, &key); 4386 if (IS_ERR(reloc_root)) { 4387 err = PTR_ERR(reloc_root); 4388 goto out; 4389 } 4390 4391 list_add(&reloc_root->root_list, &reloc_roots); 4392 4393 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 4394 fs_root = read_fs_root(root->fs_info, 4395 reloc_root->root_key.offset); 4396 if (IS_ERR(fs_root)) { 4397 ret = PTR_ERR(fs_root); 4398 if (ret != -ENOENT) { 4399 err = ret; 4400 goto out; 4401 } 4402 ret = mark_garbage_root(reloc_root); 4403 if (ret < 0) { 4404 err = ret; 4405 goto 
out; 4406 } 4407 } 4408 } 4409 4410 if (key.offset == 0) 4411 break; 4412 4413 key.offset--; 4414 } 4415 btrfs_release_path(path); 4416 4417 if (list_empty(&reloc_roots)) 4418 goto out; 4419 4420 rc = alloc_reloc_control(root->fs_info); 4421 if (!rc) { 4422 err = -ENOMEM; 4423 goto out; 4424 } 4425 4426 rc->extent_root = root->fs_info->extent_root; 4427 4428 set_reloc_control(rc); 4429 4430 trans = btrfs_join_transaction(rc->extent_root); 4431 if (IS_ERR(trans)) { 4432 unset_reloc_control(rc); 4433 err = PTR_ERR(trans); 4434 goto out_free; 4435 } 4436 4437 rc->merge_reloc_tree = 1; 4438 4439 while (!list_empty(&reloc_roots)) { 4440 reloc_root = list_entry(reloc_roots.next, 4441 struct btrfs_root, root_list); 4442 list_del(&reloc_root->root_list); 4443 4444 if (btrfs_root_refs(&reloc_root->root_item) == 0) { 4445 list_add_tail(&reloc_root->root_list, 4446 &rc->reloc_roots); 4447 continue; 4448 } 4449 4450 fs_root = read_fs_root(root->fs_info, 4451 reloc_root->root_key.offset); 4452 if (IS_ERR(fs_root)) { 4453 err = PTR_ERR(fs_root); 4454 goto out_free; 4455 } 4456 4457 err = __add_reloc_root(reloc_root); 4458 BUG_ON(err < 0); /* -ENOMEM or logic error */ 4459 fs_root->reloc_root = reloc_root; 4460 } 4461 4462 err = btrfs_commit_transaction(trans, rc->extent_root); 4463 if (err) 4464 goto out_free; 4465 4466 merge_reloc_roots(rc); 4467 4468 unset_reloc_control(rc); 4469 4470 trans = btrfs_join_transaction(rc->extent_root); 4471 if (IS_ERR(trans)) 4472 err = PTR_ERR(trans); 4473 else 4474 err = btrfs_commit_transaction(trans, rc->extent_root); 4475 out_free: 4476 kfree(rc); 4477 out: 4478 if (!list_empty(&reloc_roots)) 4479 free_reloc_roots(&reloc_roots); 4480 4481 btrfs_free_path(path); 4482 4483 if (err == 0) { 4484 /* cleanup orphan inode in data relocation tree */ 4485 fs_root = read_fs_root(root->fs_info, 4486 BTRFS_DATA_RELOC_TREE_OBJECTID); 4487 if (IS_ERR(fs_root)) 4488 err = PTR_ERR(fs_root); 4489 else 4490 err = btrfs_orphan_cleanup(fs_root); 4491 } 4492 
return err; 4493 } 4494 4495 /* 4496 * helper to add ordered checksum for data relocation. 4497 * 4498 * cloning checksum properly handles the nodatasum extents. 4499 * it also saves CPU time to re-calculate the checksum. 4500 */ 4501 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) 4502 { 4503 struct btrfs_ordered_sum *sums; 4504 struct btrfs_ordered_extent *ordered; 4505 struct btrfs_root *root = BTRFS_I(inode)->root; 4506 int ret; 4507 u64 disk_bytenr; 4508 u64 new_bytenr; 4509 LIST_HEAD(list); 4510 4511 ordered = btrfs_lookup_ordered_extent(inode, file_pos); 4512 BUG_ON(ordered->file_offset != file_pos || ordered->len != len); 4513 4514 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 4515 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 4516 disk_bytenr + len - 1, &list, 0); 4517 if (ret) 4518 goto out; 4519 4520 while (!list_empty(&list)) { 4521 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 4522 list_del_init(&sums->list); 4523 4524 /* 4525 * We need to offset the new_bytenr based on where the csum is. 4526 * We need to do this because we will read in entire prealloc 4527 * extents but we may have written to say the middle of the 4528 * prealloc extent, so we need to make sure the csum goes with 4529 * the right disk offset. 4530 * 4531 * We can do this because the data reloc inode refers strictly 4532 * to the on disk bytes, so we don't have to worry about 4533 * disk_len vs real len like with real inodes since it's all 4534 * disk length. 
4535 */ 4536 new_bytenr = ordered->start + (sums->bytenr - disk_bytenr); 4537 sums->bytenr = new_bytenr; 4538 4539 btrfs_add_ordered_sum(inode, ordered, sums); 4540 } 4541 out: 4542 btrfs_put_ordered_extent(ordered); 4543 return ret; 4544 } 4545 4546 int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, 4547 struct btrfs_root *root, struct extent_buffer *buf, 4548 struct extent_buffer *cow) 4549 { 4550 struct reloc_control *rc; 4551 struct backref_node *node; 4552 int first_cow = 0; 4553 int level; 4554 int ret = 0; 4555 4556 rc = root->fs_info->reloc_ctl; 4557 if (!rc) 4558 return 0; 4559 4560 BUG_ON(rc->stage == UPDATE_DATA_PTRS && 4561 root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); 4562 4563 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 4564 if (buf == root->node) 4565 __update_reloc_root(root, cow->start); 4566 } 4567 4568 level = btrfs_header_level(buf); 4569 if (btrfs_header_generation(buf) <= 4570 btrfs_root_last_snapshot(&root->root_item)) 4571 first_cow = 1; 4572 4573 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID && 4574 rc->create_reloc_tree) { 4575 WARN_ON(!first_cow && level == 0); 4576 4577 node = rc->backref_cache.path[level]; 4578 BUG_ON(node->bytenr != buf->start && 4579 node->new_bytenr != buf->start); 4580 4581 drop_node_buffer(node); 4582 extent_buffer_get(cow); 4583 node->eb = cow; 4584 node->new_bytenr = cow->start; 4585 4586 if (!node->pending) { 4587 list_move_tail(&node->list, 4588 &rc->backref_cache.pending[level]); 4589 node->pending = 1; 4590 } 4591 4592 if (first_cow) 4593 __mark_block_processed(rc, node); 4594 4595 if (first_cow && level > 0) 4596 rc->nodes_relocated += buf->len; 4597 } 4598 4599 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) 4600 ret = replace_file_extents(trans, rc, root, cow); 4601 return ret; 4602 } 4603 4604 /* 4605 * called before creating snapshot. 
it calculates metadata reservation 4606 * required for relocating tree blocks in the snapshot 4607 */ 4608 void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending, 4609 u64 *bytes_to_reserve) 4610 { 4611 struct btrfs_root *root; 4612 struct reloc_control *rc; 4613 4614 root = pending->root; 4615 if (!root->reloc_root) 4616 return; 4617 4618 rc = root->fs_info->reloc_ctl; 4619 if (!rc->merge_reloc_tree) 4620 return; 4621 4622 root = root->reloc_root; 4623 BUG_ON(btrfs_root_refs(&root->root_item) == 0); 4624 /* 4625 * relocation is in the stage of merging trees. the space 4626 * used by merging a reloc tree is twice the size of 4627 * relocated tree nodes in the worst case. half for cowing 4628 * the reloc tree, half for cowing the fs tree. the space 4629 * used by cowing the reloc tree will be freed after the 4630 * tree is dropped. if we create snapshot, cowing the fs 4631 * tree may use more space than it frees. so we need 4632 * reserve extra space. 4633 */ 4634 *bytes_to_reserve += rc->nodes_relocated; 4635 } 4636 4637 /* 4638 * called after snapshot is created. 
migrate block reservation 4639 * and create reloc root for the newly created snapshot 4640 */ 4641 int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, 4642 struct btrfs_pending_snapshot *pending) 4643 { 4644 struct btrfs_root *root = pending->root; 4645 struct btrfs_root *reloc_root; 4646 struct btrfs_root *new_root; 4647 struct reloc_control *rc; 4648 int ret; 4649 4650 if (!root->reloc_root) 4651 return 0; 4652 4653 rc = root->fs_info->reloc_ctl; 4654 rc->merging_rsv_size += rc->nodes_relocated; 4655 4656 if (rc->merge_reloc_tree) { 4657 ret = btrfs_block_rsv_migrate(&pending->block_rsv, 4658 rc->block_rsv, 4659 rc->nodes_relocated, 1); 4660 if (ret) 4661 return ret; 4662 } 4663 4664 new_root = pending->snap; 4665 reloc_root = create_reloc_root(trans, root->reloc_root, 4666 new_root->root_key.objectid); 4667 if (IS_ERR(reloc_root)) 4668 return PTR_ERR(reloc_root); 4669 4670 ret = __add_reloc_root(reloc_root); 4671 BUG_ON(ret < 0); 4672 new_root->reloc_root = reloc_root; 4673 4674 if (rc->create_reloc_tree) 4675 ret = clone_backref_node(trans, rc, root, reloc_root); 4676 return ret; 4677 } 4678