1 /* 2 * Copyright (C) 2009 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/pagemap.h> 21 #include <linux/writeback.h> 22 #include <linux/blkdev.h> 23 #include <linux/rbtree.h> 24 #include <linux/slab.h> 25 #include "ctree.h" 26 #include "disk-io.h" 27 #include "transaction.h" 28 #include "volumes.h" 29 #include "locking.h" 30 #include "btrfs_inode.h" 31 #include "async-thread.h" 32 #include "free-space-cache.h" 33 #include "inode-map.h" 34 #include "qgroup.h" 35 36 /* 37 * backref_node, mapping_node and tree_block start with this 38 */ 39 struct tree_entry { 40 struct rb_node rb_node; 41 u64 bytenr; 42 }; 43 44 /* 45 * present a tree block in the backref cache 46 */ 47 struct backref_node { 48 struct rb_node rb_node; 49 u64 bytenr; 50 51 u64 new_bytenr; 52 /* objectid of tree block owner, can be not uptodate */ 53 u64 owner; 54 /* link to pending, changed or detached list */ 55 struct list_head list; 56 /* list of upper level blocks reference this block */ 57 struct list_head upper; 58 /* list of child blocks in the cache */ 59 struct list_head lower; 60 /* NULL if this node is not tree root */ 61 struct btrfs_root *root; 62 /* extent buffer got by COW the block */ 63 struct extent_buffer *eb; 64 /* level of tree block */ 65 unsigned int level:8; 66 /* is the block in non-reference counted tree */ 67 unsigned int cowonly:1; 68 /* 1 if no child node in the cache */ 69 unsigned int lowest:1; 70 /* is the extent buffer locked */ 71 unsigned int locked:1; 72 /* has the block been processed */ 73 unsigned int processed:1; 74 /* have backrefs of this block been checked */ 75 unsigned int checked:1; 76 /* 77 * 1 if corresponding block has been cowed but some upper 78 * level block pointers may not point to the new location 79 */ 80 unsigned int pending:1; 81 /* 82 * 1 if the backref node isn't connected to any other 83 * backref node. 84 */ 85 unsigned int detached:1; 86 }; 87 88 /* 89 * present a block pointer in the backref cache 90 */ 91 struct backref_edge { 92 struct list_head list[2]; 93 struct backref_node *node[2]; 94 }; 95 96 #define LOWER 0 97 #define UPPER 1 98 #define RELOCATION_RESERVED_NODES 256 99 100 struct backref_cache { 101 /* red black tree of all backref nodes in the cache */ 102 struct rb_root rb_root; 103 /* for passing backref nodes to btrfs_reloc_cow_block */ 104 struct backref_node *path[BTRFS_MAX_LEVEL]; 105 /* 106 * list of blocks that have been cowed but some block 107 * pointers in upper level blocks may not reflect the 108 * new location 109 */ 110 struct list_head pending[BTRFS_MAX_LEVEL]; 111 /* list of backref nodes with no child node */ 112 struct list_head leaves; 113 /* list of blocks that have been cowed in current transaction */ 114 struct list_head changed; 115 /* list of detached backref node. */ 116 struct list_head detached; 117 118 u64 last_trans; 119 120 int nr_nodes; 121 int nr_edges; 122 }; 123 124 /* 125 * map address of tree root to tree 126 */ 127 struct mapping_node { 128 struct rb_node rb_node; 129 u64 bytenr; 130 void *data; 131 }; 132 133 struct mapping_tree { 134 struct rb_root rb_root; 135 spinlock_t lock; 136 }; 137 138 /* 139 * present a tree block to process 140 */ 141 struct tree_block { 142 struct rb_node rb_node; 143 u64 bytenr; 144 struct btrfs_key key; 145 unsigned int level:8; 146 unsigned int key_ready:1; 147 }; 148 149 #define MAX_EXTENTS 128 150 151 struct file_extent_cluster { 152 u64 start; 153 u64 end; 154 u64 boundary[MAX_EXTENTS]; 155 unsigned int nr; 156 }; 157 158 struct reloc_control { 159 /* block group to relocate */ 160 struct btrfs_block_group_cache *block_group; 161 /* extent tree */ 162 struct btrfs_root *extent_root; 163 /* inode for moving data */ 164 struct inode *data_inode; 165 166 struct btrfs_block_rsv *block_rsv; 167 168 struct backref_cache backref_cache; 169 170 struct file_extent_cluster cluster; 171 /* tree blocks have been processed */ 172 struct extent_io_tree processed_blocks; 173 /* map start of tree root to corresponding reloc tree */ 174 struct mapping_tree reloc_root_tree; 175 /* list of reloc trees */ 176 struct list_head reloc_roots; 177 /* size of metadata reservation for merging reloc trees */ 178 u64 merging_rsv_size; 179 /* size of relocated tree nodes */ 180 u64 nodes_relocated; 181 /* reserved size for block group relocation*/ 182 u64 reserved_bytes; 183 184 u64 search_start; 185 u64 extents_found; 186 187 unsigned int stage:8; 188 unsigned int create_reloc_tree:1; 189 unsigned int merge_reloc_tree:1; 190 unsigned int found_file_extent:1; 191 }; 192 193 /* stages of data relocation */ 194 #define MOVE_DATA_EXTENTS 0 195 #define UPDATE_DATA_PTRS 1 196 197 static void remove_backref_node(struct backref_cache *cache, 198 struct backref_node *node); 199 static void __mark_block_processed(struct reloc_control *rc, 200 struct backref_node *node); 201 202 static void mapping_tree_init(struct mapping_tree *tree) 203 { 204 tree->rb_root = RB_ROOT; 205 spin_lock_init(&tree->lock); 206 } 207 208 static void backref_cache_init(struct backref_cache *cache) 209 { 210 int i; 211 cache->rb_root = RB_ROOT; 212 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 213 INIT_LIST_HEAD(&cache->pending[i]); 214 INIT_LIST_HEAD(&cache->changed); 215 INIT_LIST_HEAD(&cache->detached); 216 INIT_LIST_HEAD(&cache->leaves); 217 } 218 219 static void backref_cache_cleanup(struct backref_cache *cache) 220 { 221 struct backref_node *node; 222 int i; 223 224 while (!list_empty(&cache->detached)) { 225 node = list_entry(cache->detached.next, 226 struct backref_node, list); 227 remove_backref_node(cache, node); 228 } 229 230 while (!list_empty(&cache->leaves)) { 231 node = list_entry(cache->leaves.next, 232 struct backref_node, lower); 233 remove_backref_node(cache, node); 234 } 235 236 cache->last_trans = 0; 237 238 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 239 ASSERT(list_empty(&cache->pending[i])); 240 ASSERT(list_empty(&cache->changed)); 241 ASSERT(list_empty(&cache->detached)); 242 ASSERT(RB_EMPTY_ROOT(&cache->rb_root)); 243 ASSERT(!cache->nr_nodes); 244 ASSERT(!cache->nr_edges); 245 } 246 247 static struct backref_node *alloc_backref_node(struct backref_cache *cache) 248 { 249 struct backref_node *node; 250 251 node = kzalloc(sizeof(*node), GFP_NOFS); 252 if (node) { 253 INIT_LIST_HEAD(&node->list); 254 INIT_LIST_HEAD(&node->upper); 255 INIT_LIST_HEAD(&node->lower); 256 RB_CLEAR_NODE(&node->rb_node); 257 cache->nr_nodes++; 258 } 259 return node; 260 } 261 262 static void free_backref_node(struct backref_cache *cache, 263 struct backref_node *node) 264 { 265 if (node) { 266 cache->nr_nodes--; 267 kfree(node); 268 } 269 } 270 271 static struct backref_edge *alloc_backref_edge(struct backref_cache *cache) 272 { 273 struct backref_edge *edge; 274 275 edge = kzalloc(sizeof(*edge), GFP_NOFS); 276 if (edge) 277 cache->nr_edges++; 278 return edge; 279 } 280 281 static void free_backref_edge(struct backref_cache *cache, 282 struct backref_edge *edge) 283 { 284 if (edge) { 285 cache->nr_edges--; 286 kfree(edge); 287 } 288 } 289 290 static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, 291 struct rb_node *node) 292 { 293 struct rb_node **p = &root->rb_node; 294 struct rb_node *parent = NULL; 295 struct tree_entry *entry; 296 297 while (*p) { 298 parent = *p; 299 entry = rb_entry(parent, struct tree_entry, rb_node); 300 301 if (bytenr < entry->bytenr) 302 p = &(*p)->rb_left; 303 else if (bytenr > entry->bytenr) 304 p = &(*p)->rb_right; 305 else 306 return parent; 307 } 308 309 rb_link_node(node, parent, p); 310 rb_insert_color(node, root); 311 return NULL; 312 } 313 314 static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) 315 { 316 struct rb_node *n = root->rb_node; 317 struct tree_entry *entry; 318 319 while (n) { 320 entry = rb_entry(n, struct tree_entry, rb_node); 321 322 if (bytenr < entry->bytenr) 323 n = n->rb_left; 324 else if (bytenr > entry->bytenr) 325 n = n->rb_right; 326 else 327 return n; 328 } 329 return NULL; 330 } 331 332 static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr) 333 { 334 335 struct btrfs_fs_info *fs_info = NULL; 336 struct backref_node *bnode = rb_entry(rb_node, struct backref_node, 337 rb_node); 338 if (bnode->root) 339 fs_info = bnode->root->fs_info; 340 btrfs_panic(fs_info, errno, "Inconsistency in backref cache " 341 "found at offset %llu", bytenr); 342 } 343 344 /* 345 * walk up backref nodes until reach node presents tree root 346 */ 347 static struct backref_node *walk_up_backref(struct backref_node *node, 348 struct backref_edge *edges[], 349 int *index) 350 { 351 struct backref_edge *edge; 352 int idx = *index; 353 354 while (!list_empty(&node->upper)) { 355 edge = list_entry(node->upper.next, 356 struct backref_edge, list[LOWER]); 357 edges[idx++] = edge; 358 node = edge->node[UPPER]; 359 } 360 BUG_ON(node->detached); 361 *index = idx; 362 return node; 363 } 364 365 /* 366 * walk down backref nodes to find start of next reference path 367 */ 368 static struct backref_node *walk_down_backref(struct backref_edge *edges[], 369 int *index) 370 { 371 struct backref_edge *edge; 372 struct backref_node *lower; 373 int idx = *index; 374 375 while (idx > 0) { 376 edge = edges[idx - 1]; 377 lower = edge->node[LOWER]; 378 if (list_is_last(&edge->list[LOWER], &lower->upper)) { 379 idx--; 380 continue; 381 } 382 edge = list_entry(edge->list[LOWER].next, 383 struct backref_edge, list[LOWER]); 384 edges[idx - 1] = edge; 385 *index = idx; 386 return edge->node[UPPER]; 387 } 388 *index = 0; 389 return NULL; 390 } 391 392 static void unlock_node_buffer(struct backref_node *node) 393 { 394 if (node->locked) { 395 btrfs_tree_unlock(node->eb); 396 node->locked = 0; 397 } 398 } 399 400 static void drop_node_buffer(struct backref_node *node) 401 { 402 if (node->eb) { 403 unlock_node_buffer(node); 404 free_extent_buffer(node->eb); 405 node->eb = NULL; 406 } 407 } 408 409 static void drop_backref_node(struct backref_cache *tree, 410 struct backref_node *node) 411 { 412 BUG_ON(!list_empty(&node->upper)); 413 414 drop_node_buffer(node); 415 list_del(&node->list); 416 list_del(&node->lower); 417 if (!RB_EMPTY_NODE(&node->rb_node)) 418 rb_erase(&node->rb_node, &tree->rb_root); 419 free_backref_node(tree, node); 420 } 421 422 /* 423 * remove a backref node from the backref cache 424 */ 425 static void remove_backref_node(struct backref_cache *cache, 426 struct backref_node *node) 427 { 428 struct backref_node *upper; 429 struct backref_edge *edge; 430 431 if (!node) 432 return; 433 434 BUG_ON(!node->lowest && !node->detached); 435 while (!list_empty(&node->upper)) { 436 edge = list_entry(node->upper.next, struct backref_edge, 437 list[LOWER]); 438 upper = edge->node[UPPER]; 439 list_del(&edge->list[LOWER]); 440 list_del(&edge->list[UPPER]); 441 free_backref_edge(cache, edge); 442 443 if (RB_EMPTY_NODE(&upper->rb_node)) { 444 BUG_ON(!list_empty(&node->upper)); 445 drop_backref_node(cache, node); 446 node = upper; 447 node->lowest = 1; 448 continue; 449 } 450 /* 451 * add the node to leaf node list if no other 452 * child block cached. 453 */ 454 if (list_empty(&upper->lower)) { 455 list_add_tail(&upper->lower, &cache->leaves); 456 upper->lowest = 1; 457 } 458 } 459 460 drop_backref_node(cache, node); 461 } 462 463 static void update_backref_node(struct backref_cache *cache, 464 struct backref_node *node, u64 bytenr) 465 { 466 struct rb_node *rb_node; 467 rb_erase(&node->rb_node, &cache->rb_root); 468 node->bytenr = bytenr; 469 rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); 470 if (rb_node) 471 backref_tree_panic(rb_node, -EEXIST, bytenr); 472 } 473 474 /* 475 * update backref cache after a transaction commit 476 */ 477 static int update_backref_cache(struct btrfs_trans_handle *trans, 478 struct backref_cache *cache) 479 { 480 struct backref_node *node; 481 int level = 0; 482 483 if (cache->last_trans == 0) { 484 cache->last_trans = trans->transid; 485 return 0; 486 } 487 488 if (cache->last_trans == trans->transid) 489 return 0; 490 491 /* 492 * detached nodes are used to avoid unnecessary backref 493 * lookup. transaction commit changes the extent tree. 494 * so the detached nodes are no longer useful. 495 */ 496 while (!list_empty(&cache->detached)) { 497 node = list_entry(cache->detached.next, 498 struct backref_node, list); 499 remove_backref_node(cache, node); 500 } 501 502 while (!list_empty(&cache->changed)) { 503 node = list_entry(cache->changed.next, 504 struct backref_node, list); 505 list_del_init(&node->list); 506 BUG_ON(node->pending); 507 update_backref_node(cache, node, node->new_bytenr); 508 } 509 510 /* 511 * some nodes can be left in the pending list if there were 512 * errors during processing the pending nodes. 513 */ 514 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 515 list_for_each_entry(node, &cache->pending[level], list) { 516 BUG_ON(!node->pending); 517 if (node->bytenr == node->new_bytenr) 518 continue; 519 update_backref_node(cache, node, node->new_bytenr); 520 } 521 } 522 523 cache->last_trans = 0; 524 return 1; 525 } 526 527 528 static int should_ignore_root(struct btrfs_root *root) 529 { 530 struct btrfs_root *reloc_root; 531 532 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) 533 return 0; 534 535 reloc_root = root->reloc_root; 536 if (!reloc_root) 537 return 0; 538 539 if (btrfs_root_last_snapshot(&reloc_root->root_item) == 540 root->fs_info->running_transaction->transid - 1) 541 return 0; 542 /* 543 * if there is reloc tree and it was created in previous 544 * transaction backref lookup can find the reloc tree, 545 * so backref node for the fs tree root is useless for 546 * relocation. 547 */ 548 return 1; 549 } 550 /* 551 * find reloc tree by address of tree root 552 */ 553 static struct btrfs_root *find_reloc_root(struct reloc_control *rc, 554 u64 bytenr) 555 { 556 struct rb_node *rb_node; 557 struct mapping_node *node; 558 struct btrfs_root *root = NULL; 559 560 spin_lock(&rc->reloc_root_tree.lock); 561 rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr); 562 if (rb_node) { 563 node = rb_entry(rb_node, struct mapping_node, rb_node); 564 root = (struct btrfs_root *)node->data; 565 } 566 spin_unlock(&rc->reloc_root_tree.lock); 567 return root; 568 } 569 570 static int is_cowonly_root(u64 root_objectid) 571 { 572 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 573 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 574 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 575 root_objectid == BTRFS_DEV_TREE_OBJECTID || 576 root_objectid == BTRFS_TREE_LOG_OBJECTID || 577 root_objectid == BTRFS_CSUM_TREE_OBJECTID || 578 root_objectid == BTRFS_UUID_TREE_OBJECTID || 579 root_objectid == BTRFS_QUOTA_TREE_OBJECTID || 580 root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) 581 return 1; 582 return 0; 583 } 584 585 static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, 586 u64 root_objectid) 587 { 588 struct btrfs_key key; 589 590 key.objectid = root_objectid; 591 key.type = BTRFS_ROOT_ITEM_KEY; 592 if (is_cowonly_root(root_objectid)) 593 key.offset = 0; 594 else 595 key.offset = (u64)-1; 596 597 return btrfs_get_fs_root(fs_info, &key, false); 598 } 599 600 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 601 static noinline_for_stack 602 struct btrfs_root *find_tree_root(struct reloc_control *rc, 603 struct extent_buffer *leaf, 604 struct btrfs_extent_ref_v0 *ref0) 605 { 606 struct btrfs_root *root; 607 u64 root_objectid = btrfs_ref_root_v0(leaf, ref0); 608 u64 generation = btrfs_ref_generation_v0(leaf, ref0); 609 610 BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID); 611 612 root = read_fs_root(rc->extent_root->fs_info, root_objectid); 613 BUG_ON(IS_ERR(root)); 614 615 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && 616 generation != btrfs_root_generation(&root->root_item)) 617 return NULL; 618 619 return root; 620 } 621 #endif 622 623 static noinline_for_stack 624 int find_inline_backref(struct extent_buffer *leaf, int slot, 625 unsigned long *ptr, unsigned long *end) 626 { 627 struct btrfs_key key; 628 struct btrfs_extent_item *ei; 629 struct btrfs_tree_block_info *bi; 630 u32 item_size; 631 632 btrfs_item_key_to_cpu(leaf, &key, slot); 633 634 item_size = btrfs_item_size_nr(leaf, slot); 635 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 636 if (item_size < sizeof(*ei)) { 637 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 638 return 1; 639 } 640 #endif 641 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); 642 WARN_ON(!(btrfs_extent_flags(leaf, ei) & 643 BTRFS_EXTENT_FLAG_TREE_BLOCK)); 644 645 if (key.type == BTRFS_EXTENT_ITEM_KEY && 646 item_size <= sizeof(*ei) + sizeof(*bi)) { 647 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); 648 return 1; 649 } 650 if (key.type == BTRFS_METADATA_ITEM_KEY && 651 item_size <= sizeof(*ei)) { 652 WARN_ON(item_size < sizeof(*ei)); 653 return 1; 654 } 655 656 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 657 bi = (struct btrfs_tree_block_info *)(ei + 1); 658 *ptr = (unsigned long)(bi + 1); 659 } else { 660 *ptr = (unsigned long)(ei + 1); 661 } 662 *end = (unsigned long)ei + item_size; 663 return 0; 664 } 665 666 /* 667 * build backref tree for a given tree block. root of the backref tree 668 * corresponds the tree block, leaves of the backref tree correspond 669 * roots of b-trees that reference the tree block. 670 * 671 * the basic idea of this function is check backrefs of a given block 672 * to find upper level blocks that reference the block, and then check 673 * backrefs of these upper level blocks recursively. the recursion stop 674 * when tree root is reached or backrefs for the block is cached. 675 * 676 * NOTE: if we find backrefs for a block are cached, we know backrefs 677 * for all upper level blocks that directly/indirectly reference the 678 * block are also cached. 679 */ 680 static noinline_for_stack 681 struct backref_node *build_backref_tree(struct reloc_control *rc, 682 struct btrfs_key *node_key, 683 int level, u64 bytenr) 684 { 685 struct backref_cache *cache = &rc->backref_cache; 686 struct btrfs_path *path1; 687 struct btrfs_path *path2; 688 struct extent_buffer *eb; 689 struct btrfs_root *root; 690 struct backref_node *cur; 691 struct backref_node *upper; 692 struct backref_node *lower; 693 struct backref_node *node = NULL; 694 struct backref_node *exist = NULL; 695 struct backref_edge *edge; 696 struct rb_node *rb_node; 697 struct btrfs_key key; 698 unsigned long end; 699 unsigned long ptr; 700 LIST_HEAD(list); 701 LIST_HEAD(useless); 702 int cowonly; 703 int ret; 704 int err = 0; 705 bool need_check = true; 706 707 path1 = btrfs_alloc_path(); 708 path2 = btrfs_alloc_path(); 709 if (!path1 || !path2) { 710 err = -ENOMEM; 711 goto out; 712 } 713 path1->reada = READA_FORWARD; 714 path2->reada = READA_FORWARD; 715 716 node = alloc_backref_node(cache); 717 if (!node) { 718 err = -ENOMEM; 719 goto out; 720 } 721 722 node->bytenr = bytenr; 723 node->level = level; 724 node->lowest = 1; 725 cur = node; 726 again: 727 end = 0; 728 ptr = 0; 729 key.objectid = cur->bytenr; 730 key.type = BTRFS_METADATA_ITEM_KEY; 731 key.offset = (u64)-1; 732 733 path1->search_commit_root = 1; 734 path1->skip_locking = 1; 735 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1, 736 0, 0); 737 if (ret < 0) { 738 err = ret; 739 goto out; 740 } 741 ASSERT(ret); 742 ASSERT(path1->slots[0]); 743 744 path1->slots[0]--; 745 746 WARN_ON(cur->checked); 747 if (!list_empty(&cur->upper)) { 748 /* 749 * the backref was added previously when processing 750 * backref of type BTRFS_TREE_BLOCK_REF_KEY 751 */ 752 ASSERT(list_is_singular(&cur->upper)); 753 edge = list_entry(cur->upper.next, struct backref_edge, 754 list[LOWER]); 755 ASSERT(list_empty(&edge->list[UPPER])); 756 exist = edge->node[UPPER]; 757 /* 758 * add the upper level block to pending list if we need 759 * check its backrefs 760 */ 761 if (!exist->checked) 762 list_add_tail(&edge->list[UPPER], &list); 763 } else { 764 exist = NULL; 765 } 766 767 while (1) { 768 cond_resched(); 769 eb = path1->nodes[0]; 770 771 if (ptr >= end) { 772 if (path1->slots[0] >= btrfs_header_nritems(eb)) { 773 ret = btrfs_next_leaf(rc->extent_root, path1); 774 if (ret < 0) { 775 err = ret; 776 goto out; 777 } 778 if (ret > 0) 779 break; 780 eb = path1->nodes[0]; 781 } 782 783 btrfs_item_key_to_cpu(eb, &key, path1->slots[0]); 784 if (key.objectid != cur->bytenr) { 785 WARN_ON(exist); 786 break; 787 } 788 789 if (key.type == BTRFS_EXTENT_ITEM_KEY || 790 key.type == BTRFS_METADATA_ITEM_KEY) { 791 ret = find_inline_backref(eb, path1->slots[0], 792 &ptr, &end); 793 if (ret) 794 goto next; 795 } 796 } 797 798 if (ptr < end) { 799 /* update key for inline back ref */ 800 struct btrfs_extent_inline_ref *iref; 801 iref = (struct btrfs_extent_inline_ref *)ptr; 802 key.type = btrfs_extent_inline_ref_type(eb, iref); 803 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 804 WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY && 805 key.type != BTRFS_SHARED_BLOCK_REF_KEY); 806 } 807 808 if (exist && 809 ((key.type == BTRFS_TREE_BLOCK_REF_KEY && 810 exist->owner == key.offset) || 811 (key.type == BTRFS_SHARED_BLOCK_REF_KEY && 812 exist->bytenr == key.offset))) { 813 exist = NULL; 814 goto next; 815 } 816 817 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 818 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || 819 key.type == BTRFS_EXTENT_REF_V0_KEY) { 820 if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 821 struct btrfs_extent_ref_v0 *ref0; 822 ref0 = btrfs_item_ptr(eb, path1->slots[0], 823 struct btrfs_extent_ref_v0); 824 if (key.objectid == key.offset) { 825 root = find_tree_root(rc, eb, ref0); 826 if (root && !should_ignore_root(root)) 827 cur->root = root; 828 else 829 list_add(&cur->list, &useless); 830 break; 831 } 832 if (is_cowonly_root(btrfs_ref_root_v0(eb, 833 ref0))) 834 cur->cowonly = 1; 835 } 836 #else 837 ASSERT(key.type != BTRFS_EXTENT_REF_V0_KEY); 838 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { 839 #endif 840 if (key.objectid == key.offset) { 841 /* 842 * only root blocks of reloc trees use 843 * backref of this type. 844 */ 845 root = find_reloc_root(rc, cur->bytenr); 846 ASSERT(root); 847 cur->root = root; 848 break; 849 } 850 851 edge = alloc_backref_edge(cache); 852 if (!edge) { 853 err = -ENOMEM; 854 goto out; 855 } 856 rb_node = tree_search(&cache->rb_root, key.offset); 857 if (!rb_node) { 858 upper = alloc_backref_node(cache); 859 if (!upper) { 860 free_backref_edge(cache, edge); 861 err = -ENOMEM; 862 goto out; 863 } 864 upper->bytenr = key.offset; 865 upper->level = cur->level + 1; 866 /* 867 * backrefs for the upper level block isn't 868 * cached, add the block to pending list 869 */ 870 list_add_tail(&edge->list[UPPER], &list); 871 } else { 872 upper = rb_entry(rb_node, struct backref_node, 873 rb_node); 874 ASSERT(upper->checked); 875 INIT_LIST_HEAD(&edge->list[UPPER]); 876 } 877 list_add_tail(&edge->list[LOWER], &cur->upper); 878 edge->node[LOWER] = cur; 879 edge->node[UPPER] = upper; 880 881 goto next; 882 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { 883 goto next; 884 } 885 886 /* key.type == BTRFS_TREE_BLOCK_REF_KEY */ 887 root = read_fs_root(rc->extent_root->fs_info, key.offset); 888 if (IS_ERR(root)) { 889 err = PTR_ERR(root); 890 goto out; 891 } 892 893 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) 894 cur->cowonly = 1; 895 896 if (btrfs_root_level(&root->root_item) == cur->level) { 897 /* tree root */ 898 ASSERT(btrfs_root_bytenr(&root->root_item) == 899 cur->bytenr); 900 if (should_ignore_root(root)) 901 list_add(&cur->list, &useless); 902 else 903 cur->root = root; 904 break; 905 } 906 907 level = cur->level + 1; 908 909 /* 910 * searching the tree to find upper level blocks 911 * reference the block. 912 */ 913 path2->search_commit_root = 1; 914 path2->skip_locking = 1; 915 path2->lowest_level = level; 916 ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0); 917 path2->lowest_level = 0; 918 if (ret < 0) { 919 err = ret; 920 goto out; 921 } 922 if (ret > 0 && path2->slots[level] > 0) 923 path2->slots[level]--; 924 925 eb = path2->nodes[level]; 926 WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) != 927 cur->bytenr); 928 929 lower = cur; 930 need_check = true; 931 for (; level < BTRFS_MAX_LEVEL; level++) { 932 if (!path2->nodes[level]) { 933 ASSERT(btrfs_root_bytenr(&root->root_item) == 934 lower->bytenr); 935 if (should_ignore_root(root)) 936 list_add(&lower->list, &useless); 937 else 938 lower->root = root; 939 break; 940 } 941 942 edge = alloc_backref_edge(cache); 943 if (!edge) { 944 err = -ENOMEM; 945 goto out; 946 } 947 948 eb = path2->nodes[level]; 949 rb_node = tree_search(&cache->rb_root, eb->start); 950 if (!rb_node) { 951 upper = alloc_backref_node(cache); 952 if (!upper) { 953 free_backref_edge(cache, edge); 954 err = -ENOMEM; 955 goto out; 956 } 957 upper->bytenr = eb->start; 958 upper->owner = btrfs_header_owner(eb); 959 upper->level = lower->level + 1; 960 if (!test_bit(BTRFS_ROOT_REF_COWS, 961 &root->state)) 962 upper->cowonly = 1; 963 964 /* 965 * if we know the block isn't shared 966 * we can void checking its backrefs. 967 */ 968 if (btrfs_block_can_be_shared(root, eb)) 969 upper->checked = 0; 970 else 971 upper->checked = 1; 972 973 /* 974 * add the block to pending list if we 975 * need check its backrefs, we only do this once 976 * while walking up a tree as we will catch 977 * anything else later on. 978 */ 979 if (!upper->checked && need_check) { 980 need_check = false; 981 list_add_tail(&edge->list[UPPER], 982 &list); 983 } else { 984 if (upper->checked) 985 need_check = true; 986 INIT_LIST_HEAD(&edge->list[UPPER]); 987 } 988 } else { 989 upper = rb_entry(rb_node, struct backref_node, 990 rb_node); 991 ASSERT(upper->checked); 992 INIT_LIST_HEAD(&edge->list[UPPER]); 993 if (!upper->owner) 994 upper->owner = btrfs_header_owner(eb); 995 } 996 list_add_tail(&edge->list[LOWER], &lower->upper); 997 edge->node[LOWER] = lower; 998 edge->node[UPPER] = upper; 999 1000 if (rb_node) 1001 break; 1002 lower = upper; 1003 upper = NULL; 1004 } 1005 btrfs_release_path(path2); 1006 next: 1007 if (ptr < end) { 1008 ptr += btrfs_extent_inline_ref_size(key.type); 1009 if (ptr >= end) { 1010 WARN_ON(ptr > end); 1011 ptr = 0; 1012 end = 0; 1013 } 1014 } 1015 if (ptr >= end) 1016 path1->slots[0]++; 1017 } 1018 btrfs_release_path(path1); 1019 1020 cur->checked = 1; 1021 WARN_ON(exist); 1022 1023 /* the pending list isn't empty, take the first block to process */ 1024 if (!list_empty(&list)) { 1025 edge = list_entry(list.next, struct backref_edge, list[UPPER]); 1026 list_del_init(&edge->list[UPPER]); 1027 cur = edge->node[UPPER]; 1028 goto again; 1029 } 1030 1031 /* 1032 * everything goes well, connect backref nodes and insert backref nodes 1033 * into the cache. 1034 */ 1035 ASSERT(node->checked); 1036 cowonly = node->cowonly; 1037 if (!cowonly) { 1038 rb_node = tree_insert(&cache->rb_root, node->bytenr, 1039 &node->rb_node); 1040 if (rb_node) 1041 backref_tree_panic(rb_node, -EEXIST, node->bytenr); 1042 list_add_tail(&node->lower, &cache->leaves); 1043 } 1044 1045 list_for_each_entry(edge, &node->upper, list[LOWER]) 1046 list_add_tail(&edge->list[UPPER], &list); 1047 1048 while (!list_empty(&list)) { 1049 edge = list_entry(list.next, struct backref_edge, list[UPPER]); 1050 list_del_init(&edge->list[UPPER]); 1051 upper = edge->node[UPPER]; 1052 if (upper->detached) { 1053 list_del(&edge->list[LOWER]); 1054 lower = edge->node[LOWER]; 1055 free_backref_edge(cache, edge); 1056 if (list_empty(&lower->upper)) 1057 list_add(&lower->list, &useless); 1058 continue; 1059 } 1060 1061 if (!RB_EMPTY_NODE(&upper->rb_node)) { 1062 if (upper->lowest) { 1063 list_del_init(&upper->lower); 1064 upper->lowest = 0; 1065 } 1066 1067 list_add_tail(&edge->list[UPPER], &upper->lower); 1068 continue; 1069 } 1070 1071 if (!upper->checked) { 1072 /* 1073 * Still want to blow up for developers since this is a 1074 * logic bug. 1075 */ 1076 ASSERT(0); 1077 err = -EINVAL; 1078 goto out; 1079 } 1080 if (cowonly != upper->cowonly) { 1081 ASSERT(0); 1082 err = -EINVAL; 1083 goto out; 1084 } 1085 1086 if (!cowonly) { 1087 rb_node = tree_insert(&cache->rb_root, upper->bytenr, 1088 &upper->rb_node); 1089 if (rb_node) 1090 backref_tree_panic(rb_node, -EEXIST, 1091 upper->bytenr); 1092 } 1093 1094 list_add_tail(&edge->list[UPPER], &upper->lower); 1095 1096 list_for_each_entry(edge, &upper->upper, list[LOWER]) 1097 list_add_tail(&edge->list[UPPER], &list); 1098 } 1099 /* 1100 * process useless backref nodes. backref nodes for tree leaves 1101 * are deleted from the cache. backref nodes for upper level 1102 * tree blocks are left in the cache to avoid unnecessary backref 1103 * lookup. 1104 */ 1105 while (!list_empty(&useless)) { 1106 upper = list_entry(useless.next, struct backref_node, list); 1107 list_del_init(&upper->list); 1108 ASSERT(list_empty(&upper->upper)); 1109 if (upper == node) 1110 node = NULL; 1111 if (upper->lowest) { 1112 list_del_init(&upper->lower); 1113 upper->lowest = 0; 1114 } 1115 while (!list_empty(&upper->lower)) { 1116 edge = list_entry(upper->lower.next, 1117 struct backref_edge, list[UPPER]); 1118 list_del(&edge->list[UPPER]); 1119 list_del(&edge->list[LOWER]); 1120 lower = edge->node[LOWER]; 1121 free_backref_edge(cache, edge); 1122 1123 if (list_empty(&lower->upper)) 1124 list_add(&lower->list, &useless); 1125 } 1126 __mark_block_processed(rc, upper); 1127 if (upper->level > 0) { 1128 list_add(&upper->list, &cache->detached); 1129 upper->detached = 1; 1130 } else { 1131 rb_erase(&upper->rb_node, &cache->rb_root); 1132 free_backref_node(cache, upper); 1133 } 1134 } 1135 out: 1136 btrfs_free_path(path1); 1137 btrfs_free_path(path2); 1138 if (err) { 1139 while (!list_empty(&useless)) { 1140 lower = list_entry(useless.next, 1141 struct backref_node, list); 1142 list_del_init(&lower->list); 1143 } 1144 while (!list_empty(&list)) { 1145 edge = list_first_entry(&list, struct backref_edge, 1146 list[UPPER]); 1147 list_del(&edge->list[UPPER]); 1148 list_del(&edge->list[LOWER]); 1149 lower = edge->node[LOWER]; 1150 upper = edge->node[UPPER]; 1151 free_backref_edge(cache, edge); 1152 1153 /* 1154 * Lower is no longer linked to any upper backref nodes 1155 * and isn't in the cache, we can free it ourselves. 1156 */ 1157 if (list_empty(&lower->upper) && 1158 RB_EMPTY_NODE(&lower->rb_node)) 1159 list_add(&lower->list, &useless); 1160 1161 if (!RB_EMPTY_NODE(&upper->rb_node)) 1162 continue; 1163 1164 /* Add this guy's upper edges to the list to process */ 1165 list_for_each_entry(edge, &upper->upper, list[LOWER]) 1166 list_add_tail(&edge->list[UPPER], &list); 1167 if (list_empty(&upper->upper)) 1168 list_add(&upper->list, &useless); 1169 } 1170 1171 while (!list_empty(&useless)) { 1172 lower = list_entry(useless.next, 1173 struct backref_node, list); 1174 list_del_init(&lower->list); 1175 if (lower == node) 1176 node = NULL; 1177 free_backref_node(cache, lower); 1178 } 1179 1180 free_backref_node(cache, node); 1181 return ERR_PTR(err); 1182 } 1183 ASSERT(!node || !node->detached); 1184 return node; 1185 } 1186 1187 /* 1188 * helper to add backref node for the newly created snapshot. 1189 * the backref node is created by cloning backref node that 1190 * corresponds to root of source tree 1191 */ 1192 static int clone_backref_node(struct btrfs_trans_handle *trans, 1193 struct reloc_control *rc, 1194 struct btrfs_root *src, 1195 struct btrfs_root *dest) 1196 { 1197 struct btrfs_root *reloc_root = src->reloc_root; 1198 struct backref_cache *cache = &rc->backref_cache; 1199 struct backref_node *node = NULL; 1200 struct backref_node *new_node; 1201 struct backref_edge *edge; 1202 struct backref_edge *new_edge; 1203 struct rb_node *rb_node; 1204 1205 if (cache->last_trans > 0) 1206 update_backref_cache(trans, cache); 1207 1208 rb_node = tree_search(&cache->rb_root, src->commit_root->start); 1209 if (rb_node) { 1210 node = rb_entry(rb_node, struct backref_node, rb_node); 1211 if (node->detached) 1212 node = NULL; 1213 else 1214 BUG_ON(node->new_bytenr != reloc_root->node->start); 1215 } 1216 1217 if (!node) { 1218 rb_node = tree_search(&cache->rb_root, 1219 reloc_root->commit_root->start); 1220 if (rb_node) { 1221 node = rb_entry(rb_node, struct backref_node, 1222 rb_node); 1223 BUG_ON(node->detached); 1224 } 1225 } 1226 1227 if (!node) 1228 return 0; 1229 1230 new_node = alloc_backref_node(cache); 1231 if (!new_node) 1232 return -ENOMEM; 1233 1234 new_node->bytenr = dest->node->start; 1235 new_node->level = node->level; 1236 new_node->lowest = node->lowest; 1237 new_node->checked = 1; 1238 new_node->root = dest; 1239 1240 if (!node->lowest) { 1241 list_for_each_entry(edge, &node->lower, list[UPPER]) { 1242 new_edge = alloc_backref_edge(cache); 1243 if (!new_edge) 1244 goto fail; 1245 1246 new_edge->node[UPPER] = new_node; 1247 new_edge->node[LOWER] = edge->node[LOWER]; 1248 list_add_tail(&new_edge->list[UPPER], 1249 &new_node->lower); 1250 } 1251 } else { 1252 list_add_tail(&new_node->lower, &cache->leaves); 1253 } 1254 1255 rb_node = tree_insert(&cache->rb_root, new_node->bytenr, 1256 &new_node->rb_node); 1257 if (rb_node) 1258 backref_tree_panic(rb_node, -EEXIST, new_node->bytenr); 1259 1260 if (!new_node->lowest) { 1261 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { 1262 list_add_tail(&new_edge->list[LOWER], 1263 &new_edge->node[LOWER]->upper); 1264 } 1265 } 1266 return 0; 1267 fail: 1268 while (!list_empty(&new_node->lower)) { 1269 new_edge = list_entry(new_node->lower.next, 1270 struct backref_edge, list[UPPER]); 1271 list_del(&new_edge->list[UPPER]); 1272 free_backref_edge(cache, new_edge); 1273 } 1274 free_backref_node(cache, new_node); 1275 return -ENOMEM; 1276 } 1277 1278 /* 1279 * helper to add 'address of tree root -> reloc tree' mapping 1280 */ 1281 static int __must_check __add_reloc_root(struct btrfs_root *root) 1282 { 1283 struct rb_node *rb_node; 1284 struct mapping_node *node; 1285 struct reloc_control *rc = root->fs_info->reloc_ctl; 1286 1287 node = kmalloc(sizeof(*node), GFP_NOFS); 1288 if (!node) 1289 return -ENOMEM; 1290 1291 node->bytenr = root->node->start; 1292 node->data = root; 1293 1294 spin_lock(&rc->reloc_root_tree.lock); 1295 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 1296 node->bytenr, &node->rb_node); 1297 spin_unlock(&rc->reloc_root_tree.lock); 1298 if (rb_node) { 1299 btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " 1300 "for start=%llu while inserting into relocation " 1301 "tree", node->bytenr); 1302 kfree(node); 1303 return -EEXIST; 1304 } 1305 1306 list_add_tail(&root->root_list, &rc->reloc_roots); 1307 return 0; 1308 } 1309 1310 /* 1311 * helper to delete the 'address of tree root -> reloc tree' 1312 * mapping 1313 */ 1314 static void __del_reloc_root(struct btrfs_root *root) 1315 { 1316 struct rb_node *rb_node; 1317 struct mapping_node *node = NULL; 1318 struct reloc_control *rc = root->fs_info->reloc_ctl; 1319 1320 spin_lock(&rc->reloc_root_tree.lock); 1321 rb_node = tree_search(&rc->reloc_root_tree.rb_root, 1322 root->node->start); 1323 if (rb_node) { 1324 node = rb_entry(rb_node, struct mapping_node, rb_node); 1325 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); 1326 } 1327 spin_unlock(&rc->reloc_root_tree.lock); 1328 1329 if (!node) 1330 return; 1331 BUG_ON((struct btrfs_root *)node->data != root); 1332 1333 spin_lock(&root->fs_info->trans_lock); 1334 list_del_init(&root->root_list); 1335 spin_unlock(&root->fs_info->trans_lock); 1336 kfree(node); 1337 } 1338 1339 /* 1340 * helper to update the 'address of tree root -> reloc tree' 1341 * mapping 1342 */ 1343 static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) 1344 { 1345 struct rb_node *rb_node; 1346 struct mapping_node *node = NULL; 1347 struct reloc_control *rc = root->fs_info->reloc_ctl; 1348 1349 spin_lock(&rc->reloc_root_tree.lock); 1350 rb_node = tree_search(&rc->reloc_root_tree.rb_root, 1351 root->node->start); 1352 if (rb_node) { 1353 node = rb_entry(rb_node, struct mapping_node, rb_node); 1354 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); 1355 } 1356 spin_unlock(&rc->reloc_root_tree.lock); 1357 1358 if (!node) 1359 return 0; 1360 BUG_ON((struct btrfs_root *)node->data != root); 1361 1362 spin_lock(&rc->reloc_root_tree.lock); 1363 node->bytenr = new_bytenr; 1364 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 1365 node->bytenr, &node->rb_node); 1366 spin_unlock(&rc->reloc_root_tree.lock); 1367 if (rb_node) 1368 backref_tree_panic(rb_node, -EEXIST, node->bytenr); 1369 return 0; 1370 } 1371 1372 static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, 1373 struct btrfs_root *root, u64 objectid) 1374 { 1375 struct btrfs_root *reloc_root; 1376 struct extent_buffer *eb; 1377 struct btrfs_root_item *root_item; 1378 struct btrfs_key root_key; 1379 u64 last_snap = 0; 1380 int ret; 1381 1382 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 1383 BUG_ON(!root_item); 1384 1385 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; 1386 root_key.type = BTRFS_ROOT_ITEM_KEY; 1387 root_key.offset = objectid; 1388 1389 if (root->root_key.objectid == objectid) { 1390 /* called by btrfs_init_reloc_root */ 1391 ret = btrfs_copy_root(trans, root, root->commit_root, &eb, 1392 BTRFS_TREE_RELOC_OBJECTID); 1393 BUG_ON(ret); 1394 1395 last_snap = btrfs_root_last_snapshot(&root->root_item); 1396 btrfs_set_root_last_snapshot(&root->root_item, 1397 trans->transid - 1); 1398 } else { 1399 /* 1400 * called by btrfs_reloc_post_snapshot_hook. 1401 * the source tree is a reloc tree, all tree blocks 1402 * modified after it was created have RELOC flag 1403 * set in their headers. so it's OK to not update 1404 * the 'last_snapshot'. 1405 */ 1406 ret = btrfs_copy_root(trans, root, root->node, &eb, 1407 BTRFS_TREE_RELOC_OBJECTID); 1408 BUG_ON(ret); 1409 } 1410 1411 memcpy(root_item, &root->root_item, sizeof(*root_item)); 1412 btrfs_set_root_bytenr(root_item, eb->start); 1413 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 1414 btrfs_set_root_generation(root_item, trans->transid); 1415 1416 if (root->root_key.objectid == objectid) { 1417 btrfs_set_root_refs(root_item, 0); 1418 memset(&root_item->drop_progress, 0, 1419 sizeof(struct btrfs_disk_key)); 1420 root_item->drop_level = 0; 1421 /* 1422 * abuse rtransid, it is safe because it is impossible to 1423 * receive data into a relocation tree. 1424 */ 1425 btrfs_set_root_rtransid(root_item, last_snap); 1426 btrfs_set_root_otransid(root_item, trans->transid); 1427 } 1428 1429 btrfs_tree_unlock(eb); 1430 free_extent_buffer(eb); 1431 1432 ret = btrfs_insert_root(trans, root->fs_info->tree_root, 1433 &root_key, root_item); 1434 BUG_ON(ret); 1435 kfree(root_item); 1436 1437 reloc_root = btrfs_read_fs_root(root->fs_info->tree_root, &root_key); 1438 BUG_ON(IS_ERR(reloc_root)); 1439 reloc_root->last_trans = trans->transid; 1440 return reloc_root; 1441 } 1442 1443 /* 1444 * create reloc tree for a given fs tree. reloc tree is just a 1445 * snapshot of the fs tree with special root objectid. 1446 */ 1447 int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, 1448 struct btrfs_root *root) 1449 { 1450 struct btrfs_root *reloc_root; 1451 struct reloc_control *rc = root->fs_info->reloc_ctl; 1452 struct btrfs_block_rsv *rsv; 1453 int clear_rsv = 0; 1454 int ret; 1455 1456 if (root->reloc_root) { 1457 reloc_root = root->reloc_root; 1458 reloc_root->last_trans = trans->transid; 1459 return 0; 1460 } 1461 1462 if (!rc || !rc->create_reloc_tree || 1463 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 1464 return 0; 1465 1466 if (!trans->reloc_reserved) { 1467 rsv = trans->block_rsv; 1468 trans->block_rsv = rc->block_rsv; 1469 clear_rsv = 1; 1470 } 1471 reloc_root = create_reloc_root(trans, root, root->root_key.objectid); 1472 if (clear_rsv) 1473 trans->block_rsv = rsv; 1474 1475 ret = __add_reloc_root(reloc_root); 1476 BUG_ON(ret < 0); 1477 root->reloc_root = reloc_root; 1478 return 0; 1479 } 1480 1481 /* 1482 * update root item of reloc tree 1483 */ 1484 int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, 1485 struct btrfs_root *root) 1486 { 1487 struct btrfs_root *reloc_root; 1488 struct btrfs_root_item *root_item; 1489 int ret; 1490 1491 if (!root->reloc_root) 1492 goto out; 1493 1494 reloc_root = root->reloc_root; 1495 root_item = &reloc_root->root_item; 1496 1497 if (root->fs_info->reloc_ctl->merge_reloc_tree && 1498 btrfs_root_refs(root_item) == 0) { 1499 root->reloc_root = NULL; 1500 __del_reloc_root(reloc_root); 1501 } 1502 1503 if (reloc_root->commit_root != reloc_root->node) { 1504 btrfs_set_root_node(root_item, reloc_root->node); 1505 free_extent_buffer(reloc_root->commit_root); 1506 reloc_root->commit_root = btrfs_root_node(reloc_root); 1507 } 1508 1509 ret = btrfs_update_root(trans, root->fs_info->tree_root, 1510 &reloc_root->root_key, root_item); 1511 BUG_ON(ret); 1512 1513 out: 1514 return 0; 1515 } 1516 1517 /* 1518 * helper to find first cached inode with inode number >= objectid 1519 * in a subvolume 1520 */ 1521 static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid) 1522 { 1523 struct rb_node *node; 1524 struct rb_node *prev; 1525 struct btrfs_inode *entry; 1526 struct inode *inode; 1527 1528 spin_lock(&root->inode_lock); 1529 again: 1530 node = root->inode_tree.rb_node; 1531 prev = NULL; 1532 while (node) { 1533 prev = node; 1534 entry = rb_entry(node, struct btrfs_inode, rb_node); 1535 1536 if (objectid < btrfs_ino(&entry->vfs_inode)) 1537 node = node->rb_left; 1538 else if (objectid > btrfs_ino(&entry->vfs_inode)) 1539 node = node->rb_right; 1540 else 1541 break; 1542 } 1543 if (!node) { 1544 while (prev) { 1545 entry = rb_entry(prev, struct btrfs_inode, rb_node); 1546 if (objectid <= btrfs_ino(&entry->vfs_inode)) { 1547 node = prev; 1548 break; 1549 } 1550 prev = rb_next(prev); 1551 } 1552 } 1553 while (node) { 1554 entry = rb_entry(node, struct btrfs_inode, rb_node); 1555 inode = igrab(&entry->vfs_inode); 1556 if (inode) { 1557 spin_unlock(&root->inode_lock); 1558 return inode; 1559 } 1560 1561 objectid = btrfs_ino(&entry->vfs_inode) + 1; 1562 if (cond_resched_lock(&root->inode_lock)) 1563 goto again; 1564 1565 node = rb_next(node); 1566 } 1567 spin_unlock(&root->inode_lock); 1568 return NULL; 1569 } 1570 1571 static int in_block_group(u64 bytenr, 1572 struct btrfs_block_group_cache *block_group) 1573 { 1574 if (bytenr >= block_group->key.objectid && 1575 bytenr < block_group->key.objectid + block_group->key.offset) 1576 return 1; 1577 return 0; 1578 } 1579 1580 /* 1581 * get new location of data 1582 */ 1583 static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, 1584 u64 bytenr, u64 num_bytes) 1585 { 1586 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 1587 struct btrfs_path *path; 1588 struct btrfs_file_extent_item *fi; 1589 struct extent_buffer *leaf; 1590 int ret; 1591 1592 path = btrfs_alloc_path(); 1593 if (!path) 1594 return -ENOMEM; 1595 1596 bytenr -= BTRFS_I(reloc_inode)->index_cnt; 1597 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode), 1598 bytenr, 0); 1599 if (ret < 0) 1600 goto out; 1601 if (ret > 0) { 1602 ret = -ENOENT; 1603 goto out; 1604 } 1605 1606 leaf = path->nodes[0]; 1607 fi = btrfs_item_ptr(leaf, path->slots[0], 1608 struct btrfs_file_extent_item); 1609 1610 BUG_ON(btrfs_file_extent_offset(leaf, fi) || 1611 btrfs_file_extent_compression(leaf, fi) || 1612 btrfs_file_extent_encryption(leaf, fi) || 1613 btrfs_file_extent_other_encoding(leaf, fi)); 1614 1615 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { 1616 ret = -EINVAL; 1617 goto out; 1618 } 1619 1620 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1621 ret = 0; 1622 out: 1623 btrfs_free_path(path); 1624 return ret; 1625 } 1626 1627 /* 1628 * update file extent items in the tree leaf to point to 1629 * the new locations. 1630 */ 1631 static noinline_for_stack 1632 int replace_file_extents(struct btrfs_trans_handle *trans, 1633 struct reloc_control *rc, 1634 struct btrfs_root *root, 1635 struct extent_buffer *leaf) 1636 { 1637 struct btrfs_key key; 1638 struct btrfs_file_extent_item *fi; 1639 struct inode *inode = NULL; 1640 u64 parent; 1641 u64 bytenr; 1642 u64 new_bytenr = 0; 1643 u64 num_bytes; 1644 u64 end; 1645 u32 nritems; 1646 u32 i; 1647 int ret = 0; 1648 int first = 1; 1649 int dirty = 0; 1650 1651 if (rc->stage != UPDATE_DATA_PTRS) 1652 return 0; 1653 1654 /* reloc trees always use full backref */ 1655 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 1656 parent = leaf->start; 1657 else 1658 parent = 0; 1659 1660 nritems = btrfs_header_nritems(leaf); 1661 for (i = 0; i < nritems; i++) { 1662 cond_resched(); 1663 btrfs_item_key_to_cpu(leaf, &key, i); 1664 if (key.type != BTRFS_EXTENT_DATA_KEY) 1665 continue; 1666 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 1667 if (btrfs_file_extent_type(leaf, fi) == 1668 BTRFS_FILE_EXTENT_INLINE) 1669 continue; 1670 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1671 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 1672 if (bytenr == 0) 1673 continue; 1674 if (!in_block_group(bytenr, rc->block_group)) 1675 continue; 1676 1677 /* 1678 * if we are modifying block in fs tree, wait for readpage 1679 * to complete and drop the extent cache 1680 */ 1681 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 1682 if (first) { 1683 inode = find_next_inode(root, key.objectid); 1684 first = 0; 1685 } else if (inode && btrfs_ino(inode) < key.objectid) { 1686 btrfs_add_delayed_iput(inode); 1687 inode = find_next_inode(root, key.objectid); 1688 } 1689 if (inode && btrfs_ino(inode) == key.objectid) { 1690 end = key.offset + 1691 btrfs_file_extent_num_bytes(leaf, fi); 1692 WARN_ON(!IS_ALIGNED(key.offset, 1693 root->sectorsize)); 1694 WARN_ON(!IS_ALIGNED(end, root->sectorsize)); 1695 end--; 1696 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 1697 key.offset, end); 1698 if (!ret) 1699 continue; 1700 1701 btrfs_drop_extent_cache(inode, key.offset, end, 1702 1); 1703 unlock_extent(&BTRFS_I(inode)->io_tree, 1704 key.offset, end); 1705 } 1706 } 1707 1708 ret = get_new_location(rc->data_inode, &new_bytenr, 1709 bytenr, num_bytes); 1710 if (ret) { 1711 /* 1712 * Don't have to abort since we've not changed anything 1713 * in the file extent yet. 1714 */ 1715 break; 1716 } 1717 1718 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); 1719 dirty = 1; 1720 1721 key.offset -= btrfs_file_extent_offset(leaf, fi); 1722 ret = btrfs_inc_extent_ref(trans, root, new_bytenr, 1723 num_bytes, parent, 1724 btrfs_header_owner(leaf), 1725 key.objectid, key.offset); 1726 if (ret) { 1727 btrfs_abort_transaction(trans, ret); 1728 break; 1729 } 1730 1731 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1732 parent, btrfs_header_owner(leaf), 1733 key.objectid, key.offset); 1734 if (ret) { 1735 btrfs_abort_transaction(trans, ret); 1736 break; 1737 } 1738 } 1739 if (dirty) 1740 btrfs_mark_buffer_dirty(leaf); 1741 if (inode) 1742 btrfs_add_delayed_iput(inode); 1743 return ret; 1744 } 1745 1746 static noinline_for_stack 1747 int memcmp_node_keys(struct extent_buffer *eb, int slot, 1748 struct btrfs_path *path, int level) 1749 { 1750 struct btrfs_disk_key key1; 1751 struct btrfs_disk_key key2; 1752 btrfs_node_key(eb, &key1, slot); 1753 btrfs_node_key(path->nodes[level], &key2, path->slots[level]); 1754 return memcmp(&key1, &key2, sizeof(key1)); 1755 } 1756 1757 /* 1758 * try to replace tree blocks in fs tree with the new blocks 1759 * in reloc tree. tree blocks haven't been modified since the 1760 * reloc tree was create can be replaced. 1761 * 1762 * if a block was replaced, level of the block + 1 is returned. 1763 * if no block got replaced, 0 is returned. if there are other 1764 * errors, a negative error number is returned. 1765 */ 1766 static noinline_for_stack 1767 int replace_path(struct btrfs_trans_handle *trans, 1768 struct btrfs_root *dest, struct btrfs_root *src, 1769 struct btrfs_path *path, struct btrfs_key *next_key, 1770 int lowest_level, int max_level) 1771 { 1772 struct extent_buffer *eb; 1773 struct extent_buffer *parent; 1774 struct btrfs_key key; 1775 u64 old_bytenr; 1776 u64 new_bytenr; 1777 u64 old_ptr_gen; 1778 u64 new_ptr_gen; 1779 u64 last_snapshot; 1780 u32 blocksize; 1781 int cow = 0; 1782 int level; 1783 int ret; 1784 int slot; 1785 1786 BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 1787 BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); 1788 1789 last_snapshot = btrfs_root_last_snapshot(&src->root_item); 1790 again: 1791 slot = path->slots[lowest_level]; 1792 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); 1793 1794 eb = btrfs_lock_root_node(dest); 1795 btrfs_set_lock_blocking(eb); 1796 level = btrfs_header_level(eb); 1797 1798 if (level < lowest_level) { 1799 btrfs_tree_unlock(eb); 1800 free_extent_buffer(eb); 1801 return 0; 1802 } 1803 1804 if (cow) { 1805 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); 1806 BUG_ON(ret); 1807 } 1808 btrfs_set_lock_blocking(eb); 1809 1810 if (next_key) { 1811 next_key->objectid = (u64)-1; 1812 next_key->type = (u8)-1; 1813 next_key->offset = (u64)-1; 1814 } 1815 1816 parent = eb; 1817 while (1) { 1818 level = btrfs_header_level(parent); 1819 BUG_ON(level < lowest_level); 1820 1821 ret = btrfs_bin_search(parent, &key, level, &slot); 1822 if (ret && slot > 0) 1823 slot--; 1824 1825 if (next_key && slot + 1 < btrfs_header_nritems(parent)) 1826 btrfs_node_key_to_cpu(parent, next_key, slot + 1); 1827 1828 old_bytenr = btrfs_node_blockptr(parent, slot); 1829 blocksize = dest->nodesize; 1830 old_ptr_gen = btrfs_node_ptr_generation(parent, slot); 1831 1832 if (level <= max_level) { 1833 eb = path->nodes[level]; 1834 new_bytenr = btrfs_node_blockptr(eb, 1835 path->slots[level]); 1836 new_ptr_gen = btrfs_node_ptr_generation(eb, 1837 path->slots[level]); 1838 } else { 1839 new_bytenr = 0; 1840 new_ptr_gen = 0; 1841 } 1842 1843 if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) { 1844 ret = level; 1845 break; 1846 } 1847 1848 if (new_bytenr == 0 || old_ptr_gen > last_snapshot || 1849 memcmp_node_keys(parent, slot, path, level)) { 1850 if (level <= lowest_level) { 1851 ret = 0; 1852 break; 1853 } 1854 1855 eb = read_tree_block(dest, old_bytenr, old_ptr_gen); 1856 if (IS_ERR(eb)) { 1857 ret = PTR_ERR(eb); 1858 break; 1859 } else if (!extent_buffer_uptodate(eb)) { 1860 ret = -EIO; 1861 free_extent_buffer(eb); 1862 break; 1863 } 1864 btrfs_tree_lock(eb); 1865 if (cow) { 1866 ret = btrfs_cow_block(trans, dest, eb, parent, 1867 slot, &eb); 1868 BUG_ON(ret); 1869 } 1870 btrfs_set_lock_blocking(eb); 1871 1872 btrfs_tree_unlock(parent); 1873 free_extent_buffer(parent); 1874 1875 parent = eb; 1876 continue; 1877 } 1878 1879 if (!cow) { 1880 btrfs_tree_unlock(parent); 1881 free_extent_buffer(parent); 1882 cow = 1; 1883 goto again; 1884 } 1885 1886 btrfs_node_key_to_cpu(path->nodes[level], &key, 1887 path->slots[level]); 1888 btrfs_release_path(path); 1889 1890 path->lowest_level = level; 1891 ret = btrfs_search_slot(trans, src, &key, path, 0, 1); 1892 path->lowest_level = 0; 1893 BUG_ON(ret); 1894 1895 /* 1896 * swap blocks in fs tree and reloc tree. 1897 */ 1898 btrfs_set_node_blockptr(parent, slot, new_bytenr); 1899 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen); 1900 btrfs_mark_buffer_dirty(parent); 1901 1902 btrfs_set_node_blockptr(path->nodes[level], 1903 path->slots[level], old_bytenr); 1904 btrfs_set_node_ptr_generation(path->nodes[level], 1905 path->slots[level], old_ptr_gen); 1906 btrfs_mark_buffer_dirty(path->nodes[level]); 1907 1908 ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize, 1909 path->nodes[level]->start, 1910 src->root_key.objectid, level - 1, 0); 1911 BUG_ON(ret); 1912 ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize, 1913 0, dest->root_key.objectid, level - 1, 1914 0); 1915 BUG_ON(ret); 1916 1917 ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, 1918 path->nodes[level]->start, 1919 src->root_key.objectid, level - 1, 0); 1920 BUG_ON(ret); 1921 1922 ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, 1923 0, dest->root_key.objectid, level - 1, 1924 0); 1925 BUG_ON(ret); 1926 1927 btrfs_unlock_up_safe(path, 0); 1928 1929 ret = level; 1930 break; 1931 } 1932 btrfs_tree_unlock(parent); 1933 free_extent_buffer(parent); 1934 return ret; 1935 } 1936 1937 /* 1938 * helper to find next relocated block in reloc tree 1939 */ 1940 static noinline_for_stack 1941 int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, 1942 int *level) 1943 { 1944 struct extent_buffer *eb; 1945 int i; 1946 u64 last_snapshot; 1947 u32 nritems; 1948 1949 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 1950 1951 for (i = 0; i < *level; i++) { 1952 free_extent_buffer(path->nodes[i]); 1953 path->nodes[i] = NULL; 1954 } 1955 1956 for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) { 1957 eb = path->nodes[i]; 1958 nritems = btrfs_header_nritems(eb); 1959 while (path->slots[i] + 1 < nritems) { 1960 path->slots[i]++; 1961 if (btrfs_node_ptr_generation(eb, path->slots[i]) <= 1962 last_snapshot) 1963 continue; 1964 1965 *level = i; 1966 return 0; 1967 } 1968 free_extent_buffer(path->nodes[i]); 1969 path->nodes[i] = NULL; 1970 } 1971 return 1; 1972 } 1973 1974 /* 1975 * walk down reloc tree to find relocated block of lowest level 1976 */ 1977 static noinline_for_stack 1978 int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, 1979 int *level) 1980 { 1981 struct extent_buffer *eb = NULL; 1982 int i; 1983 u64 bytenr; 1984 u64 ptr_gen = 0; 1985 u64 last_snapshot; 1986 u32 nritems; 1987 1988 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 1989 1990 for (i = *level; i > 0; i--) { 1991 eb = path->nodes[i]; 1992 nritems = btrfs_header_nritems(eb); 1993 while (path->slots[i] < nritems) { 1994 ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]); 1995 if (ptr_gen > last_snapshot) 1996 break; 1997 path->slots[i]++; 1998 } 1999 if (path->slots[i] >= nritems) { 2000 if (i == *level) 2001 break; 2002 *level = i + 1; 2003 return 0; 2004 } 2005 if (i == 1) { 2006 *level = i; 2007 return 0; 2008 } 2009 2010 bytenr = btrfs_node_blockptr(eb, path->slots[i]); 2011 eb = read_tree_block(root, bytenr, ptr_gen); 2012 if (IS_ERR(eb)) { 2013 return PTR_ERR(eb); 2014 } else if (!extent_buffer_uptodate(eb)) { 2015 free_extent_buffer(eb); 2016 return -EIO; 2017 } 2018 BUG_ON(btrfs_header_level(eb) != i - 1); 2019 path->nodes[i - 1] = eb; 2020 path->slots[i - 1] = 0; 2021 } 2022 return 1; 2023 } 2024 2025 /* 2026 * invalidate extent cache for file extents whose key in range of 2027 * [min_key, max_key) 2028 */ 2029 static int invalidate_extent_cache(struct btrfs_root *root, 2030 struct btrfs_key *min_key, 2031 struct btrfs_key *max_key) 2032 { 2033 struct inode *inode = NULL; 2034 u64 objectid; 2035 u64 start, end; 2036 u64 ino; 2037 2038 objectid = min_key->objectid; 2039 while (1) { 2040 cond_resched(); 2041 iput(inode); 2042 2043 if (objectid > max_key->objectid) 2044 break; 2045 2046 inode = find_next_inode(root, objectid); 2047 if (!inode) 2048 break; 2049 ino = btrfs_ino(inode); 2050 2051 if (ino > max_key->objectid) { 2052 iput(inode); 2053 break; 2054 } 2055 2056 objectid = ino + 1; 2057 if (!S_ISREG(inode->i_mode)) 2058 continue; 2059 2060 if (unlikely(min_key->objectid == ino)) { 2061 if (min_key->type > BTRFS_EXTENT_DATA_KEY) 2062 continue; 2063 if (min_key->type < BTRFS_EXTENT_DATA_KEY) 2064 start = 0; 2065 else { 2066 start = min_key->offset; 2067 WARN_ON(!IS_ALIGNED(start, root->sectorsize)); 2068 } 2069 } else { 2070 start = 0; 2071 } 2072 2073 if (unlikely(max_key->objectid == ino)) { 2074 if (max_key->type < BTRFS_EXTENT_DATA_KEY) 2075 continue; 2076 if (max_key->type > BTRFS_EXTENT_DATA_KEY) { 2077 end = (u64)-1; 2078 } else { 2079 if (max_key->offset == 0) 2080 continue; 2081 end = max_key->offset; 2082 WARN_ON(!IS_ALIGNED(end, root->sectorsize)); 2083 end--; 2084 } 2085 } else { 2086 end = (u64)-1; 2087 } 2088 2089 /* the lock_extent waits for readpage to complete */ 2090 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 2091 btrfs_drop_extent_cache(inode, start, end, 1); 2092 unlock_extent(&BTRFS_I(inode)->io_tree, start, end); 2093 } 2094 return 0; 2095 } 2096 2097 static int find_next_key(struct btrfs_path *path, int level, 2098 struct btrfs_key *key) 2099 2100 { 2101 while (level < BTRFS_MAX_LEVEL) { 2102 if (!path->nodes[level]) 2103 break; 2104 if (path->slots[level] + 1 < 2105 btrfs_header_nritems(path->nodes[level])) { 2106 btrfs_node_key_to_cpu(path->nodes[level], key, 2107 path->slots[level] + 1); 2108 return 0; 2109 } 2110 level++; 2111 } 2112 return 1; 2113 } 2114 2115 /* 2116 * merge the relocated tree blocks in reloc tree with corresponding 2117 * fs tree. 2118 */ 2119 static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, 2120 struct btrfs_root *root) 2121 { 2122 LIST_HEAD(inode_list); 2123 struct btrfs_key key; 2124 struct btrfs_key next_key; 2125 struct btrfs_trans_handle *trans = NULL; 2126 struct btrfs_root *reloc_root; 2127 struct btrfs_root_item *root_item; 2128 struct btrfs_path *path; 2129 struct extent_buffer *leaf; 2130 int level; 2131 int max_level; 2132 int replaced = 0; 2133 int ret; 2134 int err = 0; 2135 u32 min_reserved; 2136 2137 path = btrfs_alloc_path(); 2138 if (!path) 2139 return -ENOMEM; 2140 path->reada = READA_FORWARD; 2141 2142 reloc_root = root->reloc_root; 2143 root_item = &reloc_root->root_item; 2144 2145 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 2146 level = btrfs_root_level(root_item); 2147 extent_buffer_get(reloc_root->node); 2148 path->nodes[level] = reloc_root->node; 2149 path->slots[level] = 0; 2150 } else { 2151 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 2152 2153 level = root_item->drop_level; 2154 BUG_ON(level == 0); 2155 path->lowest_level = level; 2156 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0); 2157 path->lowest_level = 0; 2158 if (ret < 0) { 2159 btrfs_free_path(path); 2160 return ret; 2161 } 2162 2163 btrfs_node_key_to_cpu(path->nodes[level], &next_key, 2164 path->slots[level]); 2165 WARN_ON(memcmp(&key, &next_key, sizeof(key))); 2166 2167 btrfs_unlock_up_safe(path, 0); 2168 } 2169 2170 min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2171 memset(&next_key, 0, sizeof(next_key)); 2172 2173 while (1) { 2174 ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved, 2175 BTRFS_RESERVE_FLUSH_ALL); 2176 if (ret) { 2177 err = ret; 2178 goto out; 2179 } 2180 trans = btrfs_start_transaction(root, 0); 2181 if (IS_ERR(trans)) { 2182 err = PTR_ERR(trans); 2183 trans = NULL; 2184 goto out; 2185 } 2186 trans->block_rsv = rc->block_rsv; 2187 2188 replaced = 0; 2189 max_level = level; 2190 2191 ret = walk_down_reloc_tree(reloc_root, path, &level); 2192 if (ret < 0) { 2193 err = ret; 2194 goto out; 2195 } 2196 if (ret > 0) 2197 break; 2198 2199 if (!find_next_key(path, level, &key) && 2200 btrfs_comp_cpu_keys(&next_key, &key) >= 0) { 2201 ret = 0; 2202 } else { 2203 ret = replace_path(trans, root, reloc_root, path, 2204 &next_key, level, max_level); 2205 } 2206 if (ret < 0) { 2207 err = ret; 2208 goto out; 2209 } 2210 2211 if (ret > 0) { 2212 level = ret; 2213 btrfs_node_key_to_cpu(path->nodes[level], &key, 2214 path->slots[level]); 2215 replaced = 1; 2216 } 2217 2218 ret = walk_up_reloc_tree(reloc_root, path, &level); 2219 if (ret > 0) 2220 break; 2221 2222 BUG_ON(level == 0); 2223 /* 2224 * save the merging progress in the drop_progress. 2225 * this is OK since root refs == 1 in this case. 2226 */ 2227 btrfs_node_key(path->nodes[level], &root_item->drop_progress, 2228 path->slots[level]); 2229 root_item->drop_level = level; 2230 2231 btrfs_end_transaction_throttle(trans, root); 2232 trans = NULL; 2233 2234 btrfs_btree_balance_dirty(root); 2235 2236 if (replaced && rc->stage == UPDATE_DATA_PTRS) 2237 invalidate_extent_cache(root, &key, &next_key); 2238 } 2239 2240 /* 2241 * handle the case only one block in the fs tree need to be 2242 * relocated and the block is tree root. 2243 */ 2244 leaf = btrfs_lock_root_node(root); 2245 ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf); 2246 btrfs_tree_unlock(leaf); 2247 free_extent_buffer(leaf); 2248 if (ret < 0) 2249 err = ret; 2250 out: 2251 btrfs_free_path(path); 2252 2253 if (err == 0) { 2254 memset(&root_item->drop_progress, 0, 2255 sizeof(root_item->drop_progress)); 2256 root_item->drop_level = 0; 2257 btrfs_set_root_refs(root_item, 0); 2258 btrfs_update_reloc_root(trans, root); 2259 } 2260 2261 if (trans) 2262 btrfs_end_transaction_throttle(trans, root); 2263 2264 btrfs_btree_balance_dirty(root); 2265 2266 if (replaced && rc->stage == UPDATE_DATA_PTRS) 2267 invalidate_extent_cache(root, &key, &next_key); 2268 2269 return err; 2270 } 2271 2272 static noinline_for_stack 2273 int prepare_to_merge(struct reloc_control *rc, int err) 2274 { 2275 struct btrfs_root *root = rc->extent_root; 2276 struct btrfs_root *reloc_root; 2277 struct btrfs_trans_handle *trans; 2278 LIST_HEAD(reloc_roots); 2279 u64 num_bytes = 0; 2280 int ret; 2281 2282 mutex_lock(&root->fs_info->reloc_mutex); 2283 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2284 rc->merging_rsv_size += rc->nodes_relocated * 2; 2285 mutex_unlock(&root->fs_info->reloc_mutex); 2286 2287 again: 2288 if (!err) { 2289 num_bytes = rc->merging_rsv_size; 2290 ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes, 2291 BTRFS_RESERVE_FLUSH_ALL); 2292 if (ret) 2293 err = ret; 2294 } 2295 2296 trans = btrfs_join_transaction(rc->extent_root); 2297 if (IS_ERR(trans)) { 2298 if (!err) 2299 btrfs_block_rsv_release(rc->extent_root, 2300 rc->block_rsv, num_bytes); 2301 return PTR_ERR(trans); 2302 } 2303 2304 if (!err) { 2305 if (num_bytes != rc->merging_rsv_size) { 2306 btrfs_end_transaction(trans, rc->extent_root); 2307 btrfs_block_rsv_release(rc->extent_root, 2308 rc->block_rsv, num_bytes); 2309 goto again; 2310 } 2311 } 2312 2313 rc->merge_reloc_tree = 1; 2314 2315 while (!list_empty(&rc->reloc_roots)) { 2316 reloc_root = list_entry(rc->reloc_roots.next, 2317 struct btrfs_root, root_list); 2318 list_del_init(&reloc_root->root_list); 2319 2320 root = read_fs_root(reloc_root->fs_info, 2321 reloc_root->root_key.offset); 2322 BUG_ON(IS_ERR(root)); 2323 BUG_ON(root->reloc_root != reloc_root); 2324 2325 /* 2326 * set reference count to 1, so btrfs_recover_relocation 2327 * knows it should resumes merging 2328 */ 2329 if (!err) 2330 btrfs_set_root_refs(&reloc_root->root_item, 1); 2331 btrfs_update_reloc_root(trans, root); 2332 2333 list_add(&reloc_root->root_list, &reloc_roots); 2334 } 2335 2336 list_splice(&reloc_roots, &rc->reloc_roots); 2337 2338 if (!err) 2339 btrfs_commit_transaction(trans, rc->extent_root); 2340 else 2341 btrfs_end_transaction(trans, rc->extent_root); 2342 return err; 2343 } 2344 2345 static noinline_for_stack 2346 void free_reloc_roots(struct list_head *list) 2347 { 2348 struct btrfs_root *reloc_root; 2349 2350 while (!list_empty(list)) { 2351 reloc_root = list_entry(list->next, struct btrfs_root, 2352 root_list); 2353 __del_reloc_root(reloc_root); 2354 } 2355 } 2356 2357 static noinline_for_stack 2358 void merge_reloc_roots(struct reloc_control *rc) 2359 { 2360 struct btrfs_root *root; 2361 struct btrfs_root *reloc_root; 2362 u64 last_snap; 2363 u64 otransid; 2364 u64 objectid; 2365 LIST_HEAD(reloc_roots); 2366 int found = 0; 2367 int ret = 0; 2368 again: 2369 root = rc->extent_root; 2370 2371 /* 2372 * this serializes us with btrfs_record_root_in_transaction, 2373 * we have to make sure nobody is in the middle of 2374 * adding their roots to the list while we are 2375 * doing this splice 2376 */ 2377 mutex_lock(&root->fs_info->reloc_mutex); 2378 list_splice_init(&rc->reloc_roots, &reloc_roots); 2379 mutex_unlock(&root->fs_info->reloc_mutex); 2380 2381 while (!list_empty(&reloc_roots)) { 2382 found = 1; 2383 reloc_root = list_entry(reloc_roots.next, 2384 struct btrfs_root, root_list); 2385 2386 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 2387 root = read_fs_root(reloc_root->fs_info, 2388 reloc_root->root_key.offset); 2389 BUG_ON(IS_ERR(root)); 2390 BUG_ON(root->reloc_root != reloc_root); 2391 2392 ret = merge_reloc_root(rc, root); 2393 if (ret) { 2394 if (list_empty(&reloc_root->root_list)) 2395 list_add_tail(&reloc_root->root_list, 2396 &reloc_roots); 2397 goto out; 2398 } 2399 } else { 2400 list_del_init(&reloc_root->root_list); 2401 } 2402 2403 /* 2404 * we keep the old last snapshot transid in rtranid when we 2405 * created the relocation tree. 2406 */ 2407 last_snap = btrfs_root_rtransid(&reloc_root->root_item); 2408 otransid = btrfs_root_otransid(&reloc_root->root_item); 2409 objectid = reloc_root->root_key.offset; 2410 2411 ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); 2412 if (ret < 0) { 2413 if (list_empty(&reloc_root->root_list)) 2414 list_add_tail(&reloc_root->root_list, 2415 &reloc_roots); 2416 goto out; 2417 } 2418 } 2419 2420 if (found) { 2421 found = 0; 2422 goto again; 2423 } 2424 out: 2425 if (ret) { 2426 btrfs_handle_fs_error(root->fs_info, ret, NULL); 2427 if (!list_empty(&reloc_roots)) 2428 free_reloc_roots(&reloc_roots); 2429 2430 /* new reloc root may be added */ 2431 mutex_lock(&root->fs_info->reloc_mutex); 2432 list_splice_init(&rc->reloc_roots, &reloc_roots); 2433 mutex_unlock(&root->fs_info->reloc_mutex); 2434 if (!list_empty(&reloc_roots)) 2435 free_reloc_roots(&reloc_roots); 2436 } 2437 2438 BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); 2439 } 2440 2441 static void free_block_list(struct rb_root *blocks) 2442 { 2443 struct tree_block *block; 2444 struct rb_node *rb_node; 2445 while ((rb_node = rb_first(blocks))) { 2446 block = rb_entry(rb_node, struct tree_block, rb_node); 2447 rb_erase(rb_node, blocks); 2448 kfree(block); 2449 } 2450 } 2451 2452 static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, 2453 struct btrfs_root *reloc_root) 2454 { 2455 struct btrfs_root *root; 2456 2457 if (reloc_root->last_trans == trans->transid) 2458 return 0; 2459 2460 root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset); 2461 BUG_ON(IS_ERR(root)); 2462 BUG_ON(root->reloc_root != reloc_root); 2463 2464 return btrfs_record_root_in_trans(trans, root); 2465 } 2466 2467 static noinline_for_stack 2468 struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, 2469 struct reloc_control *rc, 2470 struct backref_node *node, 2471 struct backref_edge *edges[]) 2472 { 2473 struct backref_node *next; 2474 struct btrfs_root *root; 2475 int index = 0; 2476 2477 next = node; 2478 while (1) { 2479 cond_resched(); 2480 next = walk_up_backref(next, edges, &index); 2481 root = next->root; 2482 BUG_ON(!root); 2483 BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state)); 2484 2485 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 2486 record_reloc_root_in_trans(trans, root); 2487 break; 2488 } 2489 2490 btrfs_record_root_in_trans(trans, root); 2491 root = root->reloc_root; 2492 2493 if (next->new_bytenr != root->node->start) { 2494 BUG_ON(next->new_bytenr); 2495 BUG_ON(!list_empty(&next->list)); 2496 next->new_bytenr = root->node->start; 2497 next->root = root; 2498 list_add_tail(&next->list, 2499 &rc->backref_cache.changed); 2500 __mark_block_processed(rc, next); 2501 break; 2502 } 2503 2504 WARN_ON(1); 2505 root = NULL; 2506 next = walk_down_backref(edges, &index); 2507 if (!next || next->level <= node->level) 2508 break; 2509 } 2510 if (!root) 2511 return NULL; 2512 2513 next = node; 2514 /* setup backref node path for btrfs_reloc_cow_block */ 2515 while (1) { 2516 rc->backref_cache.path[next->level] = next; 2517 if (--index < 0) 2518 break; 2519 next = edges[index]->node[UPPER]; 2520 } 2521 return root; 2522 } 2523 2524 /* 2525 * select a tree root for relocation. return NULL if the block 2526 * is reference counted. we should use do_relocation() in this 2527 * case. return a tree root pointer if the block isn't reference 2528 * counted. return -ENOENT if the block is root of reloc tree. 2529 */ 2530 static noinline_for_stack 2531 struct btrfs_root *select_one_root(struct backref_node *node) 2532 { 2533 struct backref_node *next; 2534 struct btrfs_root *root; 2535 struct btrfs_root *fs_root = NULL; 2536 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2537 int index = 0; 2538 2539 next = node; 2540 while (1) { 2541 cond_resched(); 2542 next = walk_up_backref(next, edges, &index); 2543 root = next->root; 2544 BUG_ON(!root); 2545 2546 /* no other choice for non-references counted tree */ 2547 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) 2548 return root; 2549 2550 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) 2551 fs_root = root; 2552 2553 if (next != node) 2554 return NULL; 2555 2556 next = walk_down_backref(edges, &index); 2557 if (!next || next->level <= node->level) 2558 break; 2559 } 2560 2561 if (!fs_root) 2562 return ERR_PTR(-ENOENT); 2563 return fs_root; 2564 } 2565 2566 static noinline_for_stack 2567 u64 calcu_metadata_size(struct reloc_control *rc, 2568 struct backref_node *node, int reserve) 2569 { 2570 struct backref_node *next = node; 2571 struct backref_edge *edge; 2572 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2573 u64 num_bytes = 0; 2574 int index = 0; 2575 2576 BUG_ON(reserve && node->processed); 2577 2578 while (next) { 2579 cond_resched(); 2580 while (1) { 2581 if (next->processed && (reserve || next != node)) 2582 break; 2583 2584 num_bytes += rc->extent_root->nodesize; 2585 2586 if (list_empty(&next->upper)) 2587 break; 2588 2589 edge = list_entry(next->upper.next, 2590 struct backref_edge, list[LOWER]); 2591 edges[index++] = edge; 2592 next = edge->node[UPPER]; 2593 } 2594 next = walk_down_backref(edges, &index); 2595 } 2596 return num_bytes; 2597 } 2598 2599 static int reserve_metadata_space(struct btrfs_trans_handle *trans, 2600 struct reloc_control *rc, 2601 struct backref_node *node) 2602 { 2603 struct btrfs_root *root = rc->extent_root; 2604 u64 num_bytes; 2605 int ret; 2606 u64 tmp; 2607 2608 num_bytes = calcu_metadata_size(rc, node, 1) * 2; 2609 2610 trans->block_rsv = rc->block_rsv; 2611 rc->reserved_bytes += num_bytes; 2612 2613 /* 2614 * We are under a transaction here so we can only do limited flushing. 2615 * If we get an enospc just kick back -EAGAIN so we know to drop the 2616 * transaction and try to refill when we can flush all the things. 2617 */ 2618 ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes, 2619 BTRFS_RESERVE_FLUSH_LIMIT); 2620 if (ret) { 2621 tmp = rc->extent_root->nodesize * RELOCATION_RESERVED_NODES; 2622 while (tmp <= rc->reserved_bytes) 2623 tmp <<= 1; 2624 /* 2625 * only one thread can access block_rsv at this point, 2626 * so we don't need hold lock to protect block_rsv. 2627 * we expand more reservation size here to allow enough 2628 * space for relocation and we will return eailer in 2629 * enospc case. 2630 */ 2631 rc->block_rsv->size = tmp + rc->extent_root->nodesize * 2632 RELOCATION_RESERVED_NODES; 2633 return -EAGAIN; 2634 } 2635 2636 return 0; 2637 } 2638 2639 /* 2640 * relocate a block tree, and then update pointers in upper level 2641 * blocks that reference the block to point to the new location. 2642 * 2643 * if called by link_to_upper, the block has already been relocated. 2644 * in that case this function just updates pointers. 2645 */ 2646 static int do_relocation(struct btrfs_trans_handle *trans, 2647 struct reloc_control *rc, 2648 struct backref_node *node, 2649 struct btrfs_key *key, 2650 struct btrfs_path *path, int lowest) 2651 { 2652 struct backref_node *upper; 2653 struct backref_edge *edge; 2654 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2655 struct btrfs_root *root; 2656 struct extent_buffer *eb; 2657 u32 blocksize; 2658 u64 bytenr; 2659 u64 generation; 2660 int slot; 2661 int ret; 2662 int err = 0; 2663 2664 BUG_ON(lowest && node->eb); 2665 2666 path->lowest_level = node->level + 1; 2667 rc->backref_cache.path[node->level] = node; 2668 list_for_each_entry(edge, &node->upper, list[LOWER]) { 2669 cond_resched(); 2670 2671 upper = edge->node[UPPER]; 2672 root = select_reloc_root(trans, rc, upper, edges); 2673 BUG_ON(!root); 2674 2675 if (upper->eb && !upper->locked) { 2676 if (!lowest) { 2677 ret = btrfs_bin_search(upper->eb, key, 2678 upper->level, &slot); 2679 BUG_ON(ret); 2680 bytenr = btrfs_node_blockptr(upper->eb, slot); 2681 if (node->eb->start == bytenr) 2682 goto next; 2683 } 2684 drop_node_buffer(upper); 2685 } 2686 2687 if (!upper->eb) { 2688 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2689 if (ret < 0) { 2690 err = ret; 2691 break; 2692 } 2693 BUG_ON(ret > 0); 2694 2695 if (!upper->eb) { 2696 upper->eb = path->nodes[upper->level]; 2697 path->nodes[upper->level] = NULL; 2698 } else { 2699 BUG_ON(upper->eb != path->nodes[upper->level]); 2700 } 2701 2702 upper->locked = 1; 2703 path->locks[upper->level] = 0; 2704 2705 slot = path->slots[upper->level]; 2706 btrfs_release_path(path); 2707 } else { 2708 ret = btrfs_bin_search(upper->eb, key, upper->level, 2709 &slot); 2710 BUG_ON(ret); 2711 } 2712 2713 bytenr = btrfs_node_blockptr(upper->eb, slot); 2714 if (lowest) { 2715 BUG_ON(bytenr != node->bytenr); 2716 } else { 2717 if (node->eb->start == bytenr) 2718 goto next; 2719 } 2720 2721 blocksize = root->nodesize; 2722 generation = btrfs_node_ptr_generation(upper->eb, slot); 2723 eb = read_tree_block(root, bytenr, generation); 2724 if (IS_ERR(eb)) { 2725 err = PTR_ERR(eb); 2726 goto next; 2727 } else if (!extent_buffer_uptodate(eb)) { 2728 free_extent_buffer(eb); 2729 err = -EIO; 2730 goto next; 2731 } 2732 btrfs_tree_lock(eb); 2733 btrfs_set_lock_blocking(eb); 2734 2735 if (!node->eb) { 2736 ret = btrfs_cow_block(trans, root, eb, upper->eb, 2737 slot, &eb); 2738 btrfs_tree_unlock(eb); 2739 free_extent_buffer(eb); 2740 if (ret < 0) { 2741 err = ret; 2742 goto next; 2743 } 2744 BUG_ON(node->eb != eb); 2745 } else { 2746 btrfs_set_node_blockptr(upper->eb, slot, 2747 node->eb->start); 2748 btrfs_set_node_ptr_generation(upper->eb, slot, 2749 trans->transid); 2750 btrfs_mark_buffer_dirty(upper->eb); 2751 2752 ret = btrfs_inc_extent_ref(trans, root, 2753 node->eb->start, blocksize, 2754 upper->eb->start, 2755 btrfs_header_owner(upper->eb), 2756 node->level, 0); 2757 BUG_ON(ret); 2758 2759 ret = btrfs_drop_subtree(trans, root, eb, upper->eb); 2760 BUG_ON(ret); 2761 } 2762 next: 2763 if (!upper->pending) 2764 drop_node_buffer(upper); 2765 else 2766 unlock_node_buffer(upper); 2767 if (err) 2768 break; 2769 } 2770 2771 if (!err && node->pending) { 2772 drop_node_buffer(node); 2773 list_move_tail(&node->list, &rc->backref_cache.changed); 2774 node->pending = 0; 2775 } 2776 2777 path->lowest_level = 0; 2778 BUG_ON(err == -ENOSPC); 2779 return err; 2780 } 2781 2782 static int link_to_upper(struct btrfs_trans_handle *trans, 2783 struct reloc_control *rc, 2784 struct backref_node *node, 2785 struct btrfs_path *path) 2786 { 2787 struct btrfs_key key; 2788 2789 btrfs_node_key_to_cpu(node->eb, &key, 0); 2790 return do_relocation(trans, rc, node, &key, path, 0); 2791 } 2792 2793 static int finish_pending_nodes(struct btrfs_trans_handle *trans, 2794 struct reloc_control *rc, 2795 struct btrfs_path *path, int err) 2796 { 2797 LIST_HEAD(list); 2798 struct backref_cache *cache = &rc->backref_cache; 2799 struct backref_node *node; 2800 int level; 2801 int ret; 2802 2803 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2804 while (!list_empty(&cache->pending[level])) { 2805 node = list_entry(cache->pending[level].next, 2806 struct backref_node, list); 2807 list_move_tail(&node->list, &list); 2808 BUG_ON(!node->pending); 2809 2810 if (!err) { 2811 ret = link_to_upper(trans, rc, node, path); 2812 if (ret < 0) 2813 err = ret; 2814 } 2815 } 2816 list_splice_init(&list, &cache->pending[level]); 2817 } 2818 return err; 2819 } 2820 2821 static void mark_block_processed(struct reloc_control *rc, 2822 u64 bytenr, u32 blocksize) 2823 { 2824 set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1, 2825 EXTENT_DIRTY); 2826 } 2827 2828 static void __mark_block_processed(struct reloc_control *rc, 2829 struct backref_node *node) 2830 { 2831 u32 blocksize; 2832 if (node->level == 0 || 2833 in_block_group(node->bytenr, rc->block_group)) { 2834 blocksize = rc->extent_root->nodesize; 2835 mark_block_processed(rc, node->bytenr, blocksize); 2836 } 2837 node->processed = 1; 2838 } 2839 2840 /* 2841 * mark a block and all blocks directly/indirectly reference the block 2842 * as processed. 2843 */ 2844 static void update_processed_blocks(struct reloc_control *rc, 2845 struct backref_node *node) 2846 { 2847 struct backref_node *next = node; 2848 struct backref_edge *edge; 2849 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2850 int index = 0; 2851 2852 while (next) { 2853 cond_resched(); 2854 while (1) { 2855 if (next->processed) 2856 break; 2857 2858 __mark_block_processed(rc, next); 2859 2860 if (list_empty(&next->upper)) 2861 break; 2862 2863 edge = list_entry(next->upper.next, 2864 struct backref_edge, list[LOWER]); 2865 edges[index++] = edge; 2866 next = edge->node[UPPER]; 2867 } 2868 next = walk_down_backref(edges, &index); 2869 } 2870 } 2871 2872 static int tree_block_processed(u64 bytenr, struct reloc_control *rc) 2873 { 2874 u32 blocksize = rc->extent_root->nodesize; 2875 2876 if (test_range_bit(&rc->processed_blocks, bytenr, 2877 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) 2878 return 1; 2879 return 0; 2880 } 2881 2882 static int get_tree_block_key(struct reloc_control *rc, 2883 struct tree_block *block) 2884 { 2885 struct extent_buffer *eb; 2886 2887 BUG_ON(block->key_ready); 2888 eb = read_tree_block(rc->extent_root, block->bytenr, 2889 block->key.offset); 2890 if (IS_ERR(eb)) { 2891 return PTR_ERR(eb); 2892 } else if (!extent_buffer_uptodate(eb)) { 2893 free_extent_buffer(eb); 2894 return -EIO; 2895 } 2896 WARN_ON(btrfs_header_level(eb) != block->level); 2897 if (block->level == 0) 2898 btrfs_item_key_to_cpu(eb, &block->key, 0); 2899 else 2900 btrfs_node_key_to_cpu(eb, &block->key, 0); 2901 free_extent_buffer(eb); 2902 block->key_ready = 1; 2903 return 0; 2904 } 2905 2906 /* 2907 * helper function to relocate a tree block 2908 */ 2909 static int relocate_tree_block(struct btrfs_trans_handle *trans, 2910 struct reloc_control *rc, 2911 struct backref_node *node, 2912 struct btrfs_key *key, 2913 struct btrfs_path *path) 2914 { 2915 struct btrfs_root *root; 2916 int ret = 0; 2917 2918 if (!node) 2919 return 0; 2920 2921 BUG_ON(node->processed); 2922 root = select_one_root(node); 2923 if (root == ERR_PTR(-ENOENT)) { 2924 update_processed_blocks(rc, node); 2925 goto out; 2926 } 2927 2928 if (!root || test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { 2929 ret = reserve_metadata_space(trans, rc, node); 2930 if (ret) 2931 goto out; 2932 } 2933 2934 if (root) { 2935 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { 2936 BUG_ON(node->new_bytenr); 2937 BUG_ON(!list_empty(&node->list)); 2938 btrfs_record_root_in_trans(trans, root); 2939 root = root->reloc_root; 2940 node->new_bytenr = root->node->start; 2941 node->root = root; 2942 list_add_tail(&node->list, &rc->backref_cache.changed); 2943 } else { 2944 path->lowest_level = node->level; 2945 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2946 btrfs_release_path(path); 2947 if (ret > 0) 2948 ret = 0; 2949 } 2950 if (!ret) 2951 update_processed_blocks(rc, node); 2952 } else { 2953 ret = do_relocation(trans, rc, node, key, path, 1); 2954 } 2955 out: 2956 if (ret || node->level == 0 || node->cowonly) 2957 remove_backref_node(&rc->backref_cache, node); 2958 return ret; 2959 } 2960 2961 /* 2962 * relocate a list of blocks 2963 */ 2964 static noinline_for_stack 2965 int relocate_tree_blocks(struct btrfs_trans_handle *trans, 2966 struct reloc_control *rc, struct rb_root *blocks) 2967 { 2968 struct backref_node *node; 2969 struct btrfs_path *path; 2970 struct tree_block *block; 2971 struct rb_node *rb_node; 2972 int ret; 2973 int err = 0; 2974 2975 path = btrfs_alloc_path(); 2976 if (!path) { 2977 err = -ENOMEM; 2978 goto out_free_blocks; 2979 } 2980 2981 rb_node = rb_first(blocks); 2982 while (rb_node) { 2983 block = rb_entry(rb_node, struct tree_block, rb_node); 2984 if (!block->key_ready) 2985 readahead_tree_block(rc->extent_root, block->bytenr); 2986 rb_node = rb_next(rb_node); 2987 } 2988 2989 rb_node = rb_first(blocks); 2990 while (rb_node) { 2991 block = rb_entry(rb_node, struct tree_block, rb_node); 2992 if (!block->key_ready) { 2993 err = get_tree_block_key(rc, block); 2994 if (err) 2995 goto out_free_path; 2996 } 2997 rb_node = rb_next(rb_node); 2998 } 2999 3000 rb_node = rb_first(blocks); 3001 while (rb_node) { 3002 block = rb_entry(rb_node, struct tree_block, rb_node); 3003 3004 node = build_backref_tree(rc, &block->key, 3005 block->level, block->bytenr); 3006 if (IS_ERR(node)) { 3007 err = PTR_ERR(node); 3008 goto out; 3009 } 3010 3011 ret = relocate_tree_block(trans, rc, node, &block->key, 3012 path); 3013 if (ret < 0) { 3014 if (ret != -EAGAIN || rb_node == rb_first(blocks)) 3015 err = ret; 3016 goto out; 3017 } 3018 rb_node = rb_next(rb_node); 3019 } 3020 out: 3021 err = finish_pending_nodes(trans, rc, path, err); 3022 3023 out_free_path: 3024 btrfs_free_path(path); 3025 out_free_blocks: 3026 free_block_list(blocks); 3027 return err; 3028 } 3029 3030 static noinline_for_stack 3031 int prealloc_file_extent_cluster(struct inode *inode, 3032 struct file_extent_cluster *cluster) 3033 { 3034 u64 alloc_hint = 0; 3035 u64 start; 3036 u64 end; 3037 u64 offset = BTRFS_I(inode)->index_cnt; 3038 u64 num_bytes; 3039 int nr = 0; 3040 int ret = 0; 3041 u64 prealloc_start = cluster->start - offset; 3042 u64 prealloc_end = cluster->end - offset; 3043 u64 cur_offset; 3044 3045 BUG_ON(cluster->start != cluster->boundary[0]); 3046 inode_lock(inode); 3047 3048 ret = btrfs_check_data_free_space(inode, prealloc_start, 3049 prealloc_end + 1 - prealloc_start); 3050 if (ret) 3051 goto out; 3052 3053 cur_offset = prealloc_start; 3054 while (nr < cluster->nr) { 3055 start = cluster->boundary[nr] - offset; 3056 if (nr + 1 < cluster->nr) 3057 end = cluster->boundary[nr + 1] - 1 - offset; 3058 else 3059 end = cluster->end - offset; 3060 3061 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 3062 num_bytes = end + 1 - start; 3063 if (cur_offset < start) 3064 btrfs_free_reserved_data_space(inode, cur_offset, 3065 start - cur_offset); 3066 ret = btrfs_prealloc_file_range(inode, 0, start, 3067 num_bytes, num_bytes, 3068 end + 1, &alloc_hint); 3069 cur_offset = end + 1; 3070 unlock_extent(&BTRFS_I(inode)->io_tree, start, end); 3071 if (ret) 3072 break; 3073 nr++; 3074 } 3075 if (cur_offset < prealloc_end) 3076 btrfs_free_reserved_data_space(inode, cur_offset, 3077 prealloc_end + 1 - cur_offset); 3078 out: 3079 inode_unlock(inode); 3080 return ret; 3081 } 3082 3083 static noinline_for_stack 3084 int setup_extent_mapping(struct inode *inode, u64 start, u64 end, 3085 u64 block_start) 3086 { 3087 struct btrfs_root *root = BTRFS_I(inode)->root; 3088 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 3089 struct extent_map *em; 3090 int ret = 0; 3091 3092 em = alloc_extent_map(); 3093 if (!em) 3094 return -ENOMEM; 3095 3096 em->start = start; 3097 em->len = end + 1 - start; 3098 em->block_len = em->len; 3099 em->block_start = block_start; 3100 em->bdev = root->fs_info->fs_devices->latest_bdev; 3101 set_bit(EXTENT_FLAG_PINNED, &em->flags); 3102 3103 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 3104 while (1) { 3105 write_lock(&em_tree->lock); 3106 ret = add_extent_mapping(em_tree, em, 0); 3107 write_unlock(&em_tree->lock); 3108 if (ret != -EEXIST) { 3109 free_extent_map(em); 3110 break; 3111 } 3112 btrfs_drop_extent_cache(inode, start, end, 0); 3113 } 3114 unlock_extent(&BTRFS_I(inode)->io_tree, start, end); 3115 return ret; 3116 } 3117 3118 static int relocate_file_extent_cluster(struct inode *inode, 3119 struct file_extent_cluster *cluster) 3120 { 3121 u64 page_start; 3122 u64 page_end; 3123 u64 offset = BTRFS_I(inode)->index_cnt; 3124 unsigned long index; 3125 unsigned long last_index; 3126 struct page *page; 3127 struct file_ra_state *ra; 3128 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 3129 int nr = 0; 3130 int ret = 0; 3131 3132 if (!cluster->nr) 3133 return 0; 3134 3135 ra = kzalloc(sizeof(*ra), GFP_NOFS); 3136 if (!ra) 3137 return -ENOMEM; 3138 3139 ret = prealloc_file_extent_cluster(inode, cluster); 3140 if (ret) 3141 goto out; 3142 3143 file_ra_state_init(ra, inode->i_mapping); 3144 3145 ret = setup_extent_mapping(inode, cluster->start - offset, 3146 cluster->end - offset, cluster->start); 3147 if (ret) 3148 goto out; 3149 3150 index = (cluster->start - offset) >> PAGE_SHIFT; 3151 last_index = (cluster->end - offset) >> PAGE_SHIFT; 3152 while (index <= last_index) { 3153 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_SIZE); 3154 if (ret) 3155 goto out; 3156 3157 page = find_lock_page(inode->i_mapping, index); 3158 if (!page) { 3159 page_cache_sync_readahead(inode->i_mapping, 3160 ra, NULL, index, 3161 last_index + 1 - index); 3162 page = find_or_create_page(inode->i_mapping, index, 3163 mask); 3164 if (!page) { 3165 btrfs_delalloc_release_metadata(inode, 3166 PAGE_SIZE); 3167 ret = -ENOMEM; 3168 goto out; 3169 } 3170 } 3171 3172 if (PageReadahead(page)) { 3173 page_cache_async_readahead(inode->i_mapping, 3174 ra, NULL, page, index, 3175 last_index + 1 - index); 3176 } 3177 3178 if (!PageUptodate(page)) { 3179 btrfs_readpage(NULL, page); 3180 lock_page(page); 3181 if (!PageUptodate(page)) { 3182 unlock_page(page); 3183 put_page(page); 3184 btrfs_delalloc_release_metadata(inode, 3185 PAGE_SIZE); 3186 ret = -EIO; 3187 goto out; 3188 } 3189 } 3190 3191 page_start = page_offset(page); 3192 page_end = page_start + PAGE_SIZE - 1; 3193 3194 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); 3195 3196 set_page_extent_mapped(page); 3197 3198 if (nr < cluster->nr && 3199 page_start + offset == cluster->boundary[nr]) { 3200 set_extent_bits(&BTRFS_I(inode)->io_tree, 3201 page_start, page_end, 3202 EXTENT_BOUNDARY); 3203 nr++; 3204 } 3205 3206 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); 3207 set_page_dirty(page); 3208 3209 unlock_extent(&BTRFS_I(inode)->io_tree, 3210 page_start, page_end); 3211 unlock_page(page); 3212 put_page(page); 3213 3214 index++; 3215 balance_dirty_pages_ratelimited(inode->i_mapping); 3216 btrfs_throttle(BTRFS_I(inode)->root); 3217 } 3218 WARN_ON(nr != cluster->nr); 3219 out: 3220 kfree(ra); 3221 return ret; 3222 } 3223 3224 static noinline_for_stack 3225 int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, 3226 struct file_extent_cluster *cluster) 3227 { 3228 int ret; 3229 3230 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) { 3231 ret = relocate_file_extent_cluster(inode, cluster); 3232 if (ret) 3233 return ret; 3234 cluster->nr = 0; 3235 } 3236 3237 if (!cluster->nr) 3238 cluster->start = extent_key->objectid; 3239 else 3240 BUG_ON(cluster->nr >= MAX_EXTENTS); 3241 cluster->end = extent_key->objectid + extent_key->offset - 1; 3242 cluster->boundary[cluster->nr] = extent_key->objectid; 3243 cluster->nr++; 3244 3245 if (cluster->nr >= MAX_EXTENTS) { 3246 ret = relocate_file_extent_cluster(inode, cluster); 3247 if (ret) 3248 return ret; 3249 cluster->nr = 0; 3250 } 3251 return 0; 3252 } 3253 3254 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3255 static int get_ref_objectid_v0(struct reloc_control *rc, 3256 struct btrfs_path *path, 3257 struct btrfs_key *extent_key, 3258 u64 *ref_objectid, int *path_change) 3259 { 3260 struct btrfs_key key; 3261 struct extent_buffer *leaf; 3262 struct btrfs_extent_ref_v0 *ref0; 3263 int ret; 3264 int slot; 3265 3266 leaf = path->nodes[0]; 3267 slot = path->slots[0]; 3268 while (1) { 3269 if (slot >= btrfs_header_nritems(leaf)) { 3270 ret = btrfs_next_leaf(rc->extent_root, path); 3271 if (ret < 0) 3272 return ret; 3273 BUG_ON(ret > 0); 3274 leaf = path->nodes[0]; 3275 slot = path->slots[0]; 3276 if (path_change) 3277 *path_change = 1; 3278 } 3279 btrfs_item_key_to_cpu(leaf, &key, slot); 3280 if (key.objectid != extent_key->objectid) 3281 return -ENOENT; 3282 3283 if (key.type != BTRFS_EXTENT_REF_V0_KEY) { 3284 slot++; 3285 continue; 3286 } 3287 ref0 = btrfs_item_ptr(leaf, slot, 3288 struct btrfs_extent_ref_v0); 3289 *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0); 3290 break; 3291 } 3292 return 0; 3293 } 3294 #endif 3295 3296 /* 3297 * helper to add a tree block to the list. 3298 * the major work is getting the generation and level of the block 3299 */ 3300 static int add_tree_block(struct reloc_control *rc, 3301 struct btrfs_key *extent_key, 3302 struct btrfs_path *path, 3303 struct rb_root *blocks) 3304 { 3305 struct extent_buffer *eb; 3306 struct btrfs_extent_item *ei; 3307 struct btrfs_tree_block_info *bi; 3308 struct tree_block *block; 3309 struct rb_node *rb_node; 3310 u32 item_size; 3311 int level = -1; 3312 u64 generation; 3313 3314 eb = path->nodes[0]; 3315 item_size = btrfs_item_size_nr(eb, path->slots[0]); 3316 3317 if (extent_key->type == BTRFS_METADATA_ITEM_KEY || 3318 item_size >= sizeof(*ei) + sizeof(*bi)) { 3319 ei = btrfs_item_ptr(eb, path->slots[0], 3320 struct btrfs_extent_item); 3321 if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) { 3322 bi = (struct btrfs_tree_block_info *)(ei + 1); 3323 level = btrfs_tree_block_level(eb, bi); 3324 } else { 3325 level = (int)extent_key->offset; 3326 } 3327 generation = btrfs_extent_generation(eb, ei); 3328 } else { 3329 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3330 u64 ref_owner; 3331 int ret; 3332 3333 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 3334 ret = get_ref_objectid_v0(rc, path, extent_key, 3335 &ref_owner, NULL); 3336 if (ret < 0) 3337 return ret; 3338 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); 3339 level = (int)ref_owner; 3340 /* FIXME: get real generation */ 3341 generation = 0; 3342 #else 3343 BUG(); 3344 #endif 3345 } 3346 3347 btrfs_release_path(path); 3348 3349 BUG_ON(level == -1); 3350 3351 block = kmalloc(sizeof(*block), GFP_NOFS); 3352 if (!block) 3353 return -ENOMEM; 3354 3355 block->bytenr = extent_key->objectid; 3356 block->key.objectid = rc->extent_root->nodesize; 3357 block->key.offset = generation; 3358 block->level = level; 3359 block->key_ready = 0; 3360 3361 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); 3362 if (rb_node) 3363 backref_tree_panic(rb_node, -EEXIST, block->bytenr); 3364 3365 return 0; 3366 } 3367 3368 /* 3369 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY 3370 */ 3371 static int __add_tree_block(struct reloc_control *rc, 3372 u64 bytenr, u32 blocksize, 3373 struct rb_root *blocks) 3374 { 3375 struct btrfs_path *path; 3376 struct btrfs_key key; 3377 int ret; 3378 bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info, 3379 SKINNY_METADATA); 3380 3381 if (tree_block_processed(bytenr, rc)) 3382 return 0; 3383 3384 if (tree_search(blocks, bytenr)) 3385 return 0; 3386 3387 path = btrfs_alloc_path(); 3388 if (!path) 3389 return -ENOMEM; 3390 again: 3391 key.objectid = bytenr; 3392 if (skinny) { 3393 key.type = BTRFS_METADATA_ITEM_KEY; 3394 key.offset = (u64)-1; 3395 } else { 3396 key.type = BTRFS_EXTENT_ITEM_KEY; 3397 key.offset = blocksize; 3398 } 3399 3400 path->search_commit_root = 1; 3401 path->skip_locking = 1; 3402 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); 3403 if (ret < 0) 3404 goto out; 3405 3406 if (ret > 0 && skinny) { 3407 if (path->slots[0]) { 3408 path->slots[0]--; 3409 btrfs_item_key_to_cpu(path->nodes[0], &key, 3410 path->slots[0]); 3411 if (key.objectid == bytenr && 3412 (key.type == BTRFS_METADATA_ITEM_KEY || 3413 (key.type == BTRFS_EXTENT_ITEM_KEY && 3414 key.offset == blocksize))) 3415 ret = 0; 3416 } 3417 3418 if (ret) { 3419 skinny = false; 3420 btrfs_release_path(path); 3421 goto again; 3422 } 3423 } 3424 BUG_ON(ret); 3425 3426 ret = add_tree_block(rc, &key, path, blocks); 3427 out: 3428 btrfs_free_path(path); 3429 return ret; 3430 } 3431 3432 /* 3433 * helper to check if the block use full backrefs for pointers in it 3434 */ 3435 static int block_use_full_backref(struct reloc_control *rc, 3436 struct extent_buffer *eb) 3437 { 3438 u64 flags; 3439 int ret; 3440 3441 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) || 3442 btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) 3443 return 1; 3444 3445 ret = btrfs_lookup_extent_info(NULL, rc->extent_root, 3446 eb->start, btrfs_header_level(eb), 1, 3447 NULL, &flags); 3448 BUG_ON(ret); 3449 3450 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) 3451 ret = 1; 3452 else 3453 ret = 0; 3454 return ret; 3455 } 3456 3457 static int delete_block_group_cache(struct btrfs_fs_info *fs_info, 3458 struct btrfs_block_group_cache *block_group, 3459 struct inode *inode, 3460 u64 ino) 3461 { 3462 struct btrfs_key key; 3463 struct btrfs_root *root = fs_info->tree_root; 3464 struct btrfs_trans_handle *trans; 3465 int ret = 0; 3466 3467 if (inode) 3468 goto truncate; 3469 3470 key.objectid = ino; 3471 key.type = BTRFS_INODE_ITEM_KEY; 3472 key.offset = 0; 3473 3474 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 3475 if (IS_ERR(inode) || is_bad_inode(inode)) { 3476 if (!IS_ERR(inode)) 3477 iput(inode); 3478 return -ENOENT; 3479 } 3480 3481 truncate: 3482 ret = btrfs_check_trunc_cache_free_space(root, 3483 &fs_info->global_block_rsv); 3484 if (ret) 3485 goto out; 3486 3487 trans = btrfs_join_transaction(root); 3488 if (IS_ERR(trans)) { 3489 ret = PTR_ERR(trans); 3490 goto out; 3491 } 3492 3493 ret = btrfs_truncate_free_space_cache(root, trans, block_group, inode); 3494 3495 btrfs_end_transaction(trans, root); 3496 btrfs_btree_balance_dirty(root); 3497 out: 3498 iput(inode); 3499 return ret; 3500 } 3501 3502 /* 3503 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY 3504 * this function scans fs tree to find blocks reference the data extent 3505 */ 3506 static int find_data_references(struct reloc_control *rc, 3507 struct btrfs_key *extent_key, 3508 struct extent_buffer *leaf, 3509 struct btrfs_extent_data_ref *ref, 3510 struct rb_root *blocks) 3511 { 3512 struct btrfs_path *path; 3513 struct tree_block *block; 3514 struct btrfs_root *root; 3515 struct btrfs_file_extent_item *fi; 3516 struct rb_node *rb_node; 3517 struct btrfs_key key; 3518 u64 ref_root; 3519 u64 ref_objectid; 3520 u64 ref_offset; 3521 u32 ref_count; 3522 u32 nritems; 3523 int err = 0; 3524 int added = 0; 3525 int counted; 3526 int ret; 3527 3528 ref_root = btrfs_extent_data_ref_root(leaf, ref); 3529 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); 3530 ref_offset = btrfs_extent_data_ref_offset(leaf, ref); 3531 ref_count = btrfs_extent_data_ref_count(leaf, ref); 3532 3533 /* 3534 * This is an extent belonging to the free space cache, lets just delete 3535 * it and redo the search. 3536 */ 3537 if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { 3538 ret = delete_block_group_cache(rc->extent_root->fs_info, 3539 rc->block_group, 3540 NULL, ref_objectid); 3541 if (ret != -ENOENT) 3542 return ret; 3543 ret = 0; 3544 } 3545 3546 path = btrfs_alloc_path(); 3547 if (!path) 3548 return -ENOMEM; 3549 path->reada = READA_FORWARD; 3550 3551 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3552 if (IS_ERR(root)) { 3553 err = PTR_ERR(root); 3554 goto out; 3555 } 3556 3557 key.objectid = ref_objectid; 3558 key.type = BTRFS_EXTENT_DATA_KEY; 3559 if (ref_offset > ((u64)-1 << 32)) 3560 key.offset = 0; 3561 else 3562 key.offset = ref_offset; 3563 3564 path->search_commit_root = 1; 3565 path->skip_locking = 1; 3566 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3567 if (ret < 0) { 3568 err = ret; 3569 goto out; 3570 } 3571 3572 leaf = path->nodes[0]; 3573 nritems = btrfs_header_nritems(leaf); 3574 /* 3575 * the references in tree blocks that use full backrefs 3576 * are not counted in 3577 */ 3578 if (block_use_full_backref(rc, leaf)) 3579 counted = 0; 3580 else 3581 counted = 1; 3582 rb_node = tree_search(blocks, leaf->start); 3583 if (rb_node) { 3584 if (counted) 3585 added = 1; 3586 else 3587 path->slots[0] = nritems; 3588 } 3589 3590 while (ref_count > 0) { 3591 while (path->slots[0] >= nritems) { 3592 ret = btrfs_next_leaf(root, path); 3593 if (ret < 0) { 3594 err = ret; 3595 goto out; 3596 } 3597 if (WARN_ON(ret > 0)) 3598 goto out; 3599 3600 leaf = path->nodes[0]; 3601 nritems = btrfs_header_nritems(leaf); 3602 added = 0; 3603 3604 if (block_use_full_backref(rc, leaf)) 3605 counted = 0; 3606 else 3607 counted = 1; 3608 rb_node = tree_search(blocks, leaf->start); 3609 if (rb_node) { 3610 if (counted) 3611 added = 1; 3612 else 3613 path->slots[0] = nritems; 3614 } 3615 } 3616 3617 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3618 if (WARN_ON(key.objectid != ref_objectid || 3619 key.type != BTRFS_EXTENT_DATA_KEY)) 3620 break; 3621 3622 fi = btrfs_item_ptr(leaf, path->slots[0], 3623 struct btrfs_file_extent_item); 3624 3625 if (btrfs_file_extent_type(leaf, fi) == 3626 BTRFS_FILE_EXTENT_INLINE) 3627 goto next; 3628 3629 if (btrfs_file_extent_disk_bytenr(leaf, fi) != 3630 extent_key->objectid) 3631 goto next; 3632 3633 key.offset -= btrfs_file_extent_offset(leaf, fi); 3634 if (key.offset != ref_offset) 3635 goto next; 3636 3637 if (counted) 3638 ref_count--; 3639 if (added) 3640 goto next; 3641 3642 if (!tree_block_processed(leaf->start, rc)) { 3643 block = kmalloc(sizeof(*block), GFP_NOFS); 3644 if (!block) { 3645 err = -ENOMEM; 3646 break; 3647 } 3648 block->bytenr = leaf->start; 3649 btrfs_item_key_to_cpu(leaf, &block->key, 0); 3650 block->level = 0; 3651 block->key_ready = 1; 3652 rb_node = tree_insert(blocks, block->bytenr, 3653 &block->rb_node); 3654 if (rb_node) 3655 backref_tree_panic(rb_node, -EEXIST, 3656 block->bytenr); 3657 } 3658 if (counted) 3659 added = 1; 3660 else 3661 path->slots[0] = nritems; 3662 next: 3663 path->slots[0]++; 3664 3665 } 3666 out: 3667 btrfs_free_path(path); 3668 return err; 3669 } 3670 3671 /* 3672 * helper to find all tree blocks that reference a given data extent 3673 */ 3674 static noinline_for_stack 3675 int add_data_references(struct reloc_control *rc, 3676 struct btrfs_key *extent_key, 3677 struct btrfs_path *path, 3678 struct rb_root *blocks) 3679 { 3680 struct btrfs_key key; 3681 struct extent_buffer *eb; 3682 struct btrfs_extent_data_ref *dref; 3683 struct btrfs_extent_inline_ref *iref; 3684 unsigned long ptr; 3685 unsigned long end; 3686 u32 blocksize = rc->extent_root->nodesize; 3687 int ret = 0; 3688 int err = 0; 3689 3690 eb = path->nodes[0]; 3691 ptr = btrfs_item_ptr_offset(eb, path->slots[0]); 3692 end = ptr + btrfs_item_size_nr(eb, path->slots[0]); 3693 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3694 if (ptr + sizeof(struct btrfs_extent_item_v0) == end) 3695 ptr = end; 3696 else 3697 #endif 3698 ptr += sizeof(struct btrfs_extent_item); 3699 3700 while (ptr < end) { 3701 iref = (struct btrfs_extent_inline_ref *)ptr; 3702 key.type = btrfs_extent_inline_ref_type(eb, iref); 3703 if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 3704 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 3705 ret = __add_tree_block(rc, key.offset, blocksize, 3706 blocks); 3707 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 3708 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 3709 ret = find_data_references(rc, extent_key, 3710 eb, dref, blocks); 3711 } else { 3712 BUG(); 3713 } 3714 if (ret) { 3715 err = ret; 3716 goto out; 3717 } 3718 ptr += btrfs_extent_inline_ref_size(key.type); 3719 } 3720 WARN_ON(ptr > end); 3721 3722 while (1) { 3723 cond_resched(); 3724 eb = path->nodes[0]; 3725 if (path->slots[0] >= btrfs_header_nritems(eb)) { 3726 ret = btrfs_next_leaf(rc->extent_root, path); 3727 if (ret < 0) { 3728 err = ret; 3729 break; 3730 } 3731 if (ret > 0) 3732 break; 3733 eb = path->nodes[0]; 3734 } 3735 3736 btrfs_item_key_to_cpu(eb, &key, path->slots[0]); 3737 if (key.objectid != extent_key->objectid) 3738 break; 3739 3740 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3741 if (key.type == BTRFS_SHARED_DATA_REF_KEY || 3742 key.type == BTRFS_EXTENT_REF_V0_KEY) { 3743 #else 3744 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); 3745 if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 3746 #endif 3747 ret = __add_tree_block(rc, key.offset, blocksize, 3748 blocks); 3749 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 3750 dref = btrfs_item_ptr(eb, path->slots[0], 3751 struct btrfs_extent_data_ref); 3752 ret = find_data_references(rc, extent_key, 3753 eb, dref, blocks); 3754 } else { 3755 ret = 0; 3756 } 3757 if (ret) { 3758 err = ret; 3759 break; 3760 } 3761 path->slots[0]++; 3762 } 3763 out: 3764 btrfs_release_path(path); 3765 if (err) 3766 free_block_list(blocks); 3767 return err; 3768 } 3769 3770 /* 3771 * helper to find next unprocessed extent 3772 */ 3773 static noinline_for_stack 3774 int find_next_extent(struct reloc_control *rc, struct btrfs_path *path, 3775 struct btrfs_key *extent_key) 3776 { 3777 struct btrfs_key key; 3778 struct extent_buffer *leaf; 3779 u64 start, end, last; 3780 int ret; 3781 3782 last = rc->block_group->key.objectid + rc->block_group->key.offset; 3783 while (1) { 3784 cond_resched(); 3785 if (rc->search_start >= last) { 3786 ret = 1; 3787 break; 3788 } 3789 3790 key.objectid = rc->search_start; 3791 key.type = BTRFS_EXTENT_ITEM_KEY; 3792 key.offset = 0; 3793 3794 path->search_commit_root = 1; 3795 path->skip_locking = 1; 3796 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 3797 0, 0); 3798 if (ret < 0) 3799 break; 3800 next: 3801 leaf = path->nodes[0]; 3802 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 3803 ret = btrfs_next_leaf(rc->extent_root, path); 3804 if (ret != 0) 3805 break; 3806 leaf = path->nodes[0]; 3807 } 3808 3809 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3810 if (key.objectid >= last) { 3811 ret = 1; 3812 break; 3813 } 3814 3815 if (key.type != BTRFS_EXTENT_ITEM_KEY && 3816 key.type != BTRFS_METADATA_ITEM_KEY) { 3817 path->slots[0]++; 3818 goto next; 3819 } 3820 3821 if (key.type == BTRFS_EXTENT_ITEM_KEY && 3822 key.objectid + key.offset <= rc->search_start) { 3823 path->slots[0]++; 3824 goto next; 3825 } 3826 3827 if (key.type == BTRFS_METADATA_ITEM_KEY && 3828 key.objectid + rc->extent_root->nodesize <= 3829 rc->search_start) { 3830 path->slots[0]++; 3831 goto next; 3832 } 3833 3834 ret = find_first_extent_bit(&rc->processed_blocks, 3835 key.objectid, &start, &end, 3836 EXTENT_DIRTY, NULL); 3837 3838 if (ret == 0 && start <= key.objectid) { 3839 btrfs_release_path(path); 3840 rc->search_start = end + 1; 3841 } else { 3842 if (key.type == BTRFS_EXTENT_ITEM_KEY) 3843 rc->search_start = key.objectid + key.offset; 3844 else 3845 rc->search_start = key.objectid + 3846 rc->extent_root->nodesize; 3847 memcpy(extent_key, &key, sizeof(key)); 3848 return 0; 3849 } 3850 } 3851 btrfs_release_path(path); 3852 return ret; 3853 } 3854 3855 static void set_reloc_control(struct reloc_control *rc) 3856 { 3857 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3858 3859 mutex_lock(&fs_info->reloc_mutex); 3860 fs_info->reloc_ctl = rc; 3861 mutex_unlock(&fs_info->reloc_mutex); 3862 } 3863 3864 static void unset_reloc_control(struct reloc_control *rc) 3865 { 3866 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3867 3868 mutex_lock(&fs_info->reloc_mutex); 3869 fs_info->reloc_ctl = NULL; 3870 mutex_unlock(&fs_info->reloc_mutex); 3871 } 3872 3873 static int check_extent_flags(u64 flags) 3874 { 3875 if ((flags & BTRFS_EXTENT_FLAG_DATA) && 3876 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) 3877 return 1; 3878 if (!(flags & BTRFS_EXTENT_FLAG_DATA) && 3879 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) 3880 return 1; 3881 if ((flags & BTRFS_EXTENT_FLAG_DATA) && 3882 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 3883 return 1; 3884 return 0; 3885 } 3886 3887 static noinline_for_stack 3888 int prepare_to_relocate(struct reloc_control *rc) 3889 { 3890 struct btrfs_trans_handle *trans; 3891 int ret; 3892 3893 rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root, 3894 BTRFS_BLOCK_RSV_TEMP); 3895 if (!rc->block_rsv) 3896 return -ENOMEM; 3897 3898 memset(&rc->cluster, 0, sizeof(rc->cluster)); 3899 rc->search_start = rc->block_group->key.objectid; 3900 rc->extents_found = 0; 3901 rc->nodes_relocated = 0; 3902 rc->merging_rsv_size = 0; 3903 rc->reserved_bytes = 0; 3904 rc->block_rsv->size = rc->extent_root->nodesize * 3905 RELOCATION_RESERVED_NODES; 3906 ret = btrfs_block_rsv_refill(rc->extent_root, 3907 rc->block_rsv, rc->block_rsv->size, 3908 BTRFS_RESERVE_FLUSH_ALL); 3909 if (ret) 3910 return ret; 3911 3912 rc->create_reloc_tree = 1; 3913 set_reloc_control(rc); 3914 3915 trans = btrfs_join_transaction(rc->extent_root); 3916 if (IS_ERR(trans)) { 3917 unset_reloc_control(rc); 3918 /* 3919 * extent tree is not a ref_cow tree and has no reloc_root to 3920 * cleanup. And callers are responsible to free the above 3921 * block rsv. 3922 */ 3923 return PTR_ERR(trans); 3924 } 3925 btrfs_commit_transaction(trans, rc->extent_root); 3926 return 0; 3927 } 3928 3929 /* 3930 * Qgroup fixer for data chunk relocation. 3931 * The data relocation is done in the following steps 3932 * 1) Copy data extents into data reloc tree 3933 * 2) Create tree reloc tree(special snapshot) for related subvolumes 3934 * 3) Modify file extents in tree reloc tree 3935 * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks 3936 * 3937 * The problem is, data and tree reloc tree are not accounted to qgroup, 3938 * and 4) will only info qgroup to track tree blocks change, not file extents 3939 * in the tree blocks. 3940 * 3941 * The good news is, related data extents are all in data reloc tree, so we 3942 * only need to info qgroup to track all file extents in data reloc tree 3943 * before commit trans. 3944 */ 3945 static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans, 3946 struct reloc_control *rc) 3947 { 3948 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3949 struct inode *inode = rc->data_inode; 3950 struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root; 3951 struct btrfs_path *path; 3952 struct btrfs_key key; 3953 int ret = 0; 3954 3955 if (!fs_info->quota_enabled) 3956 return 0; 3957 3958 /* 3959 * Only for stage where we update data pointers the qgroup fix is 3960 * valid. 3961 * For MOVING_DATA stage, we will miss the timing of swapping tree 3962 * blocks, and won't fix it. 3963 */ 3964 if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found)) 3965 return 0; 3966 3967 path = btrfs_alloc_path(); 3968 if (!path) 3969 return -ENOMEM; 3970 key.objectid = btrfs_ino(inode); 3971 key.type = BTRFS_EXTENT_DATA_KEY; 3972 key.offset = 0; 3973 3974 ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0); 3975 if (ret < 0) 3976 goto out; 3977 3978 lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1); 3979 while (1) { 3980 struct btrfs_file_extent_item *fi; 3981 3982 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 3983 if (key.objectid > btrfs_ino(inode)) 3984 break; 3985 if (key.type != BTRFS_EXTENT_DATA_KEY) 3986 goto next; 3987 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 3988 struct btrfs_file_extent_item); 3989 if (btrfs_file_extent_type(path->nodes[0], fi) != 3990 BTRFS_FILE_EXTENT_REG) 3991 goto next; 3992 ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info, 3993 btrfs_file_extent_disk_bytenr(path->nodes[0], fi), 3994 btrfs_file_extent_disk_num_bytes(path->nodes[0], fi), 3995 GFP_NOFS); 3996 if (ret < 0) 3997 break; 3998 next: 3999 ret = btrfs_next_item(data_reloc_root, path); 4000 if (ret < 0) 4001 break; 4002 if (ret > 0) { 4003 ret = 0; 4004 break; 4005 } 4006 } 4007 unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1); 4008 out: 4009 btrfs_free_path(path); 4010 return ret; 4011 } 4012 4013 static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 4014 { 4015 struct rb_root blocks = RB_ROOT; 4016 struct btrfs_key key; 4017 struct btrfs_trans_handle *trans = NULL; 4018 struct btrfs_path *path; 4019 struct btrfs_extent_item *ei; 4020 u64 flags; 4021 u32 item_size; 4022 int ret; 4023 int err = 0; 4024 int progress = 0; 4025 4026 path = btrfs_alloc_path(); 4027 if (!path) 4028 return -ENOMEM; 4029 path->reada = READA_FORWARD; 4030 4031 ret = prepare_to_relocate(rc); 4032 if (ret) { 4033 err = ret; 4034 goto out_free; 4035 } 4036 4037 while (1) { 4038 rc->reserved_bytes = 0; 4039 ret = btrfs_block_rsv_refill(rc->extent_root, 4040 rc->block_rsv, rc->block_rsv->size, 4041 BTRFS_RESERVE_FLUSH_ALL); 4042 if (ret) { 4043 err = ret; 4044 break; 4045 } 4046 progress++; 4047 trans = btrfs_start_transaction(rc->extent_root, 0); 4048 if (IS_ERR(trans)) { 4049 err = PTR_ERR(trans); 4050 trans = NULL; 4051 break; 4052 } 4053 restart: 4054 if (update_backref_cache(trans, &rc->backref_cache)) { 4055 btrfs_end_transaction(trans, rc->extent_root); 4056 continue; 4057 } 4058 4059 ret = find_next_extent(rc, path, &key); 4060 if (ret < 0) 4061 err = ret; 4062 if (ret != 0) 4063 break; 4064 4065 rc->extents_found++; 4066 4067 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 4068 struct btrfs_extent_item); 4069 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 4070 if (item_size >= sizeof(*ei)) { 4071 flags = btrfs_extent_flags(path->nodes[0], ei); 4072 ret = check_extent_flags(flags); 4073 BUG_ON(ret); 4074 4075 } else { 4076 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 4077 u64 ref_owner; 4078 int path_change = 0; 4079 4080 BUG_ON(item_size != 4081 sizeof(struct btrfs_extent_item_v0)); 4082 ret = get_ref_objectid_v0(rc, path, &key, &ref_owner, 4083 &path_change); 4084 if (ret < 0) { 4085 err = ret; 4086 break; 4087 } 4088 if (ref_owner < BTRFS_FIRST_FREE_OBJECTID) 4089 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK; 4090 else 4091 flags = BTRFS_EXTENT_FLAG_DATA; 4092 4093 if (path_change) { 4094 btrfs_release_path(path); 4095 4096 path->search_commit_root = 1; 4097 path->skip_locking = 1; 4098 ret = btrfs_search_slot(NULL, rc->extent_root, 4099 &key, path, 0, 0); 4100 if (ret < 0) { 4101 err = ret; 4102 break; 4103 } 4104 BUG_ON(ret > 0); 4105 } 4106 #else 4107 BUG(); 4108 #endif 4109 } 4110 4111 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 4112 ret = add_tree_block(rc, &key, path, &blocks); 4113 } else if (rc->stage == UPDATE_DATA_PTRS && 4114 (flags & BTRFS_EXTENT_FLAG_DATA)) { 4115 ret = add_data_references(rc, &key, path, &blocks); 4116 } else { 4117 btrfs_release_path(path); 4118 ret = 0; 4119 } 4120 if (ret < 0) { 4121 err = ret; 4122 break; 4123 } 4124 4125 if (!RB_EMPTY_ROOT(&blocks)) { 4126 ret = relocate_tree_blocks(trans, rc, &blocks); 4127 if (ret < 0) { 4128 /* 4129 * if we fail to relocate tree blocks, force to update 4130 * backref cache when committing transaction. 4131 */ 4132 rc->backref_cache.last_trans = trans->transid - 1; 4133 4134 if (ret != -EAGAIN) { 4135 err = ret; 4136 break; 4137 } 4138 rc->extents_found--; 4139 rc->search_start = key.objectid; 4140 } 4141 } 4142 4143 btrfs_end_transaction_throttle(trans, rc->extent_root); 4144 btrfs_btree_balance_dirty(rc->extent_root); 4145 trans = NULL; 4146 4147 if (rc->stage == MOVE_DATA_EXTENTS && 4148 (flags & BTRFS_EXTENT_FLAG_DATA)) { 4149 rc->found_file_extent = 1; 4150 ret = relocate_data_extent(rc->data_inode, 4151 &key, &rc->cluster); 4152 if (ret < 0) { 4153 err = ret; 4154 break; 4155 } 4156 } 4157 } 4158 if (trans && progress && err == -ENOSPC) { 4159 ret = btrfs_force_chunk_alloc(trans, rc->extent_root, 4160 rc->block_group->flags); 4161 if (ret == 1) { 4162 err = 0; 4163 progress = 0; 4164 goto restart; 4165 } 4166 } 4167 4168 btrfs_release_path(path); 4169 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY); 4170 4171 if (trans) { 4172 btrfs_end_transaction_throttle(trans, rc->extent_root); 4173 btrfs_btree_balance_dirty(rc->extent_root); 4174 } 4175 4176 if (!err) { 4177 ret = relocate_file_extent_cluster(rc->data_inode, 4178 &rc->cluster); 4179 if (ret < 0) 4180 err = ret; 4181 } 4182 4183 rc->create_reloc_tree = 0; 4184 set_reloc_control(rc); 4185 4186 backref_cache_cleanup(&rc->backref_cache); 4187 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); 4188 4189 err = prepare_to_merge(rc, err); 4190 4191 merge_reloc_roots(rc); 4192 4193 rc->merge_reloc_tree = 0; 4194 unset_reloc_control(rc); 4195 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); 4196 4197 /* get rid of pinned extents */ 4198 trans = btrfs_join_transaction(rc->extent_root); 4199 if (IS_ERR(trans)) { 4200 err = PTR_ERR(trans); 4201 goto out_free; 4202 } 4203 ret = qgroup_fix_relocated_data_extents(trans, rc); 4204 if (ret < 0) { 4205 btrfs_abort_transaction(trans, ret); 4206 if (!err) 4207 err = ret; 4208 goto out_free; 4209 } 4210 btrfs_commit_transaction(trans, rc->extent_root); 4211 out_free: 4212 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); 4213 btrfs_free_path(path); 4214 return err; 4215 } 4216 4217 static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 4218 struct btrfs_root *root, u64 objectid) 4219 { 4220 struct btrfs_path *path; 4221 struct btrfs_inode_item *item; 4222 struct extent_buffer *leaf; 4223 int ret; 4224 4225 path = btrfs_alloc_path(); 4226 if (!path) 4227 return -ENOMEM; 4228 4229 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 4230 if (ret) 4231 goto out; 4232 4233 leaf = path->nodes[0]; 4234 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 4235 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 4236 btrfs_set_inode_generation(leaf, item, 1); 4237 btrfs_set_inode_size(leaf, item, 0); 4238 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 4239 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | 4240 BTRFS_INODE_PREALLOC); 4241 btrfs_mark_buffer_dirty(leaf); 4242 out: 4243 btrfs_free_path(path); 4244 return ret; 4245 } 4246 4247 /* 4248 * helper to create inode for data relocation. 4249 * the inode is in data relocation tree and its link count is 0 4250 */ 4251 static noinline_for_stack 4252 struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, 4253 struct btrfs_block_group_cache *group) 4254 { 4255 struct inode *inode = NULL; 4256 struct btrfs_trans_handle *trans; 4257 struct btrfs_root *root; 4258 struct btrfs_key key; 4259 u64 objectid; 4260 int err = 0; 4261 4262 root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID); 4263 if (IS_ERR(root)) 4264 return ERR_CAST(root); 4265 4266 trans = btrfs_start_transaction(root, 6); 4267 if (IS_ERR(trans)) 4268 return ERR_CAST(trans); 4269 4270 err = btrfs_find_free_objectid(root, &objectid); 4271 if (err) 4272 goto out; 4273 4274 err = __insert_orphan_inode(trans, root, objectid); 4275 BUG_ON(err); 4276 4277 key.objectid = objectid; 4278 key.type = BTRFS_INODE_ITEM_KEY; 4279 key.offset = 0; 4280 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); 4281 BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); 4282 BTRFS_I(inode)->index_cnt = group->key.objectid; 4283 4284 err = btrfs_orphan_add(trans, inode); 4285 out: 4286 btrfs_end_transaction(trans, root); 4287 btrfs_btree_balance_dirty(root); 4288 if (err) { 4289 if (inode) 4290 iput(inode); 4291 inode = ERR_PTR(err); 4292 } 4293 return inode; 4294 } 4295 4296 static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info) 4297 { 4298 struct reloc_control *rc; 4299 4300 rc = kzalloc(sizeof(*rc), GFP_NOFS); 4301 if (!rc) 4302 return NULL; 4303 4304 INIT_LIST_HEAD(&rc->reloc_roots); 4305 backref_cache_init(&rc->backref_cache); 4306 mapping_tree_init(&rc->reloc_root_tree); 4307 extent_io_tree_init(&rc->processed_blocks, 4308 fs_info->btree_inode->i_mapping); 4309 return rc; 4310 } 4311 4312 /* 4313 * function to relocate all extents in a block group. 4314 */ 4315 int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) 4316 { 4317 struct btrfs_fs_info *fs_info = extent_root->fs_info; 4318 struct reloc_control *rc; 4319 struct inode *inode; 4320 struct btrfs_path *path; 4321 int ret; 4322 int rw = 0; 4323 int err = 0; 4324 4325 rc = alloc_reloc_control(fs_info); 4326 if (!rc) 4327 return -ENOMEM; 4328 4329 rc->extent_root = extent_root; 4330 4331 rc->block_group = btrfs_lookup_block_group(fs_info, group_start); 4332 BUG_ON(!rc->block_group); 4333 4334 ret = btrfs_inc_block_group_ro(extent_root, rc->block_group); 4335 if (ret) { 4336 err = ret; 4337 goto out; 4338 } 4339 rw = 1; 4340 4341 path = btrfs_alloc_path(); 4342 if (!path) { 4343 err = -ENOMEM; 4344 goto out; 4345 } 4346 4347 inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group, 4348 path); 4349 btrfs_free_path(path); 4350 4351 if (!IS_ERR(inode)) 4352 ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0); 4353 else 4354 ret = PTR_ERR(inode); 4355 4356 if (ret && ret != -ENOENT) { 4357 err = ret; 4358 goto out; 4359 } 4360 4361 rc->data_inode = create_reloc_inode(fs_info, rc->block_group); 4362 if (IS_ERR(rc->data_inode)) { 4363 err = PTR_ERR(rc->data_inode); 4364 rc->data_inode = NULL; 4365 goto out; 4366 } 4367 4368 btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", 4369 rc->block_group->key.objectid, rc->block_group->flags); 4370 4371 btrfs_wait_block_group_reservations(rc->block_group); 4372 btrfs_wait_nocow_writers(rc->block_group); 4373 btrfs_wait_ordered_roots(fs_info, -1, 4374 rc->block_group->key.objectid, 4375 rc->block_group->key.offset); 4376 4377 while (1) { 4378 mutex_lock(&fs_info->cleaner_mutex); 4379 ret = relocate_block_group(rc); 4380 mutex_unlock(&fs_info->cleaner_mutex); 4381 if (ret < 0) { 4382 err = ret; 4383 goto out; 4384 } 4385 4386 if (rc->extents_found == 0) 4387 break; 4388 4389 btrfs_info(extent_root->fs_info, "found %llu extents", 4390 rc->extents_found); 4391 4392 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { 4393 ret = btrfs_wait_ordered_range(rc->data_inode, 0, 4394 (u64)-1); 4395 if (ret) { 4396 err = ret; 4397 goto out; 4398 } 4399 invalidate_mapping_pages(rc->data_inode->i_mapping, 4400 0, -1); 4401 rc->stage = UPDATE_DATA_PTRS; 4402 } 4403 } 4404 4405 WARN_ON(rc->block_group->pinned > 0); 4406 WARN_ON(rc->block_group->reserved > 0); 4407 WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); 4408 out: 4409 if (err && rw) 4410 btrfs_dec_block_group_ro(extent_root, rc->block_group); 4411 iput(rc->data_inode); 4412 btrfs_put_block_group(rc->block_group); 4413 kfree(rc); 4414 return err; 4415 } 4416 4417 static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) 4418 { 4419 struct btrfs_trans_handle *trans; 4420 int ret, err; 4421 4422 trans = btrfs_start_transaction(root->fs_info->tree_root, 0); 4423 if (IS_ERR(trans)) 4424 return PTR_ERR(trans); 4425 4426 memset(&root->root_item.drop_progress, 0, 4427 sizeof(root->root_item.drop_progress)); 4428 root->root_item.drop_level = 0; 4429 btrfs_set_root_refs(&root->root_item, 0); 4430 ret = btrfs_update_root(trans, root->fs_info->tree_root, 4431 &root->root_key, &root->root_item); 4432 4433 err = btrfs_end_transaction(trans, root->fs_info->tree_root); 4434 if (err) 4435 return err; 4436 return ret; 4437 } 4438 4439 /* 4440 * recover relocation interrupted by system crash. 4441 * 4442 * this function resumes merging reloc trees with corresponding fs trees. 4443 * this is important for keeping the sharing of tree blocks 4444 */ 4445 int btrfs_recover_relocation(struct btrfs_root *root) 4446 { 4447 LIST_HEAD(reloc_roots); 4448 struct btrfs_key key; 4449 struct btrfs_root *fs_root; 4450 struct btrfs_root *reloc_root; 4451 struct btrfs_path *path; 4452 struct extent_buffer *leaf; 4453 struct reloc_control *rc = NULL; 4454 struct btrfs_trans_handle *trans; 4455 int ret; 4456 int err = 0; 4457 4458 path = btrfs_alloc_path(); 4459 if (!path) 4460 return -ENOMEM; 4461 path->reada = READA_BACK; 4462 4463 key.objectid = BTRFS_TREE_RELOC_OBJECTID; 4464 key.type = BTRFS_ROOT_ITEM_KEY; 4465 key.offset = (u64)-1; 4466 4467 while (1) { 4468 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, 4469 path, 0, 0); 4470 if (ret < 0) { 4471 err = ret; 4472 goto out; 4473 } 4474 if (ret > 0) { 4475 if (path->slots[0] == 0) 4476 break; 4477 path->slots[0]--; 4478 } 4479 leaf = path->nodes[0]; 4480 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4481 btrfs_release_path(path); 4482 4483 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || 4484 key.type != BTRFS_ROOT_ITEM_KEY) 4485 break; 4486 4487 reloc_root = btrfs_read_fs_root(root, &key); 4488 if (IS_ERR(reloc_root)) { 4489 err = PTR_ERR(reloc_root); 4490 goto out; 4491 } 4492 4493 list_add(&reloc_root->root_list, &reloc_roots); 4494 4495 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 4496 fs_root = read_fs_root(root->fs_info, 4497 reloc_root->root_key.offset); 4498 if (IS_ERR(fs_root)) { 4499 ret = PTR_ERR(fs_root); 4500 if (ret != -ENOENT) { 4501 err = ret; 4502 goto out; 4503 } 4504 ret = mark_garbage_root(reloc_root); 4505 if (ret < 0) { 4506 err = ret; 4507 goto out; 4508 } 4509 } 4510 } 4511 4512 if (key.offset == 0) 4513 break; 4514 4515 key.offset--; 4516 } 4517 btrfs_release_path(path); 4518 4519 if (list_empty(&reloc_roots)) 4520 goto out; 4521 4522 rc = alloc_reloc_control(root->fs_info); 4523 if (!rc) { 4524 err = -ENOMEM; 4525 goto out; 4526 } 4527 4528 rc->extent_root = root->fs_info->extent_root; 4529 4530 set_reloc_control(rc); 4531 4532 trans = btrfs_join_transaction(rc->extent_root); 4533 if (IS_ERR(trans)) { 4534 unset_reloc_control(rc); 4535 err = PTR_ERR(trans); 4536 goto out_free; 4537 } 4538 4539 rc->merge_reloc_tree = 1; 4540 4541 while (!list_empty(&reloc_roots)) { 4542 reloc_root = list_entry(reloc_roots.next, 4543 struct btrfs_root, root_list); 4544 list_del(&reloc_root->root_list); 4545 4546 if (btrfs_root_refs(&reloc_root->root_item) == 0) { 4547 list_add_tail(&reloc_root->root_list, 4548 &rc->reloc_roots); 4549 continue; 4550 } 4551 4552 fs_root = read_fs_root(root->fs_info, 4553 reloc_root->root_key.offset); 4554 if (IS_ERR(fs_root)) { 4555 err = PTR_ERR(fs_root); 4556 goto out_free; 4557 } 4558 4559 err = __add_reloc_root(reloc_root); 4560 BUG_ON(err < 0); /* -ENOMEM or logic error */ 4561 fs_root->reloc_root = reloc_root; 4562 } 4563 4564 err = btrfs_commit_transaction(trans, rc->extent_root); 4565 if (err) 4566 goto out_free; 4567 4568 merge_reloc_roots(rc); 4569 4570 unset_reloc_control(rc); 4571 4572 trans = btrfs_join_transaction(rc->extent_root); 4573 if (IS_ERR(trans)) { 4574 err = PTR_ERR(trans); 4575 goto out_free; 4576 } 4577 err = qgroup_fix_relocated_data_extents(trans, rc); 4578 if (err < 0) { 4579 btrfs_abort_transaction(trans, err); 4580 goto out_free; 4581 } 4582 err = btrfs_commit_transaction(trans, rc->extent_root); 4583 out_free: 4584 kfree(rc); 4585 out: 4586 if (!list_empty(&reloc_roots)) 4587 free_reloc_roots(&reloc_roots); 4588 4589 btrfs_free_path(path); 4590 4591 if (err == 0) { 4592 /* cleanup orphan inode in data relocation tree */ 4593 fs_root = read_fs_root(root->fs_info, 4594 BTRFS_DATA_RELOC_TREE_OBJECTID); 4595 if (IS_ERR(fs_root)) 4596 err = PTR_ERR(fs_root); 4597 else 4598 err = btrfs_orphan_cleanup(fs_root); 4599 } 4600 return err; 4601 } 4602 4603 /* 4604 * helper to add ordered checksum for data relocation. 4605 * 4606 * cloning checksum properly handles the nodatasum extents. 4607 * it also saves CPU time to re-calculate the checksum. 4608 */ 4609 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) 4610 { 4611 struct btrfs_ordered_sum *sums; 4612 struct btrfs_ordered_extent *ordered; 4613 struct btrfs_root *root = BTRFS_I(inode)->root; 4614 int ret; 4615 u64 disk_bytenr; 4616 u64 new_bytenr; 4617 LIST_HEAD(list); 4618 4619 ordered = btrfs_lookup_ordered_extent(inode, file_pos); 4620 BUG_ON(ordered->file_offset != file_pos || ordered->len != len); 4621 4622 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 4623 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 4624 disk_bytenr + len - 1, &list, 0); 4625 if (ret) 4626 goto out; 4627 4628 while (!list_empty(&list)) { 4629 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 4630 list_del_init(&sums->list); 4631 4632 /* 4633 * We need to offset the new_bytenr based on where the csum is. 4634 * We need to do this because we will read in entire prealloc 4635 * extents but we may have written to say the middle of the 4636 * prealloc extent, so we need to make sure the csum goes with 4637 * the right disk offset. 4638 * 4639 * We can do this because the data reloc inode refers strictly 4640 * to the on disk bytes, so we don't have to worry about 4641 * disk_len vs real len like with real inodes since it's all 4642 * disk length. 4643 */ 4644 new_bytenr = ordered->start + (sums->bytenr - disk_bytenr); 4645 sums->bytenr = new_bytenr; 4646 4647 btrfs_add_ordered_sum(inode, ordered, sums); 4648 } 4649 out: 4650 btrfs_put_ordered_extent(ordered); 4651 return ret; 4652 } 4653 4654 int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, 4655 struct btrfs_root *root, struct extent_buffer *buf, 4656 struct extent_buffer *cow) 4657 { 4658 struct reloc_control *rc; 4659 struct backref_node *node; 4660 int first_cow = 0; 4661 int level; 4662 int ret = 0; 4663 4664 rc = root->fs_info->reloc_ctl; 4665 if (!rc) 4666 return 0; 4667 4668 BUG_ON(rc->stage == UPDATE_DATA_PTRS && 4669 root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); 4670 4671 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 4672 if (buf == root->node) 4673 __update_reloc_root(root, cow->start); 4674 } 4675 4676 level = btrfs_header_level(buf); 4677 if (btrfs_header_generation(buf) <= 4678 btrfs_root_last_snapshot(&root->root_item)) 4679 first_cow = 1; 4680 4681 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID && 4682 rc->create_reloc_tree) { 4683 WARN_ON(!first_cow && level == 0); 4684 4685 node = rc->backref_cache.path[level]; 4686 BUG_ON(node->bytenr != buf->start && 4687 node->new_bytenr != buf->start); 4688 4689 drop_node_buffer(node); 4690 extent_buffer_get(cow); 4691 node->eb = cow; 4692 node->new_bytenr = cow->start; 4693 4694 if (!node->pending) { 4695 list_move_tail(&node->list, 4696 &rc->backref_cache.pending[level]); 4697 node->pending = 1; 4698 } 4699 4700 if (first_cow) 4701 __mark_block_processed(rc, node); 4702 4703 if (first_cow && level > 0) 4704 rc->nodes_relocated += buf->len; 4705 } 4706 4707 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) 4708 ret = replace_file_extents(trans, rc, root, cow); 4709 return ret; 4710 } 4711 4712 /* 4713 * called before creating snapshot. it calculates metadata reservation 4714 * required for relocating tree blocks in the snapshot 4715 */ 4716 void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending, 4717 u64 *bytes_to_reserve) 4718 { 4719 struct btrfs_root *root; 4720 struct reloc_control *rc; 4721 4722 root = pending->root; 4723 if (!root->reloc_root) 4724 return; 4725 4726 rc = root->fs_info->reloc_ctl; 4727 if (!rc->merge_reloc_tree) 4728 return; 4729 4730 root = root->reloc_root; 4731 BUG_ON(btrfs_root_refs(&root->root_item) == 0); 4732 /* 4733 * relocation is in the stage of merging trees. the space 4734 * used by merging a reloc tree is twice the size of 4735 * relocated tree nodes in the worst case. half for cowing 4736 * the reloc tree, half for cowing the fs tree. the space 4737 * used by cowing the reloc tree will be freed after the 4738 * tree is dropped. if we create snapshot, cowing the fs 4739 * tree may use more space than it frees. so we need 4740 * reserve extra space. 4741 */ 4742 *bytes_to_reserve += rc->nodes_relocated; 4743 } 4744 4745 /* 4746 * called after snapshot is created. migrate block reservation 4747 * and create reloc root for the newly created snapshot 4748 */ 4749 int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, 4750 struct btrfs_pending_snapshot *pending) 4751 { 4752 struct btrfs_root *root = pending->root; 4753 struct btrfs_root *reloc_root; 4754 struct btrfs_root *new_root; 4755 struct reloc_control *rc; 4756 int ret; 4757 4758 if (!root->reloc_root) 4759 return 0; 4760 4761 rc = root->fs_info->reloc_ctl; 4762 rc->merging_rsv_size += rc->nodes_relocated; 4763 4764 if (rc->merge_reloc_tree) { 4765 ret = btrfs_block_rsv_migrate(&pending->block_rsv, 4766 rc->block_rsv, 4767 rc->nodes_relocated, 1); 4768 if (ret) 4769 return ret; 4770 } 4771 4772 new_root = pending->snap; 4773 reloc_root = create_reloc_root(trans, root->reloc_root, 4774 new_root->root_key.objectid); 4775 if (IS_ERR(reloc_root)) 4776 return PTR_ERR(reloc_root); 4777 4778 ret = __add_reloc_root(reloc_root); 4779 BUG_ON(ret < 0); 4780 new_root->reloc_root = reloc_root; 4781 4782 if (rc->create_reloc_tree) 4783 ret = clone_backref_node(trans, rc, root, reloc_root); 4784 return ret; 4785 } 4786