// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/error-injection.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "btrfs_inode.h"
#include "async-thread.h"
#include "free-space-cache.h"
#include "inode-map.h"
#include "qgroup.h"
#include "print-tree.h"
#include "delalloc-space.h"
#include "block-group.h"
#include "backref.h"
#include "misc.h"

/*
 * Relocation overview
 *
 * [What does relocation do]
 *
 * The objective of relocation is to relocate all extents of the target block
 * group to other block groups.
 * This is used by resize (shrink only), profile conversion, compacting space,
 * and the balance routine to spread chunks over devices.
 *
 *		Before		|		After
 * ------------------------------------------------------------------
 *  BG A: 10 data extents	| BG A: deleted
 *  BG B:  2 data extents	| BG B: 10 data extents (2 old + 8 relocated)
 *  BG C:  1 data extent	| BG C:  3 data extents (1 old + 2 relocated)
 *
 * [How does relocation work]
 *
 * 1.   Mark the target block group read-only
 *      New extents won't be allocated from the target block group.
 *
 * 2.1  Record each extent in the target block group
 *      To build a proper map of extents to be relocated.
 *
 * 2.2  Build data reloc tree and reloc trees
 *      Data reloc tree will contain an inode, recording all newly relocated
 *      data extents.
 *      There will be only one data reloc tree for one data block group.
 *
 *      Reloc tree will be a special snapshot of its source tree, containing
 *      relocated tree blocks.
 *      Each tree referring to a tree block in the target block group will get
 *      its reloc tree built.
 *
 * 2.3  Swap source tree with its corresponding reloc tree
 *      Each involved tree only refers to new extents after swap.
 *
 * 3.   Cleanup reloc trees and data reloc tree.
 *      As old extents in the target block group are still referenced by reloc
 *      trees, we need to clean them up before really freeing the target block
 *      group.
 *
 * The main complexity is in steps 2.2 and 2.3.
 *
 * The entry point of relocation is the relocate_block_group() function.
 */
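
/*
 * A rough map from the steps above to the functions in this file (an
 * orientation aid, not an exact call graph): step 2.2 corresponds to
 * btrfs_init_reloc_root()/create_reloc_root(), step 2.3 to
 * merge_reloc_roots()/merge_reloc_root()/replace_path(), and step 3 to
 * clean_dirty_subvols().
 */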

#define RELOCATION_RESERVED_NODES	256
/*
 * map address of tree root to tree
 */
struct mapping_node {
        struct {
                struct rb_node rb_node;
                u64 bytenr;
        }; /* Use rb_simple_node for search/insert */
        void *data;
};

struct mapping_tree {
        struct rb_root rb_root;
        spinlock_t lock;
};

/*
 * Represents a tree block to be processed.
 */
struct tree_block {
        struct {
                struct rb_node rb_node;
                u64 bytenr;
        }; /* Use rb_simple_node for search/insert */
        struct btrfs_key key;
        unsigned int level:8;
        unsigned int key_ready:1;
};

#define MAX_EXTENTS 128

struct file_extent_cluster {
        u64 start;
        u64 end;
        u64 boundary[MAX_EXTENTS];
        unsigned int nr;
};

struct reloc_control {
        /* block group to relocate */
        struct btrfs_block_group *block_group;
        /* extent tree */
        struct btrfs_root *extent_root;
        /* inode for moving data */
        struct inode *data_inode;

        struct btrfs_block_rsv *block_rsv;

        struct btrfs_backref_cache backref_cache;

        struct file_extent_cluster cluster;
        /* tree blocks have been processed */
        struct extent_io_tree processed_blocks;
        /* map start of tree root to corresponding reloc tree */
        struct mapping_tree reloc_root_tree;
        /* list of reloc trees */
        struct list_head reloc_roots;
        /* list of subvolume trees that get relocated */
        struct list_head dirty_subvol_roots;
        /* size of metadata reservation for merging reloc trees */
        u64 merging_rsv_size;
        /* size of relocated tree nodes */
        u64 nodes_relocated;
        /* reserved size for block group relocation */
        u64 reserved_bytes;

        u64 search_start;
        u64 extents_found;

        unsigned int stage:8;
        unsigned int create_reloc_tree:1;
        unsigned int merge_reloc_tree:1;
        unsigned int found_file_extent:1;
};

/* stages of data relocation */
#define MOVE_DATA_EXTENTS	0
#define UPDATE_DATA_PTRS	1

static void mark_block_processed(struct reloc_control *rc,
                                 struct btrfs_backref_node *node)
{
        u32 blocksize;

        if (node->level == 0 ||
            in_range(node->bytenr, rc->block_group->start,
                     rc->block_group->length)) {
                blocksize = rc->extent_root->fs_info->nodesize;
                set_extent_bits(&rc->processed_blocks, node->bytenr,
                                node->bytenr + blocksize - 1, EXTENT_DIRTY);
        }
        node->processed = 1;
}


static void mapping_tree_init(struct mapping_tree *tree)
{
        tree->rb_root = RB_ROOT;
        spin_lock_init(&tree->lock);
}

/*
 * walk up backref nodes until we reach the node that represents the tree root
 */
static struct btrfs_backref_node *walk_up_backref(
                struct btrfs_backref_node *node,
                struct btrfs_backref_edge *edges[], int *index)
{
        struct btrfs_backref_edge *edge;
        int idx = *index;

        while (!list_empty(&node->upper)) {
                edge = list_entry(node->upper.next,
                                  struct btrfs_backref_edge, list[LOWER]);
                edges[idx++] = edge;
                node = edge->node[UPPER];
        }
        BUG_ON(node->detached);
        *index = idx;
        return node;
}

/*
 * walk down backref nodes to find the start of the next reference path
 */
static struct btrfs_backref_node *walk_down_backref(
                struct btrfs_backref_edge *edges[], int *index)
{
        struct btrfs_backref_edge *edge;
        struct btrfs_backref_node *lower;
        int idx = *index;

        while (idx > 0) {
                edge = edges[idx - 1];
                lower = edge->node[LOWER];
                if (list_is_last(&edge->list[LOWER], &lower->upper)) {
                        idx--;
                        continue;
                }
                edge = list_entry(edge->list[LOWER].next,
                                  struct btrfs_backref_edge, list[LOWER]);
                edges[idx - 1] = edge;
                *index = idx;
                return edge->node[UPPER];
        }
        *index = 0;
        return NULL;
}
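
/*
 * The two helpers above pair up to enumerate every reference path from a
 * node to the tree roots referring to it. A minimal usage sketch, modeled
 * on select_reloc_root() further below:
 *
 *	int index = 0;
 *
 *	next = walk_up_backref(node, edges, &index);
 *	while (next) {
 *		(next is now a root-level backref node, inspect it)
 *		next = walk_down_backref(edges, &index);
 *	}
 */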

static void update_backref_node(struct btrfs_backref_cache *cache,
                                struct btrfs_backref_node *node, u64 bytenr)
{
        struct rb_node *rb_node;

        rb_erase(&node->rb_node, &cache->rb_root);
        node->bytenr = bytenr;
        rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node);
        if (rb_node)
                btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST);
}

/*
 * update backref cache after a transaction commit
 */
static int update_backref_cache(struct btrfs_trans_handle *trans,
                                struct btrfs_backref_cache *cache)
{
        struct btrfs_backref_node *node;
        int level = 0;

        if (cache->last_trans == 0) {
                cache->last_trans = trans->transid;
                return 0;
        }

        if (cache->last_trans == trans->transid)
                return 0;

        /*
         * detached nodes are used to avoid unnecessary backref
         * lookup. transaction commit changes the extent tree.
         * so the detached nodes are no longer useful.
         */
        while (!list_empty(&cache->detached)) {
                node = list_entry(cache->detached.next,
                                  struct btrfs_backref_node, list);
                btrfs_backref_cleanup_node(cache, node);
        }

        while (!list_empty(&cache->changed)) {
                node = list_entry(cache->changed.next,
                                  struct btrfs_backref_node, list);
                list_del_init(&node->list);
                BUG_ON(node->pending);
                update_backref_node(cache, node, node->new_bytenr);
        }

        /*
         * some nodes can be left in the pending list if there were
         * errors during processing the pending nodes.
         */
        for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
                list_for_each_entry(node, &cache->pending[level], list) {
                        BUG_ON(!node->pending);
                        if (node->bytenr == node->new_bytenr)
                                continue;
                        update_backref_node(cache, node, node->new_bytenr);
                }
        }

        cache->last_trans = 0;
        return 1;
}

static bool reloc_root_is_dead(struct btrfs_root *root)
{
        /*
         * Pairs with set_bit/clear_bit in clean_dirty_subvols() and
         * btrfs_update_reloc_root(). We need to see the updated bit before
         * trying to access reloc_root.
         */
        smp_rmb();
        if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
                return true;
        return false;
}

/*
 * Check if this subvolume tree has a valid reloc tree.
 *
 * Reloc tree after swap is considered dead, thus not considered as valid.
 * This is enough for most callers, as they don't distinguish a dead reloc
 * root from no reloc root. But btrfs_should_ignore_reloc_root() below is a
 * special case.
 */
static bool have_reloc_root(struct btrfs_root *root)
{
        if (reloc_root_is_dead(root))
                return false;
        if (!root->reloc_root)
                return false;
        return true;
}

int btrfs_should_ignore_reloc_root(struct btrfs_root *root)
{
        struct btrfs_root *reloc_root;

        if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
                return 0;

        /* This root has been merged with its reloc tree, we can ignore it */
        if (reloc_root_is_dead(root))
                return 1;

        reloc_root = root->reloc_root;
        if (!reloc_root)
                return 0;

        if (btrfs_header_generation(reloc_root->commit_root) ==
            root->fs_info->running_transaction->transid)
                return 0;
        /*
         * If there is a reloc tree and it was created in a previous
         * transaction, backref lookup can find the reloc tree, so the
         * backref node for the fs tree root is useless for relocation.
         */
        return 1;
}

/*
 * find reloc tree by address of tree root
 */
struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr)
{
        struct reloc_control *rc = fs_info->reloc_ctl;
        struct rb_node *rb_node;
        struct mapping_node *node;
        struct btrfs_root *root = NULL;

        ASSERT(rc);
        spin_lock(&rc->reloc_root_tree.lock);
        rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root, bytenr);
        if (rb_node) {
                node = rb_entry(rb_node, struct mapping_node, rb_node);
                root = (struct btrfs_root *)node->data;
        }
        spin_unlock(&rc->reloc_root_tree.lock);
        return btrfs_grab_root(root);
}

static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
                                       u64 root_objectid)
{
        struct btrfs_key key;

        key.objectid = root_objectid;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = (u64)-1;

        return btrfs_get_fs_root(fs_info, &key, false);
}
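
/*
 * Usage sketch (see prepare_to_merge() and merge_reloc_roots() below): a
 * reloc root stores the objectid of its subvolume in root_key.offset, so
 * the owning fs root is recovered with:
 *
 *	root = read_fs_root(fs_info, reloc_root->root_key.offset);
 */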

/*
 * In handle_one_tree_backref(), we have only linked the lower node to the
 * edge, but the upper node hasn't been linked to the edge.
 * This means we can only iterate through btrfs_backref_node::upper to reach
 * parent edges, but not through btrfs_backref_node::lower to reach children
 * edges.
 *
 * This function will finish linking btrfs_backref_node::lower to the related
 * edges, so that the backref cache can be iterated bi-directionally.
 *
 * Also, this will add the nodes to the backref cache for the next run.
 */
static int finish_upper_links(struct btrfs_backref_cache *cache,
                              struct btrfs_backref_node *start)
{
        struct list_head *useless_node = &cache->useless_node;
        struct btrfs_backref_edge *edge;
        struct rb_node *rb_node;
        LIST_HEAD(pending_edge);

        ASSERT(start->checked);

        /* Insert this node to cache if it's not COW-only */
        if (!start->cowonly) {
                rb_node = rb_simple_insert(&cache->rb_root, start->bytenr,
                                           &start->rb_node);
                if (rb_node)
                        btrfs_backref_panic(cache->fs_info, start->bytenr,
                                            -EEXIST);
                list_add_tail(&start->lower, &cache->leaves);
        }

        /*
         * Use breadth first search to iterate all related edges.
         *
         * The starting points are all the edges of this node.
         */
        list_for_each_entry(edge, &start->upper, list[LOWER])
                list_add_tail(&edge->list[UPPER], &pending_edge);

        while (!list_empty(&pending_edge)) {
                struct btrfs_backref_node *upper;
                struct btrfs_backref_node *lower;
                struct rb_node *rb_node;

                edge = list_first_entry(&pending_edge,
                                struct btrfs_backref_edge, list[UPPER]);
                list_del_init(&edge->list[UPPER]);
                upper = edge->node[UPPER];
                lower = edge->node[LOWER];

                /* Parent is detached, no need to keep any edges */
                if (upper->detached) {
                        list_del(&edge->list[LOWER]);
                        btrfs_backref_free_edge(cache, edge);

                        /* Lower node is orphan, queue for cleanup */
                        if (list_empty(&lower->upper))
                                list_add(&lower->list, useless_node);
                        continue;
                }

                /*
                 * All new nodes added in current build_backref_tree() haven't
                 * been linked to the cache rb tree.
                 * So if we have upper->rb_node populated, this means a cache
                 * hit. We only need to link the edge, as @upper and all its
                 * parents have already been linked.
                 */
                if (!RB_EMPTY_NODE(&upper->rb_node)) {
                        if (upper->lowest) {
                                list_del_init(&upper->lower);
                                upper->lowest = 0;
                        }

                        list_add_tail(&edge->list[UPPER], &upper->lower);
                        continue;
                }

                /* Sanity check, we shouldn't have any unchecked nodes */
                if (!upper->checked) {
                        ASSERT(0);
                        return -EUCLEAN;
                }

                /* Sanity check, COW-only node has non-COW-only parent */
                if (start->cowonly != upper->cowonly) {
                        ASSERT(0);
                        return -EUCLEAN;
                }

                /* Only cache non-COW-only (subvolume trees) tree blocks */
                if (!upper->cowonly) {
                        rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
                                                   &upper->rb_node);
                        if (rb_node) {
                                btrfs_backref_panic(cache->fs_info,
                                                upper->bytenr, -EEXIST);
                                return -EUCLEAN;
                        }
                }

                list_add_tail(&edge->list[UPPER], &upper->lower);

                /*
                 * Also queue all the parent edges of this uncached node to
                 * finish the upper linkage.
                 */
                list_for_each_entry(edge, &upper->upper, list[LOWER])
                        list_add_tail(&edge->list[UPPER], &pending_edge);
        }
        return 0;
}

/*
 * For useless nodes, do two major cleanups:
 *
 * - Cleanup the children edges and nodes
 *   If a child node is also orphan (no parent) during cleanup, then the child
 *   node will also be cleaned up.
 *
 * - Freeing up leaves (level 0), keeps nodes detached
 *   For nodes, the node is still cached as "detached"
 *
 * Return false if @node is not in the @useless_nodes list.
 * Return true if @node is in the @useless_nodes list.
 */
static bool handle_useless_nodes(struct reloc_control *rc,
                                 struct btrfs_backref_node *node)
{
        struct btrfs_backref_cache *cache = &rc->backref_cache;
        struct list_head *useless_node = &cache->useless_node;
        bool ret = false;

        while (!list_empty(useless_node)) {
                struct btrfs_backref_node *cur;

                cur = list_first_entry(useless_node, struct btrfs_backref_node,
                                       list);
                list_del_init(&cur->list);

                /* Only tree root nodes can be added to @useless_nodes */
                ASSERT(list_empty(&cur->upper));

                if (cur == node)
                        ret = true;

                /* The node is the lowest node */
                if (cur->lowest) {
                        list_del_init(&cur->lower);
                        cur->lowest = 0;
                }

                /* Cleanup the lower edges */
                while (!list_empty(&cur->lower)) {
                        struct btrfs_backref_edge *edge;
                        struct btrfs_backref_node *lower;

                        edge = list_entry(cur->lower.next,
                                        struct btrfs_backref_edge, list[UPPER]);
                        list_del(&edge->list[UPPER]);
                        list_del(&edge->list[LOWER]);
                        lower = edge->node[LOWER];
                        btrfs_backref_free_edge(cache, edge);

                        /* Child node is also orphan, queue for cleanup */
                        if (list_empty(&lower->upper))
                                list_add(&lower->list, useless_node);
                }
                /* Mark this block processed for relocation */
                mark_block_processed(rc, cur);

                /*
                 * Backref nodes for tree leaves are deleted from the cache.
                 * Backref nodes for upper level tree blocks are left in the
                 * cache to avoid unnecessary backref lookup.
                 */
                if (cur->level > 0) {
                        list_add(&cur->list, &cache->detached);
                        cur->detached = 1;
                } else {
                        rb_erase(&cur->rb_node, &cache->rb_root);
                        btrfs_backref_free_node(cache, cur);
                }
        }
        return ret;
}

/*
 * Build backref tree for a given tree block. Root of the backref tree
 * corresponds to the tree block, leaves of the backref tree correspond to
 * roots of b-trees that reference the tree block.
 *
 * The basic idea of this function is to check backrefs of a given block to
 * find upper level blocks that reference the block, and then check backrefs
 * of these upper level blocks recursively. The recursion stops when the tree
 * root is reached or backrefs for the block are cached.
 *
 * NOTE: if we find that backrefs for a block are cached, we know backrefs for
 * all upper level blocks that directly/indirectly reference the block are
 * also cached.
 */
static noinline_for_stack struct btrfs_backref_node *build_backref_tree(
                        struct reloc_control *rc, struct btrfs_key *node_key,
                        int level, u64 bytenr)
{
        struct btrfs_backref_iter *iter;
        struct btrfs_backref_cache *cache = &rc->backref_cache;
        /* For searching parent of TREE_BLOCK_REF */
        struct btrfs_path *path;
        struct btrfs_backref_node *cur;
        struct btrfs_backref_node *upper;
        struct btrfs_backref_node *lower;
        struct btrfs_backref_node *node = NULL;
        struct btrfs_backref_edge *edge;
        int ret;
        int err = 0;

        iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS);
        if (!iter)
                return ERR_PTR(-ENOMEM);
        path = btrfs_alloc_path();
        if (!path) {
                err = -ENOMEM;
                goto out;
        }

        node = btrfs_backref_alloc_node(cache, bytenr, level);
        if (!node) {
                err = -ENOMEM;
                goto out;
        }

        node->lowest = 1;
        cur = node;

        /* Breadth-first search to build backref cache */
        do {
                ret = btrfs_backref_add_tree_node(cache, path, iter, node_key,
                                                  cur);
                if (ret < 0) {
                        err = ret;
                        goto out;
                }
                edge = list_first_entry_or_null(&cache->pending_edge,
                                struct btrfs_backref_edge, list[UPPER]);
                /*
                 * The pending list isn't empty, take the first block to
                 * process.
                 */
                if (edge) {
                        list_del_init(&edge->list[UPPER]);
                        cur = edge->node[UPPER];
                }
        } while (edge);

        /* Finish the upper linkage of newly added edges/nodes */
        ret = finish_upper_links(cache, node);
        if (ret < 0) {
                err = ret;
                goto out;
        }

        if (handle_useless_nodes(rc, node))
                node = NULL;
out:
        btrfs_backref_iter_free(iter);
        btrfs_free_path(path);
        if (err) {
                while (!list_empty(&cache->useless_node)) {
                        lower = list_first_entry(&cache->useless_node,
                                        struct btrfs_backref_node, list);
                        list_del_init(&lower->list);
                }
                while (!list_empty(&cache->pending_edge)) {
                        edge = list_first_entry(&cache->pending_edge,
                                        struct btrfs_backref_edge, list[UPPER]);
                        list_del(&edge->list[UPPER]);
                        list_del(&edge->list[LOWER]);
                        lower = edge->node[LOWER];
                        upper = edge->node[UPPER];
                        btrfs_backref_free_edge(cache, edge);

                        /*
                         * Lower is no longer linked to any upper backref nodes
                         * and isn't in the cache, we can free it ourselves.
                         */
                        if (list_empty(&lower->upper) &&
                            RB_EMPTY_NODE(&lower->rb_node))
                                list_add(&lower->list, &cache->useless_node);

                        if (!RB_EMPTY_NODE(&upper->rb_node))
                                continue;

                        /* Add this guy's upper edges to the list to process */
                        list_for_each_entry(edge, &upper->upper, list[LOWER])
                                list_add_tail(&edge->list[UPPER],
                                              &cache->pending_edge);
                        if (list_empty(&upper->upper))
                                list_add(&upper->list, &cache->useless_node);
                }

                while (!list_empty(&cache->useless_node)) {
                        lower = list_first_entry(&cache->useless_node,
                                        struct btrfs_backref_node, list);
                        list_del_init(&lower->list);
                        if (lower == node)
                                node = NULL;
                        btrfs_backref_free_node(cache, lower);
                }

                btrfs_backref_cleanup_node(cache, node);
                ASSERT(list_empty(&cache->useless_node) &&
                       list_empty(&cache->pending_edge));
                return ERR_PTR(err);
        }
        ASSERT(!node || !node->detached);
        ASSERT(list_empty(&cache->useless_node) &&
               list_empty(&cache->pending_edge));
        return node;
}
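
/*
 * Hypothetical usage sketch (the real caller is not part of this excerpt):
 * given a struct tree_block as defined near the top of this file, the
 * backref tree for that block would be built with:
 *
 *	node = build_backref_tree(rc, &block->key, block->level,
 *				  block->bytenr);
 *	if (IS_ERR(node))
 *		return PTR_ERR(node);
 */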

/*
 * helper to add a backref node for the newly created snapshot.
 * the backref node is created by cloning the backref node that
 * corresponds to the root of the source tree.
 */
static int clone_backref_node(struct btrfs_trans_handle *trans,
                              struct reloc_control *rc,
                              struct btrfs_root *src,
                              struct btrfs_root *dest)
{
        struct btrfs_root *reloc_root = src->reloc_root;
        struct btrfs_backref_cache *cache = &rc->backref_cache;
        struct btrfs_backref_node *node = NULL;
        struct btrfs_backref_node *new_node;
        struct btrfs_backref_edge *edge;
        struct btrfs_backref_edge *new_edge;
        struct rb_node *rb_node;

        if (cache->last_trans > 0)
                update_backref_cache(trans, cache);

        rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
        if (rb_node) {
                node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
                if (node->detached)
                        node = NULL;
                else
                        BUG_ON(node->new_bytenr != reloc_root->node->start);
        }

        if (!node) {
                rb_node = rb_simple_search(&cache->rb_root,
                                           reloc_root->commit_root->start);
                if (rb_node) {
                        node = rb_entry(rb_node, struct btrfs_backref_node,
                                        rb_node);
                        BUG_ON(node->detached);
                }
        }

        if (!node)
                return 0;

        new_node = btrfs_backref_alloc_node(cache, dest->node->start,
                                            node->level);
        if (!new_node)
                return -ENOMEM;

        new_node->lowest = node->lowest;
        new_node->checked = 1;
        new_node->root = btrfs_grab_root(dest);
        ASSERT(new_node->root);

        if (!node->lowest) {
                list_for_each_entry(edge, &node->lower, list[UPPER]) {
                        new_edge = btrfs_backref_alloc_edge(cache);
                        if (!new_edge)
                                goto fail;

                        btrfs_backref_link_edge(new_edge, edge->node[LOWER],
                                                new_node, LINK_UPPER);
                }
        } else {
                list_add_tail(&new_node->lower, &cache->leaves);
        }

        rb_node = rb_simple_insert(&cache->rb_root, new_node->bytenr,
                                   &new_node->rb_node);
        if (rb_node)
                btrfs_backref_panic(trans->fs_info, new_node->bytenr, -EEXIST);

        if (!new_node->lowest) {
                list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
                        list_add_tail(&new_edge->list[LOWER],
                                      &new_edge->node[LOWER]->upper);
                }
        }
        return 0;
fail:
        while (!list_empty(&new_node->lower)) {
                new_edge = list_entry(new_node->lower.next,
                                      struct btrfs_backref_edge, list[UPPER]);
                list_del(&new_edge->list[UPPER]);
                btrfs_backref_free_edge(cache, new_edge);
        }
        btrfs_backref_free_node(cache, new_node);
        return -ENOMEM;
}

/*
 * helper to add 'address of tree root -> reloc tree' mapping
 */
static int __must_check __add_reloc_root(struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct rb_node *rb_node;
        struct mapping_node *node;
        struct reloc_control *rc = fs_info->reloc_ctl;

        node = kmalloc(sizeof(*node), GFP_NOFS);
        if (!node)
                return -ENOMEM;

        node->bytenr = root->commit_root->start;
        node->data = root;

        spin_lock(&rc->reloc_root_tree.lock);
        rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
                                   node->bytenr, &node->rb_node);
        spin_unlock(&rc->reloc_root_tree.lock);
        if (rb_node) {
                btrfs_panic(fs_info, -EEXIST,
                            "Duplicate root found for start=%llu while inserting into relocation tree",
                            node->bytenr);
        }

        list_add_tail(&root->root_list, &rc->reloc_roots);
        return 0;
}

/*
 * helper to delete the 'address of tree root -> reloc tree'
 * mapping
 */
static void __del_reloc_root(struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct rb_node *rb_node;
        struct mapping_node *node = NULL;
        struct reloc_control *rc = fs_info->reloc_ctl;
        bool put_ref = false;

        if (rc && root->node) {
                spin_lock(&rc->reloc_root_tree.lock);
                rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
                                           root->commit_root->start);
                if (rb_node) {
                        node = rb_entry(rb_node, struct mapping_node, rb_node);
                        rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
                        RB_CLEAR_NODE(&node->rb_node);
                }
                spin_unlock(&rc->reloc_root_tree.lock);
                if (!node)
                        return;
                BUG_ON((struct btrfs_root *)node->data != root);
        }

        /*
         * We only put the reloc root here if it's on the list. There's a lot
         * of places where the pattern is to splice the rc->reloc_roots, process
         * the reloc roots, and then add the reloc root back onto
         * rc->reloc_roots. If we call __del_reloc_root while it's off of the
         * list we don't want the reference being dropped, because the guy
         * messing with the list is in charge of the reference.
         */
        spin_lock(&fs_info->trans_lock);
        if (!list_empty(&root->root_list)) {
                put_ref = true;
                list_del_init(&root->root_list);
        }
        spin_unlock(&fs_info->trans_lock);
        if (put_ref)
                btrfs_put_root(root);
        kfree(node);
}

/*
 * helper to update the 'address of tree root -> reloc tree'
 * mapping
 */
static int __update_reloc_root(struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct rb_node *rb_node;
        struct mapping_node *node = NULL;
        struct reloc_control *rc = fs_info->reloc_ctl;

        spin_lock(&rc->reloc_root_tree.lock);
        rb_node = rb_simple_search(&rc->reloc_root_tree.rb_root,
                                   root->commit_root->start);
        if (rb_node) {
                node = rb_entry(rb_node, struct mapping_node, rb_node);
                rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
        }
        spin_unlock(&rc->reloc_root_tree.lock);

        if (!node)
                return 0;
        BUG_ON((struct btrfs_root *)node->data != root);

        spin_lock(&rc->reloc_root_tree.lock);
        node->bytenr = root->node->start;
        rb_node = rb_simple_insert(&rc->reloc_root_tree.rb_root,
                                   node->bytenr, &node->rb_node);
        spin_unlock(&rc->reloc_root_tree.lock);
        if (rb_node)
                btrfs_backref_panic(fs_info, node->bytenr, -EEXIST);
        return 0;
}

static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
                struct btrfs_root *root, u64 objectid)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_root *reloc_root;
        struct extent_buffer *eb;
        struct btrfs_root_item *root_item;
        struct btrfs_key root_key;
        int ret;

        root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
        BUG_ON(!root_item);

        root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
        root_key.type = BTRFS_ROOT_ITEM_KEY;
        root_key.offset = objectid;

        if (root->root_key.objectid == objectid) {
                u64 commit_root_gen;

                /* called by btrfs_init_reloc_root() */
                ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
                                      BTRFS_TREE_RELOC_OBJECTID);
                BUG_ON(ret);
                /*
                 * Set the last_snapshot field to the generation of the commit
                 * root - like this ctree.c:btrfs_block_can_be_shared() behaves
                 * correctly (returns true) when the relocation root is created
                 * either inside the critical section of a transaction commit
                 * (through transaction.c:qgroup_account_snapshot()) or when
                 * it's created before the transaction commit is started.
                 */
                commit_root_gen = btrfs_header_generation(root->commit_root);
                btrfs_set_root_last_snapshot(&root->root_item, commit_root_gen);
        } else {
                /*
                 * called by btrfs_reloc_post_snapshot_hook.
                 * the source tree is a reloc tree, all tree blocks
                 * modified after it was created have RELOC flag
                 * set in their headers. so it's OK to not update
                 * the 'last_snapshot'.
                 */
                ret = btrfs_copy_root(trans, root, root->node, &eb,
                                      BTRFS_TREE_RELOC_OBJECTID);
                BUG_ON(ret);
        }

        memcpy(root_item, &root->root_item, sizeof(*root_item));
        btrfs_set_root_bytenr(root_item, eb->start);
        btrfs_set_root_level(root_item, btrfs_header_level(eb));
        btrfs_set_root_generation(root_item, trans->transid);

        if (root->root_key.objectid == objectid) {
                btrfs_set_root_refs(root_item, 0);
                memset(&root_item->drop_progress, 0,
                       sizeof(struct btrfs_disk_key));
                root_item->drop_level = 0;
        }

        btrfs_tree_unlock(eb);
        free_extent_buffer(eb);

        ret = btrfs_insert_root(trans, fs_info->tree_root,
                                &root_key, root_item);
        BUG_ON(ret);
        kfree(root_item);

        reloc_root = btrfs_read_tree_root(fs_info->tree_root, &root_key);
        BUG_ON(IS_ERR(reloc_root));
        set_bit(BTRFS_ROOT_REF_COWS, &reloc_root->state);
        reloc_root->last_trans = trans->transid;
        return reloc_root;
}
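
/*
 * Note on the objectid check above: when @objectid equals the source
 * root's own objectid, we are snapshotting a subvolume for relocation
 * (the btrfs_init_reloc_root() path); otherwise the source is itself a
 * reloc tree being cloned for a freshly created snapshot (the
 * btrfs_reloc_post_snapshot_hook() path), per the comments inside
 * create_reloc_root().
 */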

/*
 * create reloc tree for a given fs tree. reloc tree is just a
 * snapshot of the fs tree with special root objectid.
 *
 * The reloc_root comes out of here with two references, one for
 * root->reloc_root, and another for being on the rc->reloc_roots list.
 */
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_root *reloc_root;
        struct reloc_control *rc = fs_info->reloc_ctl;
        struct btrfs_block_rsv *rsv;
        int clear_rsv = 0;
        int ret;

        if (!rc)
                return 0;

        /*
         * The subvolume has a reloc tree but the swap is finished, no need to
         * create/update the dead reloc tree.
         */
        if (reloc_root_is_dead(root))
                return 0;

        /*
         * This is subtle but important. We do not do
         * record_root_in_transaction for reloc roots, instead we record their
         * corresponding fs root, and then here we update the last trans for the
         * reloc root. This means that we have to do this for the entire life
         * of the reloc root, regardless of which stage of the relocation we are
         * in.
         */
        if (root->reloc_root) {
                reloc_root = root->reloc_root;
                reloc_root->last_trans = trans->transid;
                return 0;
        }

        /*
         * We are merging reloc roots, we do not need new reloc trees. Also
         * reloc trees never need their own reloc tree.
         */
        if (!rc->create_reloc_tree ||
            root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
                return 0;

        if (!trans->reloc_reserved) {
                rsv = trans->block_rsv;
                trans->block_rsv = rc->block_rsv;
                clear_rsv = 1;
        }
        reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
        if (clear_rsv)
                trans->block_rsv = rsv;

        ret = __add_reloc_root(reloc_root);
        BUG_ON(ret < 0);
        root->reloc_root = btrfs_grab_root(reloc_root);
        return 0;
}

/*
 * update root item of reloc tree
 */
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_root *reloc_root;
        struct btrfs_root_item *root_item;
        int ret;

        if (!have_reloc_root(root))
                goto out;

        reloc_root = root->reloc_root;
        root_item = &reloc_root->root_item;

        /*
         * We are probably ok here, but __del_reloc_root() will drop its ref of
         * the root. We have the ref for root->reloc_root, but just in case
         * hold it while we update the reloc root.
         */
        btrfs_grab_root(reloc_root);

        /* root->reloc_root will stay until current relocation finished */
        if (fs_info->reloc_ctl->merge_reloc_tree &&
            btrfs_root_refs(root_item) == 0) {
                set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
                /*
                 * Mark the tree as dead before we change reloc_root so
                 * have_reloc_root will not touch it from now on.
                 */
                smp_wmb();
                __del_reloc_root(reloc_root);
        }

        if (reloc_root->commit_root != reloc_root->node) {
                __update_reloc_root(reloc_root);
                btrfs_set_root_node(root_item, reloc_root->node);
                free_extent_buffer(reloc_root->commit_root);
                reloc_root->commit_root = btrfs_root_node(reloc_root);
        }

        ret = btrfs_update_root(trans, fs_info->tree_root,
                                &reloc_root->root_key, root_item);
        BUG_ON(ret);
        btrfs_put_root(reloc_root);
out:
        return 0;
}

/*
 * helper to find the first cached inode with inode number >= objectid
 * in a subvolume
 */
static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
{
        struct rb_node *node;
        struct rb_node *prev;
        struct btrfs_inode *entry;
        struct inode *inode;

        spin_lock(&root->inode_lock);
again:
        node = root->inode_tree.rb_node;
        prev = NULL;
        while (node) {
                prev = node;
                entry = rb_entry(node, struct btrfs_inode, rb_node);

                if (objectid < btrfs_ino(entry))
                        node = node->rb_left;
                else if (objectid > btrfs_ino(entry))
                        node = node->rb_right;
                else
                        break;
        }
        if (!node) {
                while (prev) {
                        entry = rb_entry(prev, struct btrfs_inode, rb_node);
                        if (objectid <= btrfs_ino(entry)) {
                                node = prev;
                                break;
                        }
                        prev = rb_next(prev);
                }
        }
        while (node) {
                entry = rb_entry(node, struct btrfs_inode, rb_node);
                inode = igrab(&entry->vfs_inode);
                if (inode) {
                        spin_unlock(&root->inode_lock);
                        return inode;
                }

                objectid = btrfs_ino(entry) + 1;
                if (cond_resched_lock(&root->inode_lock))
                        goto again;

                node = rb_next(node);
        }
        spin_unlock(&root->inode_lock);
        return NULL;
}
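
/*
 * Usage sketch: replace_file_extents() and invalidate_extent_cache()
 * below both follow this pattern to visit every cached inode of a
 * subvolume in objectid order:
 *
 *	u64 objectid = first_objectid;
 *	struct inode *inode;
 *
 *	while ((inode = find_next_inode(root, objectid))) {
 *		objectid = btrfs_ino(BTRFS_I(inode)) + 1;
 *		(use inode, then drop the reference)
 *	}
 */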

/*
 * get new location of data
 */
static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
                            u64 bytenr, u64 num_bytes)
{
        struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
        struct btrfs_path *path;
        struct btrfs_file_extent_item *fi;
        struct extent_buffer *leaf;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        bytenr -= BTRFS_I(reloc_inode)->index_cnt;
        ret = btrfs_lookup_file_extent(NULL, root, path,
                        btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
        if (ret < 0)
                goto out;
        if (ret > 0) {
                ret = -ENOENT;
                goto out;
        }

        leaf = path->nodes[0];
        fi = btrfs_item_ptr(leaf, path->slots[0],
                            struct btrfs_file_extent_item);

        BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
               btrfs_file_extent_compression(leaf, fi) ||
               btrfs_file_extent_encryption(leaf, fi) ||
               btrfs_file_extent_other_encoding(leaf, fi));

        if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
                ret = -EINVAL;
                goto out;
        }

        *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
        ret = 0;
out:
        btrfs_free_path(path);
        return ret;
}
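
/*
 * Reading of the lookup above: the data reloc inode mirrors the target
 * block group, with file offset = disk bytenr - index_cnt. Translating
 * the old disk bytenr to a file offset therefore finds the file extent
 * item whose disk_bytenr is the new location of that data.
 */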

/*
 * update file extent items in the tree leaf to point to
 * the new locations.
 */
static noinline_for_stack
int replace_file_extents(struct btrfs_trans_handle *trans,
                         struct reloc_control *rc,
                         struct btrfs_root *root,
                         struct extent_buffer *leaf)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        struct inode *inode = NULL;
        u64 parent;
        u64 bytenr;
        u64 new_bytenr = 0;
        u64 num_bytes;
        u64 end;
        u32 nritems;
        u32 i;
        int ret = 0;
        int first = 1;
        int dirty = 0;

        if (rc->stage != UPDATE_DATA_PTRS)
                return 0;

        /* reloc trees always use full backref */
        if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
                parent = leaf->start;
        else
                parent = 0;

        nritems = btrfs_header_nritems(leaf);
        for (i = 0; i < nritems; i++) {
                struct btrfs_ref ref = { 0 };

                cond_resched();
                btrfs_item_key_to_cpu(leaf, &key, i);
                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;
                fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
                if (btrfs_file_extent_type(leaf, fi) ==
                    BTRFS_FILE_EXTENT_INLINE)
                        continue;
                bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
                num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
                if (bytenr == 0)
                        continue;
                if (!in_range(bytenr, rc->block_group->start,
                              rc->block_group->length))
                        continue;

                /*
                 * if we are modifying a block in the fs tree, wait for
                 * readpage to complete and drop the extent cache
                 */
                if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
                        if (first) {
                                inode = find_next_inode(root, key.objectid);
                                first = 0;
                        } else if (inode && btrfs_ino(BTRFS_I(inode)) < key.objectid) {
                                btrfs_add_delayed_iput(inode);
                                inode = find_next_inode(root, key.objectid);
                        }
                        if (inode && btrfs_ino(BTRFS_I(inode)) == key.objectid) {
                                end = key.offset +
                                      btrfs_file_extent_num_bytes(leaf, fi);
                                WARN_ON(!IS_ALIGNED(key.offset,
                                                    fs_info->sectorsize));
                                WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
                                end--;
                                ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
                                                      key.offset, end);
                                if (!ret)
                                        continue;

                                btrfs_drop_extent_cache(BTRFS_I(inode),
                                                key.offset, end, 1);
                                unlock_extent(&BTRFS_I(inode)->io_tree,
                                              key.offset, end);
                        }
                }

                ret = get_new_location(rc->data_inode, &new_bytenr,
                                       bytenr, num_bytes);
                if (ret) {
                        /*
                         * Don't have to abort since we've not changed anything
                         * in the file extent yet.
                         */
                        break;
                }

                btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
                dirty = 1;

                key.offset -= btrfs_file_extent_offset(leaf, fi);
                btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
                                       num_bytes, parent);
                ref.real_root = root->root_key.objectid;
                btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
                                    key.objectid, key.offset);
                ret = btrfs_inc_extent_ref(trans, &ref);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
                        break;
                }

                btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
                                       num_bytes, parent);
                ref.real_root = root->root_key.objectid;
                btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
                                    key.objectid, key.offset);
                ret = btrfs_free_extent(trans, &ref);
                if (ret) {
                        btrfs_abort_transaction(trans, ret);
                        break;
                }
        }
        if (dirty)
                btrfs_mark_buffer_dirty(leaf);
        if (inode)
                btrfs_add_delayed_iput(inode);
        return ret;
}

static noinline_for_stack
int memcmp_node_keys(struct extent_buffer *eb, int slot,
                     struct btrfs_path *path, int level)
{
        struct btrfs_disk_key key1;
        struct btrfs_disk_key key2;
        btrfs_node_key(eb, &key1, slot);
        btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
        return memcmp(&key1, &key2, sizeof(key1));
}

/*
 * try to replace tree blocks in fs tree with the new blocks
 * in reloc tree. tree blocks that haven't been modified since the
 * reloc tree was created can be replaced.
 *
 * if a block was replaced, level of the block + 1 is returned.
 * if no block got replaced, 0 is returned. if there are other
 * errors, a negative error number is returned.
 */
static noinline_for_stack
int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
                 struct btrfs_root *dest, struct btrfs_root *src,
                 struct btrfs_path *path, struct btrfs_key *next_key,
                 int lowest_level, int max_level)
{
        struct btrfs_fs_info *fs_info = dest->fs_info;
        struct extent_buffer *eb;
        struct extent_buffer *parent;
        struct btrfs_ref ref = { 0 };
        struct btrfs_key key;
        u64 old_bytenr;
        u64 new_bytenr;
        u64 old_ptr_gen;
        u64 new_ptr_gen;
        u64 last_snapshot;
        u32 blocksize;
        int cow = 0;
        int level;
        int ret;
        int slot;

        BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
        BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);

        last_snapshot = btrfs_root_last_snapshot(&src->root_item);
again:
        slot = path->slots[lowest_level];
        btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);

        eb = btrfs_lock_root_node(dest);
        btrfs_set_lock_blocking_write(eb);
        level = btrfs_header_level(eb);

        if (level < lowest_level) {
                btrfs_tree_unlock(eb);
                free_extent_buffer(eb);
                return 0;
        }

        if (cow) {
                ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
                BUG_ON(ret);
        }
        btrfs_set_lock_blocking_write(eb);

        if (next_key) {
                next_key->objectid = (u64)-1;
                next_key->type = (u8)-1;
                next_key->offset = (u64)-1;
        }

        parent = eb;
        while (1) {
                struct btrfs_key first_key;

                level = btrfs_header_level(parent);
                BUG_ON(level < lowest_level);

                ret = btrfs_bin_search(parent, &key, level, &slot);
                if (ret < 0)
                        break;
                if (ret && slot > 0)
                        slot--;

                if (next_key && slot + 1 < btrfs_header_nritems(parent))
                        btrfs_node_key_to_cpu(parent, next_key, slot + 1);

                old_bytenr = btrfs_node_blockptr(parent, slot);
                blocksize = fs_info->nodesize;
                old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
                btrfs_node_key_to_cpu(parent, &first_key, slot);

                if (level <= max_level) {
                        eb = path->nodes[level];
                        new_bytenr = btrfs_node_blockptr(eb,
                                                         path->slots[level]);
                        new_ptr_gen = btrfs_node_ptr_generation(eb,
                                                        path->slots[level]);
                } else {
                        new_bytenr = 0;
                        new_ptr_gen = 0;
                }

                if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
                        ret = level;
                        break;
                }

                if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
                    memcmp_node_keys(parent, slot, path, level)) {
                        if (level <= lowest_level) {
                                ret = 0;
                                break;
                        }

                        eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen,
                                             level - 1, &first_key);
                        if (IS_ERR(eb)) {
                                ret = PTR_ERR(eb);
                                break;
                        } else if (!extent_buffer_uptodate(eb)) {
                                ret = -EIO;
                                free_extent_buffer(eb);
                                break;
                        }
                        btrfs_tree_lock(eb);
                        if (cow) {
                                ret = btrfs_cow_block(trans, dest, eb, parent,
                                                      slot, &eb);
                                BUG_ON(ret);
                        }
                        btrfs_set_lock_blocking_write(eb);

                        btrfs_tree_unlock(parent);
                        free_extent_buffer(parent);

                        parent = eb;
                        continue;
                }

                if (!cow) {
                        btrfs_tree_unlock(parent);
                        free_extent_buffer(parent);
                        cow = 1;
                        goto again;
                }

                btrfs_node_key_to_cpu(path->nodes[level], &key,
                                      path->slots[level]);
                btrfs_release_path(path);

                path->lowest_level = level;
                ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
                path->lowest_level = 0;
                BUG_ON(ret);

                /*
                 * Inform qgroup to trace both subtrees.
                 *
                 * We must trace both trees.
                 * 1) Tree reloc subtree
                 *    If not traced, we will leak data numbers
                 * 2) Fs subtree
                 *    If not traced, we will double count old data
                 *
                 * We don't scan the subtree right now, but only record
                 * the swapped tree blocks.
                 * The real subtree rescan is delayed until we have new
                 * CoW on the subtree root node before transaction commit.
                 */
                ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
                                rc->block_group, parent, slot,
                                path->nodes[level], path->slots[level],
                                last_snapshot);
                if (ret < 0)
                        break;
                /*
                 * swap blocks in fs tree and reloc tree.
                 */
                btrfs_set_node_blockptr(parent, slot, new_bytenr);
                btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
                btrfs_mark_buffer_dirty(parent);

                btrfs_set_node_blockptr(path->nodes[level],
                                        path->slots[level], old_bytenr);
                btrfs_set_node_ptr_generation(path->nodes[level],
                                              path->slots[level], old_ptr_gen);
                btrfs_mark_buffer_dirty(path->nodes[level]);

                btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
                                       blocksize, path->nodes[level]->start);
                ref.skip_qgroup = true;
                btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
                ret = btrfs_inc_extent_ref(trans, &ref);
                BUG_ON(ret);
                btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
                                       blocksize, 0);
                ref.skip_qgroup = true;
                btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
                ret = btrfs_inc_extent_ref(trans, &ref);
                BUG_ON(ret);

                btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
                                       blocksize, path->nodes[level]->start);
                btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
                ref.skip_qgroup = true;
                ret = btrfs_free_extent(trans, &ref);
                BUG_ON(ret);

                btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
                                       blocksize, 0);
                btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
                ref.skip_qgroup = true;
                ret = btrfs_free_extent(trans, &ref);
                BUG_ON(ret);

                btrfs_unlock_up_safe(path, 0);

                ret = level;
                break;
        }
        btrfs_tree_unlock(parent);
        free_extent_buffer(parent);
        return ret;
}
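
/*
 * Sketch of the swap replace_path() performs once it finds the lowest
 * tree block that was not modified since the reloc tree was created
 * (the four delayed ref updates above keep the extent tree consistent):
 *
 *	fs tree parent, slot:	old_bytenr  ->  new_bytenr
 *	reloc tree node, slot:	new_bytenr  ->  old_bytenr
 */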

/*
 * helper to find the next relocated block in reloc tree
 */
static noinline_for_stack
int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
                       int *level)
{
        struct extent_buffer *eb;
        int i;
        u64 last_snapshot;
        u32 nritems;

        last_snapshot = btrfs_root_last_snapshot(&root->root_item);

        for (i = 0; i < *level; i++) {
                free_extent_buffer(path->nodes[i]);
                path->nodes[i] = NULL;
        }

        for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
                eb = path->nodes[i];
                nritems = btrfs_header_nritems(eb);
                while (path->slots[i] + 1 < nritems) {
                        path->slots[i]++;
                        if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
                            last_snapshot)
                                continue;

                        *level = i;
                        return 0;
                }
                free_extent_buffer(path->nodes[i]);
                path->nodes[i] = NULL;
        }
        return 1;
}

/*
 * walk down reloc tree to find relocated block of lowest level
 */
static noinline_for_stack
int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
                         int *level)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_buffer *eb = NULL;
        int i;
        u64 bytenr;
        u64 ptr_gen = 0;
        u64 last_snapshot;
        u32 nritems;

        last_snapshot = btrfs_root_last_snapshot(&root->root_item);

        for (i = *level; i > 0; i--) {
                struct btrfs_key first_key;

                eb = path->nodes[i];
                nritems = btrfs_header_nritems(eb);
                while (path->slots[i] < nritems) {
                        ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
                        if (ptr_gen > last_snapshot)
                                break;
                        path->slots[i]++;
                }
                if (path->slots[i] >= nritems) {
                        if (i == *level)
                                break;
                        *level = i + 1;
                        return 0;
                }
                if (i == 1) {
                        *level = i;
                        return 0;
                }

                bytenr = btrfs_node_blockptr(eb, path->slots[i]);
                btrfs_node_key_to_cpu(eb, &first_key, path->slots[i]);
                eb = read_tree_block(fs_info, bytenr, ptr_gen, i - 1,
                                     &first_key);
                if (IS_ERR(eb)) {
                        return PTR_ERR(eb);
                } else if (!extent_buffer_uptodate(eb)) {
                        free_extent_buffer(eb);
                        return -EIO;
                }
                BUG_ON(btrfs_header_level(eb) != i - 1);
                path->nodes[i - 1] = eb;
                path->slots[i - 1] = 0;
        }
        return 1;
}
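
/*
 * Sketch of how the two walkers cooperate; this is the core loop of
 * merge_reloc_root() below. Pointer generations newer than the source
 * root's last_snapshot mark the relocated blocks:
 *
 *	while (1) {
 *		ret = walk_down_reloc_tree(reloc_root, path, &level);
 *		if (ret > 0)
 *			break;	(no more relocated blocks)
 *		(replace_path() on the block found)
 *		ret = walk_up_reloc_tree(reloc_root, path, &level);
 *		if (ret > 0)
 *			break;
 *	}
 */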

/*
 * invalidate extent cache for file extents whose keys are in the range
 * [min_key, max_key)
 */
static int invalidate_extent_cache(struct btrfs_root *root,
                                   struct btrfs_key *min_key,
                                   struct btrfs_key *max_key)
{
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct inode *inode = NULL;
        u64 objectid;
        u64 start, end;
        u64 ino;

        objectid = min_key->objectid;
        while (1) {
                cond_resched();
                iput(inode);

                if (objectid > max_key->objectid)
                        break;

                inode = find_next_inode(root, objectid);
                if (!inode)
                        break;
                ino = btrfs_ino(BTRFS_I(inode));

                if (ino > max_key->objectid) {
                        iput(inode);
                        break;
                }

                objectid = ino + 1;
                if (!S_ISREG(inode->i_mode))
                        continue;

                if (unlikely(min_key->objectid == ino)) {
                        if (min_key->type > BTRFS_EXTENT_DATA_KEY)
                                continue;
                        if (min_key->type < BTRFS_EXTENT_DATA_KEY)
                                start = 0;
                        else {
                                start = min_key->offset;
                                WARN_ON(!IS_ALIGNED(start, fs_info->sectorsize));
                        }
                } else {
                        start = 0;
                }

                if (unlikely(max_key->objectid == ino)) {
                        if (max_key->type < BTRFS_EXTENT_DATA_KEY)
                                continue;
                        if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
                                end = (u64)-1;
                        } else {
                                if (max_key->offset == 0)
                                        continue;
                                end = max_key->offset;
                                WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
                                end--;
                        }
                } else {
                        end = (u64)-1;
                }

                /* the lock_extent waits for readpage to complete */
                lock_extent(&BTRFS_I(inode)->io_tree, start, end);
                btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1);
                unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
        }
        return 0;
}

static int find_next_key(struct btrfs_path *path, int level,
                         struct btrfs_key *key)
{
        while (level < BTRFS_MAX_LEVEL) {
                if (!path->nodes[level])
                        break;
                if (path->slots[level] + 1 <
                    btrfs_header_nritems(path->nodes[level])) {
                        btrfs_node_key_to_cpu(path->nodes[level], key,
                                              path->slots[level] + 1);
                        return 0;
                }
                level++;
        }
        return 1;
}
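
/*
 * find_next_key() peeks at the key that follows the current path position
 * without advancing the path. merge_reloc_root() below uses it to skip
 * subtrees that are already fully covered by the range being merged:
 *
 *	if (!find_next_key(path, level, &key) &&
 *	    btrfs_comp_cpu_keys(&next_key, &key) >= 0)
 *		(nothing new in this subtree, skip replace_path())
 */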

/*
 * Insert current subvolume into reloc_control::dirty_subvol_roots
 */
static void insert_dirty_subvol(struct btrfs_trans_handle *trans,
                                struct reloc_control *rc,
                                struct btrfs_root *root)
{
        struct btrfs_root *reloc_root = root->reloc_root;
        struct btrfs_root_item *reloc_root_item;

        /* @root must be a subvolume tree root with a valid reloc tree */
        ASSERT(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
        ASSERT(reloc_root);

        reloc_root_item = &reloc_root->root_item;
        memset(&reloc_root_item->drop_progress, 0,
               sizeof(reloc_root_item->drop_progress));
        reloc_root_item->drop_level = 0;
        btrfs_set_root_refs(reloc_root_item, 0);
        btrfs_update_reloc_root(trans, root);

        if (list_empty(&root->reloc_dirty_list)) {
                btrfs_grab_root(root);
                list_add_tail(&root->reloc_dirty_list, &rc->dirty_subvol_roots);
        }
}

static int clean_dirty_subvols(struct reloc_control *rc)
{
        struct btrfs_root *root;
        struct btrfs_root *next;
        int ret = 0;
        int ret2;

        list_for_each_entry_safe(root, next, &rc->dirty_subvol_roots,
                                 reloc_dirty_list) {
                if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
                        /* Merged subvolume, cleanup its reloc root */
                        struct btrfs_root *reloc_root = root->reloc_root;

                        list_del_init(&root->reloc_dirty_list);
                        root->reloc_root = NULL;
                        /*
                         * Need barrier to ensure clear_bit() only happens after
                         * root->reloc_root = NULL. Pairs with have_reloc_root.
                         */
                        smp_wmb();
                        clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
                        if (reloc_root) {
                                /*
                                 * btrfs_drop_snapshot drops our ref we hold for
                                 * ->reloc_root. If it fails however we must
                                 * drop the ref ourselves.
                                 */
                                ret2 = btrfs_drop_snapshot(reloc_root, 0, 1);
                                if (ret2 < 0) {
                                        btrfs_put_root(reloc_root);
                                        if (!ret)
                                                ret = ret2;
                                }
                        }
                        btrfs_put_root(root);
                } else {
                        /* Orphan reloc tree, just clean it up */
                        ret2 = btrfs_drop_snapshot(root, 0, 1);
                        if (ret2 < 0) {
                                btrfs_put_root(root);
                                if (!ret)
                                        ret = ret2;
                        }
                }
        }
        return ret;
}
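
/*
 * Note: insert_dirty_subvol() and clean_dirty_subvols() bracket step 3 of
 * the overview at the top of this file: merged subvolumes are queued on
 * rc->dirty_subvol_roots, and their now-dead reloc trees (root refs were
 * set to 0 above) are all dropped in one pass once merging is done.
 */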

/*
 * merge the relocated tree blocks in reloc tree with corresponding
 * fs tree.
 */
static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
                                               struct btrfs_root *root)
{
        struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
        struct btrfs_key key;
        struct btrfs_key next_key;
        struct btrfs_trans_handle *trans = NULL;
        struct btrfs_root *reloc_root;
        struct btrfs_root_item *root_item;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        int level;
        int max_level;
        int replaced = 0;
        int ret;
        int err = 0;
        u32 min_reserved;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        path->reada = READA_FORWARD;

        reloc_root = root->reloc_root;
        root_item = &reloc_root->root_item;

        if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
                level = btrfs_root_level(root_item);
                atomic_inc(&reloc_root->node->refs);
                path->nodes[level] = reloc_root->node;
                path->slots[level] = 0;
        } else {
                btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);

                level = root_item->drop_level;
                BUG_ON(level == 0);
                path->lowest_level = level;
                ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
                path->lowest_level = 0;
                if (ret < 0) {
                        btrfs_free_path(path);
                        return ret;
                }

                btrfs_node_key_to_cpu(path->nodes[level], &next_key,
                                      path->slots[level]);
                WARN_ON(memcmp(&key, &next_key, sizeof(key)));

                btrfs_unlock_up_safe(path, 0);
        }

        min_reserved = fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
        memset(&next_key, 0, sizeof(next_key));

        while (1) {
                ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
                                             BTRFS_RESERVE_FLUSH_ALL);
                if (ret) {
                        err = ret;
                        goto out;
                }
                trans = btrfs_start_transaction(root, 0);
                if (IS_ERR(trans)) {
                        err = PTR_ERR(trans);
                        trans = NULL;
                        goto out;
                }

                /*
                 * At this point we no longer have a reloc_control, so we can't
                 * depend on btrfs_init_reloc_root to update our last_trans.
                 *
                 * But that's ok, we started the trans handle on our
                 * corresponding fs_root, which means it's been added to the
                 * dirty list. At commit time we'll still call
                 * btrfs_update_reloc_root() and update our root item
                 * appropriately.
                 */
                reloc_root->last_trans = trans->transid;
                trans->block_rsv = rc->block_rsv;

                replaced = 0;
                max_level = level;

                ret = walk_down_reloc_tree(reloc_root, path, &level);
                if (ret < 0) {
                        err = ret;
                        goto out;
                }
                if (ret > 0)
                        break;

                if (!find_next_key(path, level, &key) &&
                    btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
                        ret = 0;
                } else {
                        ret = replace_path(trans, rc, root, reloc_root, path,
                                           &next_key, level, max_level);
                }
                if (ret < 0) {
                        err = ret;
                        goto out;
                }

                if (ret > 0) {
                        level = ret;
                        btrfs_node_key_to_cpu(path->nodes[level], &key,
                                              path->slots[level]);
                        replaced = 1;
                }

                ret = walk_up_reloc_tree(reloc_root, path, &level);
                if (ret > 0)
                        break;

                BUG_ON(level == 0);
                /*
                 * save the merging progress in the drop_progress.
                 * this is OK since root refs == 1 in this case.
                 */
                btrfs_node_key(path->nodes[level], &root_item->drop_progress,
                               path->slots[level]);
                root_item->drop_level = level;

                btrfs_end_transaction_throttle(trans);
                trans = NULL;

                btrfs_btree_balance_dirty(fs_info);

                if (replaced && rc->stage == UPDATE_DATA_PTRS)
                        invalidate_extent_cache(root, &key, &next_key);
        }

        /*
         * handle the case where only one block in the fs tree needs to be
         * relocated and that block is the tree root.
         */
        leaf = btrfs_lock_root_node(root);
        ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
        btrfs_tree_unlock(leaf);
        free_extent_buffer(leaf);
        if (ret < 0)
                err = ret;
out:
        btrfs_free_path(path);

        if (err == 0)
                insert_dirty_subvol(trans, rc, root);

        if (trans)
                btrfs_end_transaction_throttle(trans);

        btrfs_btree_balance_dirty(fs_info);

        if (replaced && rc->stage == UPDATE_DATA_PTRS)
                invalidate_extent_cache(root, &key, &next_key);

        return err;
}

static noinline_for_stack
int prepare_to_merge(struct reloc_control *rc, int err)
{
        struct btrfs_root *root = rc->extent_root;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_root *reloc_root;
        struct btrfs_trans_handle *trans;
        LIST_HEAD(reloc_roots);
        u64 num_bytes = 0;
        int ret;

        mutex_lock(&fs_info->reloc_mutex);
        rc->merging_rsv_size += fs_info->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
        rc->merging_rsv_size += rc->nodes_relocated * 2;
        mutex_unlock(&fs_info->reloc_mutex);

again:
        if (!err) {
                num_bytes = rc->merging_rsv_size;
                ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
                                          BTRFS_RESERVE_FLUSH_ALL);
                if (ret)
                        err = ret;
        }

        trans = btrfs_join_transaction(rc->extent_root);
        if (IS_ERR(trans)) {
                if (!err)
                        btrfs_block_rsv_release(fs_info, rc->block_rsv,
                                                num_bytes, NULL);
                return PTR_ERR(trans);
        }

        if (!err) {
                if (num_bytes != rc->merging_rsv_size) {
                        btrfs_end_transaction(trans);
                        btrfs_block_rsv_release(fs_info, rc->block_rsv,
                                                num_bytes, NULL);
                        goto again;
                }
        }

        rc->merge_reloc_tree = 1;

        while (!list_empty(&rc->reloc_roots)) {
                reloc_root = list_entry(rc->reloc_roots.next,
                                        struct btrfs_root, root_list);
                list_del_init(&reloc_root->root_list);

                root = read_fs_root(fs_info, reloc_root->root_key.offset);
                BUG_ON(IS_ERR(root));
                BUG_ON(root->reloc_root != reloc_root);

                /*
                 * set reference count to 1, so btrfs_recover_relocation
                 * knows it should resume merging
                 */
                if (!err)
                        btrfs_set_root_refs(&reloc_root->root_item, 1);
                btrfs_update_reloc_root(trans, root);

                list_add(&reloc_root->root_list, &reloc_roots);
                btrfs_put_root(root);
        }

        list_splice(&reloc_roots, &rc->reloc_roots);

        if (!err)
                btrfs_commit_transaction(trans);
        else
                btrfs_end_transaction(trans);
        return err;
}

static noinline_for_stack
void free_reloc_roots(struct list_head *list)
{
        struct btrfs_root *reloc_root;

        while (!list_empty(list)) {
                reloc_root = list_entry(list->next, struct btrfs_root,
                                        root_list);
                __del_reloc_root(reloc_root);
        }
}
rc->extent_root->fs_info; 2060 struct btrfs_root *root; 2061 struct btrfs_root *reloc_root; 2062 LIST_HEAD(reloc_roots); 2063 int found = 0; 2064 int ret = 0; 2065 again: 2066 root = rc->extent_root; 2067 2068 /* 2069 * this serializes us with btrfs_record_root_in_transaction, 2070 * we have to make sure nobody is in the middle of 2071 * adding their roots to the list while we are 2072 * doing this splice 2073 */ 2074 mutex_lock(&fs_info->reloc_mutex); 2075 list_splice_init(&rc->reloc_roots, &reloc_roots); 2076 mutex_unlock(&fs_info->reloc_mutex); 2077 2078 while (!list_empty(&reloc_roots)) { 2079 found = 1; 2080 reloc_root = list_entry(reloc_roots.next, 2081 struct btrfs_root, root_list); 2082 2083 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 2084 root = read_fs_root(fs_info, 2085 reloc_root->root_key.offset); 2086 BUG_ON(IS_ERR(root)); 2087 BUG_ON(root->reloc_root != reloc_root); 2088 2089 ret = merge_reloc_root(rc, root); 2090 btrfs_put_root(root); 2091 if (ret) { 2092 if (list_empty(&reloc_root->root_list)) 2093 list_add_tail(&reloc_root->root_list, 2094 &reloc_roots); 2095 goto out; 2096 } 2097 } else { 2098 list_del_init(&reloc_root->root_list); 2099 /* Don't forget to queue this reloc root for cleanup */ 2100 list_add_tail(&reloc_root->reloc_dirty_list, 2101 &rc->dirty_subvol_roots); 2102 } 2103 } 2104 2105 if (found) { 2106 found = 0; 2107 goto again; 2108 } 2109 out: 2110 if (ret) { 2111 btrfs_handle_fs_error(fs_info, ret, NULL); 2112 if (!list_empty(&reloc_roots)) 2113 free_reloc_roots(&reloc_roots); 2114 2115 /* new reloc root may be added */ 2116 mutex_lock(&fs_info->reloc_mutex); 2117 list_splice_init(&rc->reloc_roots, &reloc_roots); 2118 mutex_unlock(&fs_info->reloc_mutex); 2119 if (!list_empty(&reloc_roots)) 2120 free_reloc_roots(&reloc_roots); 2121 } 2122 2123 /* 2124 * We used to have 2125 * 2126 * BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); 2127 * 2128 * here, but it's wrong. If we fail to start the transaction in 2129 * prepare_to_merge() we will have only 0 ref reloc roots, none of which 2130 * have actually been removed from the reloc_root_tree rb tree. This is 2131 * fine because we're bailing here, and we hold a reference on the root 2132 * for the list that holds it, so these roots will be cleaned up when we 2133 * do the reloc_dirty_list afterwards. Meanwhile the root->reloc_root 2134 * will be cleaned up on unmount. 2135 * 2136 * The remaining nodes will be cleaned up by free_reloc_control. 
2137 */ 2138 } 2139 2140 static void free_block_list(struct rb_root *blocks) 2141 { 2142 struct tree_block *block; 2143 struct rb_node *rb_node; 2144 while ((rb_node = rb_first(blocks))) { 2145 block = rb_entry(rb_node, struct tree_block, rb_node); 2146 rb_erase(rb_node, blocks); 2147 kfree(block); 2148 } 2149 } 2150 2151 static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, 2152 struct btrfs_root *reloc_root) 2153 { 2154 struct btrfs_fs_info *fs_info = reloc_root->fs_info; 2155 struct btrfs_root *root; 2156 int ret; 2157 2158 if (reloc_root->last_trans == trans->transid) 2159 return 0; 2160 2161 root = read_fs_root(fs_info, reloc_root->root_key.offset); 2162 BUG_ON(IS_ERR(root)); 2163 BUG_ON(root->reloc_root != reloc_root); 2164 ret = btrfs_record_root_in_trans(trans, root); 2165 btrfs_put_root(root); 2166 2167 return ret; 2168 } 2169 2170 static noinline_for_stack 2171 struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, 2172 struct reloc_control *rc, 2173 struct btrfs_backref_node *node, 2174 struct btrfs_backref_edge *edges[]) 2175 { 2176 struct btrfs_backref_node *next; 2177 struct btrfs_root *root; 2178 int index = 0; 2179 2180 next = node; 2181 while (1) { 2182 cond_resched(); 2183 next = walk_up_backref(next, edges, &index); 2184 root = next->root; 2185 BUG_ON(!root); 2186 BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state)); 2187 2188 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 2189 record_reloc_root_in_trans(trans, root); 2190 break; 2191 } 2192 2193 btrfs_record_root_in_trans(trans, root); 2194 root = root->reloc_root; 2195 2196 if (next->new_bytenr != root->node->start) { 2197 BUG_ON(next->new_bytenr); 2198 BUG_ON(!list_empty(&next->list)); 2199 next->new_bytenr = root->node->start; 2200 btrfs_put_root(next->root); 2201 next->root = btrfs_grab_root(root); 2202 ASSERT(next->root); 2203 list_add_tail(&next->list, 2204 &rc->backref_cache.changed); 2205 mark_block_processed(rc, next); 2206 break; 2207 } 2208 2209 WARN_ON(1); 2210 root = NULL; 2211 next = walk_down_backref(edges, &index); 2212 if (!next || next->level <= node->level) 2213 break; 2214 } 2215 if (!root) 2216 return NULL; 2217 2218 next = node; 2219 /* setup backref node path for btrfs_reloc_cow_block */ 2220 while (1) { 2221 rc->backref_cache.path[next->level] = next; 2222 if (--index < 0) 2223 break; 2224 next = edges[index]->node[UPPER]; 2225 } 2226 return root; 2227 } 2228 2229 /* 2230 * select a tree root for relocation. return NULL if the block 2231 * is reference counted. we should use do_relocation() in this 2232 * case. return a tree root pointer if the block isn't reference 2233 * counted. return -ENOENT if the block is root of reloc tree. 
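 *
 * a rough caller-side sketch (see relocate_tree_block() below for the
 * real logic, including the shareable-root case):
 *
 *	root = select_one_root(node);
 *	if (root == ERR_PTR(-ENOENT))
 *		update_processed_blocks(rc, node);  <- reloc tree root
 *	else if (root)
 *		... relocate by COWing the path in that tree ...
 *	else
 *		do_relocation(trans, rc, node, key, path, 1);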
2234 */ 2235 static noinline_for_stack 2236 struct btrfs_root *select_one_root(struct btrfs_backref_node *node) 2237 { 2238 struct btrfs_backref_node *next; 2239 struct btrfs_root *root; 2240 struct btrfs_root *fs_root = NULL; 2241 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2242 int index = 0; 2243 2244 next = node; 2245 while (1) { 2246 cond_resched(); 2247 next = walk_up_backref(next, edges, &index); 2248 root = next->root; 2249 BUG_ON(!root); 2250 2251 /* no other choice for a non-reference counted tree */ 2252 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) 2253 return root; 2254 2255 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) 2256 fs_root = root; 2257 2258 if (next != node) 2259 return NULL; 2260 2261 next = walk_down_backref(edges, &index); 2262 if (!next || next->level <= node->level) 2263 break; 2264 } 2265 2266 if (!fs_root) 2267 return ERR_PTR(-ENOENT); 2268 return fs_root; 2269 } 2270 2271 static noinline_for_stack 2272 u64 calcu_metadata_size(struct reloc_control *rc, 2273 struct btrfs_backref_node *node, int reserve) 2274 { 2275 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 2276 struct btrfs_backref_node *next = node; 2277 struct btrfs_backref_edge *edge; 2278 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2279 u64 num_bytes = 0; 2280 int index = 0; 2281 2282 BUG_ON(reserve && node->processed); 2283 2284 while (next) { 2285 cond_resched(); 2286 while (1) { 2287 if (next->processed && (reserve || next != node)) 2288 break; 2289 2290 num_bytes += fs_info->nodesize; 2291 2292 if (list_empty(&next->upper)) 2293 break; 2294 2295 edge = list_entry(next->upper.next, 2296 struct btrfs_backref_edge, list[LOWER]); 2297 edges[index++] = edge; 2298 next = edge->node[UPPER]; 2299 } 2300 next = walk_down_backref(edges, &index); 2301 } 2302 return num_bytes; 2303 } 2304 2305 static int reserve_metadata_space(struct btrfs_trans_handle *trans, 2306 struct reloc_control *rc, 2307 struct btrfs_backref_node *node) 2308 { 2309 struct btrfs_root *root = rc->extent_root; 2310 struct btrfs_fs_info *fs_info = root->fs_info; 2311 u64 num_bytes; 2312 int ret; 2313 u64 tmp; 2314 2315 num_bytes = calcu_metadata_size(rc, node, 1) * 2; 2316 2317 trans->block_rsv = rc->block_rsv; 2318 rc->reserved_bytes += num_bytes; 2319 2320 /* 2321 * We are under a transaction here so we can only do limited flushing. 2322 * If we get an enospc just kick back -EAGAIN so we know to drop the 2323 * transaction and try to refill when we can flush all the things. 2324 */ 2325 ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes, 2326 BTRFS_RESERVE_FLUSH_LIMIT); 2327 if (ret) { 2328 tmp = fs_info->nodesize * RELOCATION_RESERVED_NODES; 2329 while (tmp <= rc->reserved_bytes) 2330 tmp <<= 1; 2331 /* 2332 * only one thread can access block_rsv at this point, 2333 * so we don't need to hold a lock to protect it. 2334 * we expand the reservation size here to allow enough 2335 * space for relocation, and we will return early in 2336 * the enospc case. 2337 */ 2338 rc->block_rsv->size = tmp + fs_info->nodesize * 2339 RELOCATION_RESERVED_NODES; 2340 return -EAGAIN; 2341 } 2342 2343 return 0; 2344 } 2345 2346 /* 2347 * relocate a tree block, and then update pointers in upper level 2348 * blocks that reference the block to point to the new location. 2349 * 2350 * if called by link_to_upper, the block has already been relocated. 2351 * in that case this function just updates pointers.
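 *
 * roughly: with lowest == 1 the block is actually moved here (the
 * btrfs_cow_block() call below relocates it, and btrfs_reloc_cow_block()
 * points node->eb at the new copy), while with lowest == 0 only the
 * parent's block pointer, generation and backref are rewired to the
 * already-relocated copy.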
2352 */ 2353 static int do_relocation(struct btrfs_trans_handle *trans, 2354 struct reloc_control *rc, 2355 struct btrfs_backref_node *node, 2356 struct btrfs_key *key, 2357 struct btrfs_path *path, int lowest) 2358 { 2359 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 2360 struct btrfs_backref_node *upper; 2361 struct btrfs_backref_edge *edge; 2362 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2363 struct btrfs_root *root; 2364 struct extent_buffer *eb; 2365 u32 blocksize; 2366 u64 bytenr; 2367 u64 generation; 2368 int slot; 2369 int ret; 2370 int err = 0; 2371 2372 BUG_ON(lowest && node->eb); 2373 2374 path->lowest_level = node->level + 1; 2375 rc->backref_cache.path[node->level] = node; 2376 list_for_each_entry(edge, &node->upper, list[LOWER]) { 2377 struct btrfs_key first_key; 2378 struct btrfs_ref ref = { 0 }; 2379 2380 cond_resched(); 2381 2382 upper = edge->node[UPPER]; 2383 root = select_reloc_root(trans, rc, upper, edges); 2384 BUG_ON(!root); 2385 2386 if (upper->eb && !upper->locked) { 2387 if (!lowest) { 2388 ret = btrfs_bin_search(upper->eb, key, 2389 upper->level, &slot); 2390 if (ret < 0) { 2391 err = ret; 2392 goto next; 2393 } 2394 BUG_ON(ret); 2395 bytenr = btrfs_node_blockptr(upper->eb, slot); 2396 if (node->eb->start == bytenr) 2397 goto next; 2398 } 2399 btrfs_backref_drop_node_buffer(upper); 2400 } 2401 2402 if (!upper->eb) { 2403 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2404 if (ret) { 2405 if (ret < 0) 2406 err = ret; 2407 else 2408 err = -ENOENT; 2409 2410 btrfs_release_path(path); 2411 break; 2412 } 2413 2414 if (!upper->eb) { 2415 upper->eb = path->nodes[upper->level]; 2416 path->nodes[upper->level] = NULL; 2417 } else { 2418 BUG_ON(upper->eb != path->nodes[upper->level]); 2419 } 2420 2421 upper->locked = 1; 2422 path->locks[upper->level] = 0; 2423 2424 slot = path->slots[upper->level]; 2425 btrfs_release_path(path); 2426 } else { 2427 ret = btrfs_bin_search(upper->eb, key, upper->level, 2428 &slot); 2429 if (ret < 0) { 2430 err = ret; 2431 goto next; 2432 } 2433 BUG_ON(ret); 2434 } 2435 2436 bytenr = btrfs_node_blockptr(upper->eb, slot); 2437 if (lowest) { 2438 if (bytenr != node->bytenr) { 2439 btrfs_err(root->fs_info, 2440 "lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu", 2441 bytenr, node->bytenr, slot, 2442 upper->eb->start); 2443 err = -EIO; 2444 goto next; 2445 } 2446 } else { 2447 if (node->eb->start == bytenr) 2448 goto next; 2449 } 2450 2451 blocksize = root->fs_info->nodesize; 2452 generation = btrfs_node_ptr_generation(upper->eb, slot); 2453 btrfs_node_key_to_cpu(upper->eb, &first_key, slot); 2454 eb = read_tree_block(fs_info, bytenr, generation, 2455 upper->level - 1, &first_key); 2456 if (IS_ERR(eb)) { 2457 err = PTR_ERR(eb); 2458 goto next; 2459 } else if (!extent_buffer_uptodate(eb)) { 2460 free_extent_buffer(eb); 2461 err = -EIO; 2462 goto next; 2463 } 2464 btrfs_tree_lock(eb); 2465 btrfs_set_lock_blocking_write(eb); 2466 2467 if (!node->eb) { 2468 ret = btrfs_cow_block(trans, root, eb, upper->eb, 2469 slot, &eb); 2470 btrfs_tree_unlock(eb); 2471 free_extent_buffer(eb); 2472 if (ret < 0) { 2473 err = ret; 2474 goto next; 2475 } 2476 BUG_ON(node->eb != eb); 2477 } else { 2478 btrfs_set_node_blockptr(upper->eb, slot, 2479 node->eb->start); 2480 btrfs_set_node_ptr_generation(upper->eb, slot, 2481 trans->transid); 2482 btrfs_mark_buffer_dirty(upper->eb); 2483 2484 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, 2485 node->eb->start, blocksize, 2486 upper->eb->start); 2487 ref.real_root 
= root->root_key.objectid; 2488 btrfs_init_tree_ref(&ref, node->level, 2489 btrfs_header_owner(upper->eb)); 2490 ret = btrfs_inc_extent_ref(trans, &ref); 2491 BUG_ON(ret); 2492 2493 ret = btrfs_drop_subtree(trans, root, eb, upper->eb); 2494 BUG_ON(ret); 2495 } 2496 next: 2497 if (!upper->pending) 2498 btrfs_backref_drop_node_buffer(upper); 2499 else 2500 btrfs_backref_unlock_node_buffer(upper); 2501 if (err) 2502 break; 2503 } 2504 2505 if (!err && node->pending) { 2506 btrfs_backref_drop_node_buffer(node); 2507 list_move_tail(&node->list, &rc->backref_cache.changed); 2508 node->pending = 0; 2509 } 2510 2511 path->lowest_level = 0; 2512 BUG_ON(err == -ENOSPC); 2513 return err; 2514 } 2515 2516 static int link_to_upper(struct btrfs_trans_handle *trans, 2517 struct reloc_control *rc, 2518 struct btrfs_backref_node *node, 2519 struct btrfs_path *path) 2520 { 2521 struct btrfs_key key; 2522 2523 btrfs_node_key_to_cpu(node->eb, &key, 0); 2524 return do_relocation(trans, rc, node, &key, path, 0); 2525 } 2526 2527 static int finish_pending_nodes(struct btrfs_trans_handle *trans, 2528 struct reloc_control *rc, 2529 struct btrfs_path *path, int err) 2530 { 2531 LIST_HEAD(list); 2532 struct btrfs_backref_cache *cache = &rc->backref_cache; 2533 struct btrfs_backref_node *node; 2534 int level; 2535 int ret; 2536 2537 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2538 while (!list_empty(&cache->pending[level])) { 2539 node = list_entry(cache->pending[level].next, 2540 struct btrfs_backref_node, list); 2541 list_move_tail(&node->list, &list); 2542 BUG_ON(!node->pending); 2543 2544 if (!err) { 2545 ret = link_to_upper(trans, rc, node, path); 2546 if (ret < 0) 2547 err = ret; 2548 } 2549 } 2550 list_splice_init(&list, &cache->pending[level]); 2551 } 2552 return err; 2553 } 2554 2555 /* 2556 * mark a block and all blocks that directly/indirectly reference the 2557 * block as processed.
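 *
 * processed blocks are recorded in rc->processed_blocks (see
 * mark_block_processed()), which is how find_next_extent() and
 * tree_block_processed() later skip extents that are already done.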
2558 */ 2559 static void update_processed_blocks(struct reloc_control *rc, 2560 struct btrfs_backref_node *node) 2561 { 2562 struct btrfs_backref_node *next = node; 2563 struct btrfs_backref_edge *edge; 2564 struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2565 int index = 0; 2566 2567 while (next) { 2568 cond_resched(); 2569 while (1) { 2570 if (next->processed) 2571 break; 2572 2573 mark_block_processed(rc, next); 2574 2575 if (list_empty(&next->upper)) 2576 break; 2577 2578 edge = list_entry(next->upper.next, 2579 struct btrfs_backref_edge, list[LOWER]); 2580 edges[index++] = edge; 2581 next = edge->node[UPPER]; 2582 } 2583 next = walk_down_backref(edges, &index); 2584 } 2585 } 2586 2587 static int tree_block_processed(u64 bytenr, struct reloc_control *rc) 2588 { 2589 u32 blocksize = rc->extent_root->fs_info->nodesize; 2590 2591 if (test_range_bit(&rc->processed_blocks, bytenr, 2592 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) 2593 return 1; 2594 return 0; 2595 } 2596 2597 static int get_tree_block_key(struct btrfs_fs_info *fs_info, 2598 struct tree_block *block) 2599 { 2600 struct extent_buffer *eb; 2601 2602 eb = read_tree_block(fs_info, block->bytenr, block->key.offset, 2603 block->level, NULL); 2604 if (IS_ERR(eb)) { 2605 return PTR_ERR(eb); 2606 } else if (!extent_buffer_uptodate(eb)) { 2607 free_extent_buffer(eb); 2608 return -EIO; 2609 } 2610 if (block->level == 0) 2611 btrfs_item_key_to_cpu(eb, &block->key, 0); 2612 else 2613 btrfs_node_key_to_cpu(eb, &block->key, 0); 2614 free_extent_buffer(eb); 2615 block->key_ready = 1; 2616 return 0; 2617 } 2618 2619 /* 2620 * helper function to relocate a tree block 2621 */ 2622 static int relocate_tree_block(struct btrfs_trans_handle *trans, 2623 struct reloc_control *rc, 2624 struct btrfs_backref_node *node, 2625 struct btrfs_key *key, 2626 struct btrfs_path *path) 2627 { 2628 struct btrfs_root *root; 2629 int ret = 0; 2630 2631 if (!node) 2632 return 0; 2633 2634 /* 2635 * If we fail here we want to drop our backref_node because we are going 2636 * to start over and regenerate the tree for it. 
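 * the drop itself happens at the "out" label below via
 * btrfs_backref_cleanup_node().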
2637 */ 2638 ret = reserve_metadata_space(trans, rc, node); 2639 if (ret) 2640 goto out; 2641 2642 BUG_ON(node->processed); 2643 root = select_one_root(node); 2644 if (root == ERR_PTR(-ENOENT)) { 2645 update_processed_blocks(rc, node); 2646 goto out; 2647 } 2648 2649 if (root) { 2650 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { 2651 BUG_ON(node->new_bytenr); 2652 BUG_ON(!list_empty(&node->list)); 2653 btrfs_record_root_in_trans(trans, root); 2654 root = root->reloc_root; 2655 node->new_bytenr = root->node->start; 2656 btrfs_put_root(node->root); 2657 node->root = btrfs_grab_root(root); 2658 ASSERT(node->root); 2659 list_add_tail(&node->list, &rc->backref_cache.changed); 2660 } else { 2661 path->lowest_level = node->level; 2662 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2663 btrfs_release_path(path); 2664 if (ret > 0) 2665 ret = 0; 2666 } 2667 if (!ret) 2668 update_processed_blocks(rc, node); 2669 } else { 2670 ret = do_relocation(trans, rc, node, key, path, 1); 2671 } 2672 out: 2673 if (ret || node->level == 0 || node->cowonly) 2674 btrfs_backref_cleanup_node(&rc->backref_cache, node); 2675 return ret; 2676 } 2677 2678 /* 2679 * relocate a list of blocks 2680 */ 2681 static noinline_for_stack 2682 int relocate_tree_blocks(struct btrfs_trans_handle *trans, 2683 struct reloc_control *rc, struct rb_root *blocks) 2684 { 2685 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 2686 struct btrfs_backref_node *node; 2687 struct btrfs_path *path; 2688 struct tree_block *block; 2689 struct tree_block *next; 2690 int ret; 2691 int err = 0; 2692 2693 path = btrfs_alloc_path(); 2694 if (!path) { 2695 err = -ENOMEM; 2696 goto out_free_blocks; 2697 } 2698 2699 /* Kick in readahead for tree blocks with missing keys */ 2700 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) { 2701 if (!block->key_ready) 2702 readahead_tree_block(fs_info, block->bytenr); 2703 } 2704 2705 /* Get first keys */ 2706 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) { 2707 if (!block->key_ready) { 2708 err = get_tree_block_key(fs_info, block); 2709 if (err) 2710 goto out_free_path; 2711 } 2712 } 2713 2714 /* Do tree relocation */ 2715 rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) { 2716 node = build_backref_tree(rc, &block->key, 2717 block->level, block->bytenr); 2718 if (IS_ERR(node)) { 2719 err = PTR_ERR(node); 2720 goto out; 2721 } 2722 2723 ret = relocate_tree_block(trans, rc, node, &block->key, 2724 path); 2725 if (ret < 0) { 2726 err = ret; 2727 break; 2728 } 2729 } 2730 out: 2731 err = finish_pending_nodes(trans, rc, path, err); 2732 2733 out_free_path: 2734 btrfs_free_path(path); 2735 out_free_blocks: 2736 free_block_list(blocks); 2737 return err; 2738 } 2739 2740 static noinline_for_stack 2741 int prealloc_file_extent_cluster(struct inode *inode, 2742 struct file_extent_cluster *cluster) 2743 { 2744 u64 alloc_hint = 0; 2745 u64 start; 2746 u64 end; 2747 u64 offset = BTRFS_I(inode)->index_cnt; 2748 u64 num_bytes; 2749 int nr = 0; 2750 int ret = 0; 2751 u64 prealloc_start = cluster->start - offset; 2752 u64 prealloc_end = cluster->end - offset; 2753 u64 cur_offset; 2754 struct extent_changeset *data_reserved = NULL; 2755 2756 BUG_ON(cluster->start != cluster->boundary[0]); 2757 inode_lock(inode); 2758 2759 ret = btrfs_check_data_free_space(inode, &data_reserved, prealloc_start, 2760 prealloc_end + 1 - prealloc_start); 2761 if (ret) 2762 goto out; 2763 2764 cur_offset = prealloc_start; 2765 while (nr < cluster->nr) { 2766 start = 
cluster->boundary[nr] - offset; 2767 if (nr + 1 < cluster->nr) 2768 end = cluster->boundary[nr + 1] - 1 - offset; 2769 else 2770 end = cluster->end - offset; 2771 2772 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 2773 num_bytes = end + 1 - start; 2774 if (cur_offset < start) 2775 btrfs_free_reserved_data_space(inode, data_reserved, 2776 cur_offset, start - cur_offset); 2777 ret = btrfs_prealloc_file_range(inode, 0, start, 2778 num_bytes, num_bytes, 2779 end + 1, &alloc_hint); 2780 cur_offset = end + 1; 2781 unlock_extent(&BTRFS_I(inode)->io_tree, start, end); 2782 if (ret) 2783 break; 2784 nr++; 2785 } 2786 if (cur_offset < prealloc_end) 2787 btrfs_free_reserved_data_space(inode, data_reserved, 2788 cur_offset, prealloc_end + 1 - cur_offset); 2789 out: 2790 inode_unlock(inode); 2791 extent_changeset_free(data_reserved); 2792 return ret; 2793 } 2794 2795 static noinline_for_stack 2796 int setup_extent_mapping(struct inode *inode, u64 start, u64 end, 2797 u64 block_start) 2798 { 2799 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 2800 struct extent_map *em; 2801 int ret = 0; 2802 2803 em = alloc_extent_map(); 2804 if (!em) 2805 return -ENOMEM; 2806 2807 em->start = start; 2808 em->len = end + 1 - start; 2809 em->block_len = em->len; 2810 em->block_start = block_start; 2811 set_bit(EXTENT_FLAG_PINNED, &em->flags); 2812 2813 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 2814 while (1) { 2815 write_lock(&em_tree->lock); 2816 ret = add_extent_mapping(em_tree, em, 0); 2817 write_unlock(&em_tree->lock); 2818 if (ret != -EEXIST) { 2819 free_extent_map(em); 2820 break; 2821 } 2822 btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0); 2823 } 2824 unlock_extent(&BTRFS_I(inode)->io_tree, start, end); 2825 return ret; 2826 } 2827 2828 /* 2829 * Allow error injection to test balance cancellation 2830 */ 2831 int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info) 2832 { 2833 return atomic_read(&fs_info->balance_cancel_req); 2834 } 2835 ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE); 2836 2837 static int relocate_file_extent_cluster(struct inode *inode, 2838 struct file_extent_cluster *cluster) 2839 { 2840 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 2841 u64 page_start; 2842 u64 page_end; 2843 u64 offset = BTRFS_I(inode)->index_cnt; 2844 unsigned long index; 2845 unsigned long last_index; 2846 struct page *page; 2847 struct file_ra_state *ra; 2848 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 2849 int nr = 0; 2850 int ret = 0; 2851 2852 if (!cluster->nr) 2853 return 0; 2854 2855 ra = kzalloc(sizeof(*ra), GFP_NOFS); 2856 if (!ra) 2857 return -ENOMEM; 2858 2859 ret = prealloc_file_extent_cluster(inode, cluster); 2860 if (ret) 2861 goto out; 2862 2863 file_ra_state_init(ra, inode->i_mapping); 2864 2865 ret = setup_extent_mapping(inode, cluster->start - offset, 2866 cluster->end - offset, cluster->start); 2867 if (ret) 2868 goto out; 2869 2870 index = (cluster->start - offset) >> PAGE_SHIFT; 2871 last_index = (cluster->end - offset) >> PAGE_SHIFT; 2872 while (index <= last_index) { 2873 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), 2874 PAGE_SIZE); 2875 if (ret) 2876 goto out; 2877 2878 page = find_lock_page(inode->i_mapping, index); 2879 if (!page) { 2880 page_cache_sync_readahead(inode->i_mapping, 2881 ra, NULL, index, 2882 last_index + 1 - index); 2883 page = find_or_create_page(inode->i_mapping, index, 2884 mask); 2885 if (!page) { 2886 btrfs_delalloc_release_metadata(BTRFS_I(inode), 2887 PAGE_SIZE, true); 2888 
btrfs_delalloc_release_extents(BTRFS_I(inode), 2889 PAGE_SIZE); 2890 ret = -ENOMEM; 2891 goto out; 2892 } 2893 } 2894 2895 if (PageReadahead(page)) { 2896 page_cache_async_readahead(inode->i_mapping, 2897 ra, NULL, page, index, 2898 last_index + 1 - index); 2899 } 2900 2901 if (!PageUptodate(page)) { 2902 btrfs_readpage(NULL, page); 2903 lock_page(page); 2904 if (!PageUptodate(page)) { 2905 unlock_page(page); 2906 put_page(page); 2907 btrfs_delalloc_release_metadata(BTRFS_I(inode), 2908 PAGE_SIZE, true); 2909 btrfs_delalloc_release_extents(BTRFS_I(inode), 2910 PAGE_SIZE); 2911 ret = -EIO; 2912 goto out; 2913 } 2914 } 2915 2916 page_start = page_offset(page); 2917 page_end = page_start + PAGE_SIZE - 1; 2918 2919 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); 2920 2921 set_page_extent_mapped(page); 2922 2923 if (nr < cluster->nr && 2924 page_start + offset == cluster->boundary[nr]) { 2925 set_extent_bits(&BTRFS_I(inode)->io_tree, 2926 page_start, page_end, 2927 EXTENT_BOUNDARY); 2928 nr++; 2929 } 2930 2931 ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0, 2932 NULL); 2933 if (ret) { 2934 unlock_page(page); 2935 put_page(page); 2936 btrfs_delalloc_release_metadata(BTRFS_I(inode), 2937 PAGE_SIZE, true); 2938 btrfs_delalloc_release_extents(BTRFS_I(inode), 2939 PAGE_SIZE); 2940 2941 clear_extent_bits(&BTRFS_I(inode)->io_tree, 2942 page_start, page_end, 2943 EXTENT_LOCKED | EXTENT_BOUNDARY); 2944 goto out; 2945 2946 } 2947 set_page_dirty(page); 2948 2949 unlock_extent(&BTRFS_I(inode)->io_tree, 2950 page_start, page_end); 2951 unlock_page(page); 2952 put_page(page); 2953 2954 index++; 2955 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); 2956 balance_dirty_pages_ratelimited(inode->i_mapping); 2957 btrfs_throttle(fs_info); 2958 if (btrfs_should_cancel_balance(fs_info)) { 2959 ret = -ECANCELED; 2960 goto out; 2961 } 2962 } 2963 WARN_ON(nr != cluster->nr); 2964 out: 2965 kfree(ra); 2966 return ret; 2967 } 2968 2969 static noinline_for_stack 2970 int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, 2971 struct file_extent_cluster *cluster) 2972 { 2973 int ret; 2974 2975 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) { 2976 ret = relocate_file_extent_cluster(inode, cluster); 2977 if (ret) 2978 return ret; 2979 cluster->nr = 0; 2980 } 2981 2982 if (!cluster->nr) 2983 cluster->start = extent_key->objectid; 2984 else 2985 BUG_ON(cluster->nr >= MAX_EXTENTS); 2986 cluster->end = extent_key->objectid + extent_key->offset - 1; 2987 cluster->boundary[cluster->nr] = extent_key->objectid; 2988 cluster->nr++; 2989 2990 if (cluster->nr >= MAX_EXTENTS) { 2991 ret = relocate_file_extent_cluster(inode, cluster); 2992 if (ret) 2993 return ret; 2994 cluster->nr = 0; 2995 } 2996 return 0; 2997 } 2998 2999 /* 3000 * helper to add a tree block to the list. 
3001 * the major work is getting the generation and level of the block 3002 */ 3003 static int add_tree_block(struct reloc_control *rc, 3004 struct btrfs_key *extent_key, 3005 struct btrfs_path *path, 3006 struct rb_root *blocks) 3007 { 3008 struct extent_buffer *eb; 3009 struct btrfs_extent_item *ei; 3010 struct btrfs_tree_block_info *bi; 3011 struct tree_block *block; 3012 struct rb_node *rb_node; 3013 u32 item_size; 3014 int level = -1; 3015 u64 generation; 3016 3017 eb = path->nodes[0]; 3018 item_size = btrfs_item_size_nr(eb, path->slots[0]); 3019 3020 if (extent_key->type == BTRFS_METADATA_ITEM_KEY || 3021 item_size >= sizeof(*ei) + sizeof(*bi)) { 3022 ei = btrfs_item_ptr(eb, path->slots[0], 3023 struct btrfs_extent_item); 3024 if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) { 3025 bi = (struct btrfs_tree_block_info *)(ei + 1); 3026 level = btrfs_tree_block_level(eb, bi); 3027 } else { 3028 level = (int)extent_key->offset; 3029 } 3030 generation = btrfs_extent_generation(eb, ei); 3031 } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) { 3032 btrfs_print_v0_err(eb->fs_info); 3033 btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL); 3034 return -EINVAL; 3035 } else { 3036 BUG(); 3037 } 3038 3039 btrfs_release_path(path); 3040 3041 BUG_ON(level == -1); 3042 3043 block = kmalloc(sizeof(*block), GFP_NOFS); 3044 if (!block) 3045 return -ENOMEM; 3046 3047 block->bytenr = extent_key->objectid; 3048 block->key.objectid = rc->extent_root->fs_info->nodesize; 3049 block->key.offset = generation; 3050 block->level = level; 3051 block->key_ready = 0; 3052 3053 rb_node = rb_simple_insert(blocks, block->bytenr, &block->rb_node); 3054 if (rb_node) 3055 btrfs_backref_panic(rc->extent_root->fs_info, block->bytenr, 3056 -EEXIST); 3057 3058 return 0; 3059 } 3060 3061 /* 3062 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY 3063 */ 3064 static int __add_tree_block(struct reloc_control *rc, 3065 u64 bytenr, u32 blocksize, 3066 struct rb_root *blocks) 3067 { 3068 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3069 struct btrfs_path *path; 3070 struct btrfs_key key; 3071 int ret; 3072 bool skinny = btrfs_fs_incompat(fs_info, SKINNY_METADATA); 3073 3074 if (tree_block_processed(bytenr, rc)) 3075 return 0; 3076 3077 if (rb_simple_search(blocks, bytenr)) 3078 return 0; 3079 3080 path = btrfs_alloc_path(); 3081 if (!path) 3082 return -ENOMEM; 3083 again: 3084 key.objectid = bytenr; 3085 if (skinny) { 3086 key.type = BTRFS_METADATA_ITEM_KEY; 3087 key.offset = (u64)-1; 3088 } else { 3089 key.type = BTRFS_EXTENT_ITEM_KEY; 3090 key.offset = blocksize; 3091 } 3092 3093 path->search_commit_root = 1; 3094 path->skip_locking = 1; 3095 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); 3096 if (ret < 0) 3097 goto out; 3098 3099 if (ret > 0 && skinny) { 3100 if (path->slots[0]) { 3101 path->slots[0]--; 3102 btrfs_item_key_to_cpu(path->nodes[0], &key, 3103 path->slots[0]); 3104 if (key.objectid == bytenr && 3105 (key.type == BTRFS_METADATA_ITEM_KEY || 3106 (key.type == BTRFS_EXTENT_ITEM_KEY && 3107 key.offset == blocksize))) 3108 ret = 0; 3109 } 3110 3111 if (ret) { 3112 skinny = false; 3113 btrfs_release_path(path); 3114 goto again; 3115 } 3116 } 3117 if (ret) { 3118 ASSERT(ret == 1); 3119 btrfs_print_leaf(path->nodes[0]); 3120 btrfs_err(fs_info, 3121 "tree block extent item (%llu) is not found in extent tree", 3122 bytenr); 3123 WARN_ON(1); 3124 ret = -EINVAL; 3125 goto out; 3126 } 3127 3128 ret = add_tree_block(rc, &key, path, blocks); 3129 out: 3130 
btrfs_free_path(path); 3131 return ret; 3132 } 3133 3134 static int delete_block_group_cache(struct btrfs_fs_info *fs_info, 3135 struct btrfs_block_group *block_group, 3136 struct inode *inode, 3137 u64 ino) 3138 { 3139 struct btrfs_key key; 3140 struct btrfs_root *root = fs_info->tree_root; 3141 struct btrfs_trans_handle *trans; 3142 int ret = 0; 3143 3144 if (inode) 3145 goto truncate; 3146 3147 key.objectid = ino; 3148 key.type = BTRFS_INODE_ITEM_KEY; 3149 key.offset = 0; 3150 3151 inode = btrfs_iget(fs_info->sb, &key, root); 3152 if (IS_ERR(inode)) 3153 return -ENOENT; 3154 3155 truncate: 3156 ret = btrfs_check_trunc_cache_free_space(fs_info, 3157 &fs_info->global_block_rsv); 3158 if (ret) 3159 goto out; 3160 3161 trans = btrfs_join_transaction(root); 3162 if (IS_ERR(trans)) { 3163 ret = PTR_ERR(trans); 3164 goto out; 3165 } 3166 3167 ret = btrfs_truncate_free_space_cache(trans, block_group, inode); 3168 3169 btrfs_end_transaction(trans); 3170 btrfs_btree_balance_dirty(fs_info); 3171 out: 3172 iput(inode); 3173 return ret; 3174 } 3175 3176 /* 3177 * Locate the free space cache EXTENT_DATA in root tree leaf and delete the 3178 * cache inode, to avoid free space cache data extent blocking data relocation. 3179 */ 3180 static int delete_v1_space_cache(struct extent_buffer *leaf, 3181 struct btrfs_block_group *block_group, 3182 u64 data_bytenr) 3183 { 3184 u64 space_cache_ino; 3185 struct btrfs_file_extent_item *ei; 3186 struct btrfs_key key; 3187 bool found = false; 3188 int i; 3189 int ret; 3190 3191 if (btrfs_header_owner(leaf) != BTRFS_ROOT_TREE_OBJECTID) 3192 return 0; 3193 3194 for (i = 0; i < btrfs_header_nritems(leaf); i++) { 3195 btrfs_item_key_to_cpu(leaf, &key, i); 3196 if (key.type != BTRFS_EXTENT_DATA_KEY) 3197 continue; 3198 ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 3199 if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_REG && 3200 btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) { 3201 found = true; 3202 space_cache_ino = key.objectid; 3203 break; 3204 } 3205 } 3206 if (!found) 3207 return -ENOENT; 3208 ret = delete_block_group_cache(leaf->fs_info, block_group, NULL, 3209 space_cache_ino); 3210 return ret; 3211 } 3212 3213 /* 3214 * helper to find all tree blocks that reference a given data extent 3215 */ 3216 static noinline_for_stack 3217 int add_data_references(struct reloc_control *rc, 3218 struct btrfs_key *extent_key, 3219 struct btrfs_path *path, 3220 struct rb_root *blocks) 3221 { 3222 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3223 struct ulist *leaves = NULL; 3224 struct ulist_iterator leaf_uiter; 3225 struct ulist_node *ref_node = NULL; 3226 const u32 blocksize = fs_info->nodesize; 3227 int ret = 0; 3228 3229 btrfs_release_path(path); 3230 ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid, 3231 0, &leaves, NULL, true); 3232 if (ret < 0) 3233 return ret; 3234 3235 ULIST_ITER_INIT(&leaf_uiter); 3236 while ((ref_node = ulist_next(leaves, &leaf_uiter))) { 3237 struct extent_buffer *eb; 3238 3239 eb = read_tree_block(fs_info, ref_node->val, 0, 0, NULL); 3240 if (IS_ERR(eb)) { 3241 ret = PTR_ERR(eb); 3242 break; 3243 } 3244 ret = delete_v1_space_cache(eb, rc->block_group, 3245 extent_key->objectid); 3246 free_extent_buffer(eb); 3247 if (ret < 0) 3248 break; 3249 ret = __add_tree_block(rc, ref_node->val, blocksize, blocks); 3250 if (ret < 0) 3251 break; 3252 } 3253 if (ret < 0) 3254 free_block_list(blocks); 3255 ulist_free(leaves); 3256 return ret; 3257 } 3258 3259 /* 3260 * helper to find next unprocessed 
extent 3261 */ 3262 static noinline_for_stack 3263 int find_next_extent(struct reloc_control *rc, struct btrfs_path *path, 3264 struct btrfs_key *extent_key) 3265 { 3266 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3267 struct btrfs_key key; 3268 struct extent_buffer *leaf; 3269 u64 start, end, last; 3270 int ret; 3271 3272 last = rc->block_group->start + rc->block_group->length; 3273 while (1) { 3274 cond_resched(); 3275 if (rc->search_start >= last) { 3276 ret = 1; 3277 break; 3278 } 3279 3280 key.objectid = rc->search_start; 3281 key.type = BTRFS_EXTENT_ITEM_KEY; 3282 key.offset = 0; 3283 3284 path->search_commit_root = 1; 3285 path->skip_locking = 1; 3286 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 3287 0, 0); 3288 if (ret < 0) 3289 break; 3290 next: 3291 leaf = path->nodes[0]; 3292 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 3293 ret = btrfs_next_leaf(rc->extent_root, path); 3294 if (ret != 0) 3295 break; 3296 leaf = path->nodes[0]; 3297 } 3298 3299 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3300 if (key.objectid >= last) { 3301 ret = 1; 3302 break; 3303 } 3304 3305 if (key.type != BTRFS_EXTENT_ITEM_KEY && 3306 key.type != BTRFS_METADATA_ITEM_KEY) { 3307 path->slots[0]++; 3308 goto next; 3309 } 3310 3311 if (key.type == BTRFS_EXTENT_ITEM_KEY && 3312 key.objectid + key.offset <= rc->search_start) { 3313 path->slots[0]++; 3314 goto next; 3315 } 3316 3317 if (key.type == BTRFS_METADATA_ITEM_KEY && 3318 key.objectid + fs_info->nodesize <= 3319 rc->search_start) { 3320 path->slots[0]++; 3321 goto next; 3322 } 3323 3324 ret = find_first_extent_bit(&rc->processed_blocks, 3325 key.objectid, &start, &end, 3326 EXTENT_DIRTY, NULL); 3327 3328 if (ret == 0 && start <= key.objectid) { 3329 btrfs_release_path(path); 3330 rc->search_start = end + 1; 3331 } else { 3332 if (key.type == BTRFS_EXTENT_ITEM_KEY) 3333 rc->search_start = key.objectid + key.offset; 3334 else 3335 rc->search_start = key.objectid + 3336 fs_info->nodesize; 3337 memcpy(extent_key, &key, sizeof(key)); 3338 return 0; 3339 } 3340 } 3341 btrfs_release_path(path); 3342 return ret; 3343 } 3344 3345 static void set_reloc_control(struct reloc_control *rc) 3346 { 3347 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3348 3349 mutex_lock(&fs_info->reloc_mutex); 3350 fs_info->reloc_ctl = rc; 3351 mutex_unlock(&fs_info->reloc_mutex); 3352 } 3353 3354 static void unset_reloc_control(struct reloc_control *rc) 3355 { 3356 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3357 3358 mutex_lock(&fs_info->reloc_mutex); 3359 fs_info->reloc_ctl = NULL; 3360 mutex_unlock(&fs_info->reloc_mutex); 3361 } 3362 3363 static int check_extent_flags(u64 flags) 3364 { 3365 if ((flags & BTRFS_EXTENT_FLAG_DATA) && 3366 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) 3367 return 1; 3368 if (!(flags & BTRFS_EXTENT_FLAG_DATA) && 3369 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) 3370 return 1; 3371 if ((flags & BTRFS_EXTENT_FLAG_DATA) && 3372 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 3373 return 1; 3374 return 0; 3375 } 3376 3377 static noinline_for_stack 3378 int prepare_to_relocate(struct reloc_control *rc) 3379 { 3380 struct btrfs_trans_handle *trans; 3381 int ret; 3382 3383 rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root->fs_info, 3384 BTRFS_BLOCK_RSV_TEMP); 3385 if (!rc->block_rsv) 3386 return -ENOMEM; 3387 3388 memset(&rc->cluster, 0, sizeof(rc->cluster)); 3389 rc->search_start = rc->block_group->start; 3390 rc->extents_found = 0; 3391 rc->nodes_relocated = 0; 3392 rc->merging_rsv_size = 0; 3393 
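	/*
	 * Pre-size the temporary rsv for RELOCATION_RESERVED_NODES tree
	 * blocks. As a worked example, assuming the common 16K nodesize,
	 * this initial reservation is 256 * 16K = 4MiB;
	 * reserve_metadata_space() may grow it further while relocation
	 * runs.
	 */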
rc->reserved_bytes = 0; 3394 rc->block_rsv->size = rc->extent_root->fs_info->nodesize * 3395 RELOCATION_RESERVED_NODES; 3396 ret = btrfs_block_rsv_refill(rc->extent_root, 3397 rc->block_rsv, rc->block_rsv->size, 3398 BTRFS_RESERVE_FLUSH_ALL); 3399 if (ret) 3400 return ret; 3401 3402 rc->create_reloc_tree = 1; 3403 set_reloc_control(rc); 3404 3405 trans = btrfs_join_transaction(rc->extent_root); 3406 if (IS_ERR(trans)) { 3407 unset_reloc_control(rc); 3408 /* 3409 * extent tree is not a ref_cow tree and has no reloc_root to 3410 * cleanup. And callers are responsible to free the above 3411 * block rsv. 3412 */ 3413 return PTR_ERR(trans); 3414 } 3415 btrfs_commit_transaction(trans); 3416 return 0; 3417 } 3418 3419 static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 3420 { 3421 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3422 struct rb_root blocks = RB_ROOT; 3423 struct btrfs_key key; 3424 struct btrfs_trans_handle *trans = NULL; 3425 struct btrfs_path *path; 3426 struct btrfs_extent_item *ei; 3427 u64 flags; 3428 u32 item_size; 3429 int ret; 3430 int err = 0; 3431 int progress = 0; 3432 3433 path = btrfs_alloc_path(); 3434 if (!path) 3435 return -ENOMEM; 3436 path->reada = READA_FORWARD; 3437 3438 ret = prepare_to_relocate(rc); 3439 if (ret) { 3440 err = ret; 3441 goto out_free; 3442 } 3443 3444 while (1) { 3445 rc->reserved_bytes = 0; 3446 ret = btrfs_block_rsv_refill(rc->extent_root, 3447 rc->block_rsv, rc->block_rsv->size, 3448 BTRFS_RESERVE_FLUSH_ALL); 3449 if (ret) { 3450 err = ret; 3451 break; 3452 } 3453 progress++; 3454 trans = btrfs_start_transaction(rc->extent_root, 0); 3455 if (IS_ERR(trans)) { 3456 err = PTR_ERR(trans); 3457 trans = NULL; 3458 break; 3459 } 3460 restart: 3461 if (update_backref_cache(trans, &rc->backref_cache)) { 3462 btrfs_end_transaction(trans); 3463 trans = NULL; 3464 continue; 3465 } 3466 3467 ret = find_next_extent(rc, path, &key); 3468 if (ret < 0) 3469 err = ret; 3470 if (ret != 0) 3471 break; 3472 3473 rc->extents_found++; 3474 3475 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 3476 struct btrfs_extent_item); 3477 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 3478 if (item_size >= sizeof(*ei)) { 3479 flags = btrfs_extent_flags(path->nodes[0], ei); 3480 ret = check_extent_flags(flags); 3481 BUG_ON(ret); 3482 } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) { 3483 err = -EINVAL; 3484 btrfs_print_v0_err(trans->fs_info); 3485 btrfs_abort_transaction(trans, err); 3486 break; 3487 } else { 3488 BUG(); 3489 } 3490 3491 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 3492 ret = add_tree_block(rc, &key, path, &blocks); 3493 } else if (rc->stage == UPDATE_DATA_PTRS && 3494 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3495 ret = add_data_references(rc, &key, path, &blocks); 3496 } else { 3497 btrfs_release_path(path); 3498 ret = 0; 3499 } 3500 if (ret < 0) { 3501 err = ret; 3502 break; 3503 } 3504 3505 if (!RB_EMPTY_ROOT(&blocks)) { 3506 ret = relocate_tree_blocks(trans, rc, &blocks); 3507 if (ret < 0) { 3508 if (ret != -EAGAIN) { 3509 err = ret; 3510 break; 3511 } 3512 rc->extents_found--; 3513 rc->search_start = key.objectid; 3514 } 3515 } 3516 3517 btrfs_end_transaction_throttle(trans); 3518 btrfs_btree_balance_dirty(fs_info); 3519 trans = NULL; 3520 3521 if (rc->stage == MOVE_DATA_EXTENTS && 3522 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3523 rc->found_file_extent = 1; 3524 ret = relocate_data_extent(rc->data_inode, 3525 &key, &rc->cluster); 3526 if (ret < 0) { 3527 err = ret; 3528 break; 3529 } 
3530 } 3531 if (btrfs_should_cancel_balance(fs_info)) { 3532 err = -ECANCELED; 3533 break; 3534 } 3535 } 3536 if (trans && progress && err == -ENOSPC) { 3537 ret = btrfs_force_chunk_alloc(trans, rc->block_group->flags); 3538 if (ret == 1) { 3539 err = 0; 3540 progress = 0; 3541 goto restart; 3542 } 3543 } 3544 3545 btrfs_release_path(path); 3546 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY); 3547 3548 if (trans) { 3549 btrfs_end_transaction_throttle(trans); 3550 btrfs_btree_balance_dirty(fs_info); 3551 } 3552 3553 if (!err) { 3554 ret = relocate_file_extent_cluster(rc->data_inode, 3555 &rc->cluster); 3556 if (ret < 0) 3557 err = ret; 3558 } 3559 3560 rc->create_reloc_tree = 0; 3561 set_reloc_control(rc); 3562 3563 btrfs_backref_release_cache(&rc->backref_cache); 3564 btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL); 3565 3566 /* 3567 * Even in the case when the relocation is cancelled, we must still go 3568 * through prepare_to_merge() and merge_reloc_roots(). 3569 * 3570 * On error (including a cancelled balance), prepare_to_merge() will 3571 * mark all reloc trees orphan, then queue them for cleanup in 3572 * merge_reloc_roots(). 3573 */ 3574 err = prepare_to_merge(rc, err); 3575 3576 merge_reloc_roots(rc); 3577 3578 rc->merge_reloc_tree = 0; 3579 unset_reloc_control(rc); 3580 btrfs_block_rsv_release(fs_info, rc->block_rsv, (u64)-1, NULL); 3581 3582 /* get rid of pinned extents */ 3583 trans = btrfs_join_transaction(rc->extent_root); 3584 if (IS_ERR(trans)) { 3585 err = PTR_ERR(trans); 3586 goto out_free; 3587 } 3588 btrfs_commit_transaction(trans); 3589 out_free: 3590 ret = clean_dirty_subvols(rc); 3591 if (ret < 0 && !err) 3592 err = ret; 3593 btrfs_free_block_rsv(fs_info, rc->block_rsv); 3594 btrfs_free_path(path); 3595 return err; 3596 } 3597 3598 static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 3599 struct btrfs_root *root, u64 objectid) 3600 { 3601 struct btrfs_path *path; 3602 struct btrfs_inode_item *item; 3603 struct extent_buffer *leaf; 3604 int ret; 3605 3606 path = btrfs_alloc_path(); 3607 if (!path) 3608 return -ENOMEM; 3609 3610 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 3611 if (ret) 3612 goto out; 3613 3614 leaf = path->nodes[0]; 3615 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 3616 memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item)); 3617 btrfs_set_inode_generation(leaf, item, 1); 3618 btrfs_set_inode_size(leaf, item, 0); 3619 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 3620 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | 3621 BTRFS_INODE_PREALLOC); 3622 btrfs_mark_buffer_dirty(leaf); 3623 out: 3624 btrfs_free_path(path); 3625 return ret; 3626 } 3627 3628 /* 3629 * helper to create inode for data relocation.
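 * (one such inode is created per relocated block group and becomes
 * rc->data_inode, see btrfs_relocate_block_group()).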
3630 * the inode is in data relocation tree and its link count is 0 3631 */ 3632 static noinline_for_stack 3633 struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, 3634 struct btrfs_block_group *group) 3635 { 3636 struct inode *inode = NULL; 3637 struct btrfs_trans_handle *trans; 3638 struct btrfs_root *root; 3639 struct btrfs_key key; 3640 u64 objectid; 3641 int err = 0; 3642 3643 root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID); 3644 if (IS_ERR(root)) 3645 return ERR_CAST(root); 3646 3647 trans = btrfs_start_transaction(root, 6); 3648 if (IS_ERR(trans)) { 3649 btrfs_put_root(root); 3650 return ERR_CAST(trans); 3651 } 3652 3653 err = btrfs_find_free_objectid(root, &objectid); 3654 if (err) 3655 goto out; 3656 3657 err = __insert_orphan_inode(trans, root, objectid); 3658 BUG_ON(err); 3659 3660 key.objectid = objectid; 3661 key.type = BTRFS_INODE_ITEM_KEY; 3662 key.offset = 0; 3663 inode = btrfs_iget(fs_info->sb, &key, root); 3664 BUG_ON(IS_ERR(inode)); 3665 BTRFS_I(inode)->index_cnt = group->start; 3666 3667 err = btrfs_orphan_add(trans, BTRFS_I(inode)); 3668 out: 3669 btrfs_put_root(root); 3670 btrfs_end_transaction(trans); 3671 btrfs_btree_balance_dirty(fs_info); 3672 if (err) { 3673 if (inode) 3674 iput(inode); 3675 inode = ERR_PTR(err); 3676 } 3677 return inode; 3678 } 3679 3680 static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info) 3681 { 3682 struct reloc_control *rc; 3683 3684 rc = kzalloc(sizeof(*rc), GFP_NOFS); 3685 if (!rc) 3686 return NULL; 3687 3688 INIT_LIST_HEAD(&rc->reloc_roots); 3689 INIT_LIST_HEAD(&rc->dirty_subvol_roots); 3690 btrfs_backref_init_cache(fs_info, &rc->backref_cache, 1); 3691 mapping_tree_init(&rc->reloc_root_tree); 3692 extent_io_tree_init(fs_info, &rc->processed_blocks, 3693 IO_TREE_RELOC_BLOCKS, NULL); 3694 return rc; 3695 } 3696 3697 static void free_reloc_control(struct reloc_control *rc) 3698 { 3699 struct mapping_node *node, *tmp; 3700 3701 free_reloc_roots(&rc->reloc_roots); 3702 rbtree_postorder_for_each_entry_safe(node, tmp, 3703 &rc->reloc_root_tree.rb_root, rb_node) 3704 kfree(node); 3705 3706 kfree(rc); 3707 } 3708 3709 /* 3710 * Print the block group being relocated 3711 */ 3712 static void describe_relocation(struct btrfs_fs_info *fs_info, 3713 struct btrfs_block_group *block_group) 3714 { 3715 char buf[128] = {'\0'}; 3716 3717 btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf)); 3718 3719 btrfs_info(fs_info, 3720 "relocating block group %llu flags %s", 3721 block_group->start, buf); 3722 } 3723 3724 static const char *stage_to_string(int stage) 3725 { 3726 if (stage == MOVE_DATA_EXTENTS) 3727 return "move data extents"; 3728 if (stage == UPDATE_DATA_PTRS) 3729 return "update data pointers"; 3730 return "unknown"; 3731 } 3732 3733 /* 3734 * function to relocate all extents in a block group. 
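 *
 * the loop below alternates relocate_block_group() between the two
 * stages declared near the top of this file: MOVE_DATA_EXTENTS (copy
 * data through rc->data_inode) and, once the ordered extents are
 * flushed, UPDATE_DATA_PTRS (rewrite the references), until no more
 * extents are found.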
3735 */ 3736 int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start) 3737 { 3738 struct btrfs_block_group *bg; 3739 struct btrfs_root *extent_root = fs_info->extent_root; 3740 struct reloc_control *rc; 3741 struct inode *inode; 3742 struct btrfs_path *path; 3743 int ret; 3744 int rw = 0; 3745 int err = 0; 3746 3747 bg = btrfs_lookup_block_group(fs_info, group_start); 3748 if (!bg) 3749 return -ENOENT; 3750 3751 if (btrfs_pinned_by_swapfile(fs_info, bg)) { 3752 btrfs_put_block_group(bg); 3753 return -ETXTBSY; 3754 } 3755 3756 rc = alloc_reloc_control(fs_info); 3757 if (!rc) { 3758 btrfs_put_block_group(bg); 3759 return -ENOMEM; 3760 } 3761 3762 rc->extent_root = extent_root; 3763 rc->block_group = bg; 3764 3765 ret = btrfs_inc_block_group_ro(rc->block_group, true); 3766 if (ret) { 3767 err = ret; 3768 goto out; 3769 } 3770 rw = 1; 3771 3772 path = btrfs_alloc_path(); 3773 if (!path) { 3774 err = -ENOMEM; 3775 goto out; 3776 } 3777 3778 inode = lookup_free_space_inode(rc->block_group, path); 3779 btrfs_free_path(path); 3780 3781 if (!IS_ERR(inode)) 3782 ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0); 3783 else 3784 ret = PTR_ERR(inode); 3785 3786 if (ret && ret != -ENOENT) { 3787 err = ret; 3788 goto out; 3789 } 3790 3791 rc->data_inode = create_reloc_inode(fs_info, rc->block_group); 3792 if (IS_ERR(rc->data_inode)) { 3793 err = PTR_ERR(rc->data_inode); 3794 rc->data_inode = NULL; 3795 goto out; 3796 } 3797 3798 describe_relocation(fs_info, rc->block_group); 3799 3800 btrfs_wait_block_group_reservations(rc->block_group); 3801 btrfs_wait_nocow_writers(rc->block_group); 3802 btrfs_wait_ordered_roots(fs_info, U64_MAX, 3803 rc->block_group->start, 3804 rc->block_group->length); 3805 3806 while (1) { 3807 int finishes_stage; 3808 3809 mutex_lock(&fs_info->cleaner_mutex); 3810 ret = relocate_block_group(rc); 3811 mutex_unlock(&fs_info->cleaner_mutex); 3812 if (ret < 0) 3813 err = ret; 3814 3815 finishes_stage = rc->stage; 3816 /* 3817 * We may have gotten ENOSPC after we already dirtied some 3818 * extents. If writeout happens while we're relocating a 3819 * different block group we could end up hitting the 3820 * BUG_ON(rc->stage == UPDATE_DATA_PTRS) in 3821 * btrfs_reloc_cow_block. Make sure we write everything out 3822 * properly so we don't trip over this problem, and then break 3823 * out of the loop if we hit an error. 
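 *
 * that is what the btrfs_wait_ordered_range() call below is for: it
 * flushes rc->data_inode before the stage is switched to
 * UPDATE_DATA_PTRS.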
3824 */ 3825 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { 3826 ret = btrfs_wait_ordered_range(rc->data_inode, 0, 3827 (u64)-1); 3828 if (ret) 3829 err = ret; 3830 invalidate_mapping_pages(rc->data_inode->i_mapping, 3831 0, -1); 3832 rc->stage = UPDATE_DATA_PTRS; 3833 } 3834 3835 if (err < 0) 3836 goto out; 3837 3838 if (rc->extents_found == 0) 3839 break; 3840 3841 btrfs_info(fs_info, "found %llu extents, stage: %s", 3842 rc->extents_found, stage_to_string(finishes_stage)); 3843 } 3844 3845 WARN_ON(rc->block_group->pinned > 0); 3846 WARN_ON(rc->block_group->reserved > 0); 3847 WARN_ON(rc->block_group->used > 0); 3848 out: 3849 if (err && rw) 3850 btrfs_dec_block_group_ro(rc->block_group); 3851 iput(rc->data_inode); 3852 btrfs_put_block_group(rc->block_group); 3853 free_reloc_control(rc); 3854 return err; 3855 } 3856 3857 static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) 3858 { 3859 struct btrfs_fs_info *fs_info = root->fs_info; 3860 struct btrfs_trans_handle *trans; 3861 int ret, err; 3862 3863 trans = btrfs_start_transaction(fs_info->tree_root, 0); 3864 if (IS_ERR(trans)) 3865 return PTR_ERR(trans); 3866 3867 memset(&root->root_item.drop_progress, 0, 3868 sizeof(root->root_item.drop_progress)); 3869 root->root_item.drop_level = 0; 3870 btrfs_set_root_refs(&root->root_item, 0); 3871 ret = btrfs_update_root(trans, fs_info->tree_root, 3872 &root->root_key, &root->root_item); 3873 3874 err = btrfs_end_transaction(trans); 3875 if (err) 3876 return err; 3877 return ret; 3878 } 3879 3880 /* 3881 * recover relocation interrupted by system crash. 3882 * 3883 * this function resumes merging reloc trees with corresponding fs trees. 3884 * this is important for keeping the sharing of tree blocks 3885 */ 3886 int btrfs_recover_relocation(struct btrfs_root *root) 3887 { 3888 struct btrfs_fs_info *fs_info = root->fs_info; 3889 LIST_HEAD(reloc_roots); 3890 struct btrfs_key key; 3891 struct btrfs_root *fs_root; 3892 struct btrfs_root *reloc_root; 3893 struct btrfs_path *path; 3894 struct extent_buffer *leaf; 3895 struct reloc_control *rc = NULL; 3896 struct btrfs_trans_handle *trans; 3897 int ret; 3898 int err = 0; 3899 3900 path = btrfs_alloc_path(); 3901 if (!path) 3902 return -ENOMEM; 3903 path->reada = READA_BACK; 3904 3905 key.objectid = BTRFS_TREE_RELOC_OBJECTID; 3906 key.type = BTRFS_ROOT_ITEM_KEY; 3907 key.offset = (u64)-1; 3908 3909 while (1) { 3910 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, 3911 path, 0, 0); 3912 if (ret < 0) { 3913 err = ret; 3914 goto out; 3915 } 3916 if (ret > 0) { 3917 if (path->slots[0] == 0) 3918 break; 3919 path->slots[0]--; 3920 } 3921 leaf = path->nodes[0]; 3922 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3923 btrfs_release_path(path); 3924 3925 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || 3926 key.type != BTRFS_ROOT_ITEM_KEY) 3927 break; 3928 3929 reloc_root = btrfs_read_tree_root(root, &key); 3930 if (IS_ERR(reloc_root)) { 3931 err = PTR_ERR(reloc_root); 3932 goto out; 3933 } 3934 3935 set_bit(BTRFS_ROOT_REF_COWS, &reloc_root->state); 3936 list_add(&reloc_root->root_list, &reloc_roots); 3937 3938 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 3939 fs_root = read_fs_root(fs_info, 3940 reloc_root->root_key.offset); 3941 if (IS_ERR(fs_root)) { 3942 ret = PTR_ERR(fs_root); 3943 if (ret != -ENOENT) { 3944 err = ret; 3945 goto out; 3946 } 3947 ret = mark_garbage_root(reloc_root); 3948 if (ret < 0) { 3949 err = ret; 3950 goto out; 3951 } 3952 } else { 3953 btrfs_put_root(fs_root); 3954 } 3955 } 3956 3957 if 
(key.offset == 0) 3958 break; 3959 3960 key.offset--; 3961 } 3962 btrfs_release_path(path); 3963 3964 if (list_empty(&reloc_roots)) 3965 goto out; 3966 3967 rc = alloc_reloc_control(fs_info); 3968 if (!rc) { 3969 err = -ENOMEM; 3970 goto out; 3971 } 3972 3973 rc->extent_root = fs_info->extent_root; 3974 3975 set_reloc_control(rc); 3976 3977 trans = btrfs_join_transaction(rc->extent_root); 3978 if (IS_ERR(trans)) { 3979 err = PTR_ERR(trans); 3980 goto out_unset; 3981 } 3982 3983 rc->merge_reloc_tree = 1; 3984 3985 while (!list_empty(&reloc_roots)) { 3986 reloc_root = list_entry(reloc_roots.next, 3987 struct btrfs_root, root_list); 3988 list_del(&reloc_root->root_list); 3989 3990 if (btrfs_root_refs(&reloc_root->root_item) == 0) { 3991 list_add_tail(&reloc_root->root_list, 3992 &rc->reloc_roots); 3993 continue; 3994 } 3995 3996 fs_root = read_fs_root(fs_info, reloc_root->root_key.offset); 3997 if (IS_ERR(fs_root)) { 3998 err = PTR_ERR(fs_root); 3999 list_add_tail(&reloc_root->root_list, &reloc_roots); 4000 btrfs_end_transaction(trans); 4001 goto out_unset; 4002 } 4003 4004 err = __add_reloc_root(reloc_root); 4005 BUG_ON(err < 0); /* -ENOMEM or logic error */ 4006 fs_root->reloc_root = btrfs_grab_root(reloc_root); 4007 btrfs_put_root(fs_root); 4008 } 4009 4010 err = btrfs_commit_transaction(trans); 4011 if (err) 4012 goto out_unset; 4013 4014 merge_reloc_roots(rc); 4015 4016 unset_reloc_control(rc); 4017 4018 trans = btrfs_join_transaction(rc->extent_root); 4019 if (IS_ERR(trans)) { 4020 err = PTR_ERR(trans); 4021 goto out_clean; 4022 } 4023 err = btrfs_commit_transaction(trans); 4024 out_clean: 4025 ret = clean_dirty_subvols(rc); 4026 if (ret < 0 && !err) 4027 err = ret; 4028 out_unset: 4029 unset_reloc_control(rc); 4030 free_reloc_control(rc); 4031 out: 4032 if (!list_empty(&reloc_roots)) 4033 free_reloc_roots(&reloc_roots); 4034 4035 btrfs_free_path(path); 4036 4037 if (err == 0) { 4038 /* cleanup orphan inode in data relocation tree */ 4039 fs_root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID); 4040 if (IS_ERR(fs_root)) { 4041 err = PTR_ERR(fs_root); 4042 } else { 4043 err = btrfs_orphan_cleanup(fs_root); 4044 btrfs_put_root(fs_root); 4045 } 4046 } 4047 return err; 4048 } 4049 4050 /* 4051 * helper to add ordered checksum for data relocation. 4052 * 4053 * cloning checksum properly handles the nodatasum extents. 4054 * it also saves CPU time to re-calculate the checksum. 4055 */ 4056 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) 4057 { 4058 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 4059 struct btrfs_ordered_sum *sums; 4060 struct btrfs_ordered_extent *ordered; 4061 int ret; 4062 u64 disk_bytenr; 4063 u64 new_bytenr; 4064 LIST_HEAD(list); 4065 4066 ordered = btrfs_lookup_ordered_extent(inode, file_pos); 4067 BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len); 4068 4069 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 4070 ret = btrfs_lookup_csums_range(fs_info->csum_root, disk_bytenr, 4071 disk_bytenr + len - 1, &list, 0); 4072 if (ret) 4073 goto out; 4074 4075 while (!list_empty(&list)) { 4076 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 4077 list_del_init(&sums->list); 4078 4079 /* 4080 * We need to offset the new_bytenr based on where the csum is. 4081 * We need to do this because we will read in entire prealloc 4082 * extents but we may have written to say the middle of the 4083 * prealloc extent, so we need to make sure the csum goes with 4084 * the right disk offset. 
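 *
 * For example, if the csum entry found above sits 64K into the old
 * extent (sums->bytenr == disk_bytenr + 64K), the line below moves it
 * to sit 64K into the new one (new_bytenr == ordered->disk_bytenr +
 * 64K).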
4085 * 4086 * We can do this because the data reloc inode refers strictly 4087 * to the on disk bytes, so we don't have to worry about 4088 * disk_len vs real len like with real inodes since it's all 4089 * disk length. 4090 */ 4091 new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr; 4092 sums->bytenr = new_bytenr; 4093 4094 btrfs_add_ordered_sum(ordered, sums); 4095 } 4096 out: 4097 btrfs_put_ordered_extent(ordered); 4098 return ret; 4099 } 4100 4101 int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, 4102 struct btrfs_root *root, struct extent_buffer *buf, 4103 struct extent_buffer *cow) 4104 { 4105 struct btrfs_fs_info *fs_info = root->fs_info; 4106 struct reloc_control *rc; 4107 struct btrfs_backref_node *node; 4108 int first_cow = 0; 4109 int level; 4110 int ret = 0; 4111 4112 rc = fs_info->reloc_ctl; 4113 if (!rc) 4114 return 0; 4115 4116 BUG_ON(rc->stage == UPDATE_DATA_PTRS && 4117 root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); 4118 4119 level = btrfs_header_level(buf); 4120 if (btrfs_header_generation(buf) <= 4121 btrfs_root_last_snapshot(&root->root_item)) 4122 first_cow = 1; 4123 4124 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID && 4125 rc->create_reloc_tree) { 4126 WARN_ON(!first_cow && level == 0); 4127 4128 node = rc->backref_cache.path[level]; 4129 BUG_ON(node->bytenr != buf->start && 4130 node->new_bytenr != buf->start); 4131 4132 btrfs_backref_drop_node_buffer(node); 4133 atomic_inc(&cow->refs); 4134 node->eb = cow; 4135 node->new_bytenr = cow->start; 4136 4137 if (!node->pending) { 4138 list_move_tail(&node->list, 4139 &rc->backref_cache.pending[level]); 4140 node->pending = 1; 4141 } 4142 4143 if (first_cow) 4144 mark_block_processed(rc, node); 4145 4146 if (first_cow && level > 0) 4147 rc->nodes_relocated += buf->len; 4148 } 4149 4150 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) 4151 ret = replace_file_extents(trans, rc, root, cow); 4152 return ret; 4153 } 4154 4155 /* 4156 * called before creating a snapshot. it calculates the metadata 4157 * reservation required for relocating tree blocks in the snapshot 4158 */ 4159 void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending, 4160 u64 *bytes_to_reserve) 4161 { 4162 struct btrfs_root *root = pending->root; 4163 struct reloc_control *rc = root->fs_info->reloc_ctl; 4164 4165 if (!rc || !have_reloc_root(root)) 4166 return; 4167 4168 if (!rc->merge_reloc_tree) 4169 return; 4170 4171 root = root->reloc_root; 4172 BUG_ON(btrfs_root_refs(&root->root_item) == 0); 4173 /* 4174 * relocation is in the stage of merging trees. the space 4175 * used by merging a reloc tree is twice the size of the 4176 * relocated tree nodes in the worst case: half for cowing 4177 * the reloc tree, half for cowing the fs tree. the space 4178 * used by cowing the reloc tree will be freed after the 4179 * tree is dropped. if we create a snapshot, cowing the fs 4180 * tree may use more space than it frees. so we need to 4181 * reserve extra space. 4182 */ 4183 *bytes_to_reserve += rc->nodes_relocated; 4184 } 4185 4186 /* 4187 * called after snapshot is created. migrate block reservation 4188 * and create reloc root for the newly created snapshot 4189 * 4190 * This is similar to btrfs_init_reloc_root(), we come out of here with two 4191 * references held on the reloc_root, one for root->reloc_root and one for 4192 * rc->reloc_roots.
4193 */ 4194 int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, 4195 struct btrfs_pending_snapshot *pending) 4196 { 4197 struct btrfs_root *root = pending->root; 4198 struct btrfs_root *reloc_root; 4199 struct btrfs_root *new_root; 4200 struct reloc_control *rc = root->fs_info->reloc_ctl; 4201 int ret; 4202 4203 if (!rc || !have_reloc_root(root)) 4204 return 0; 4205 4206 rc = root->fs_info->reloc_ctl; 4207 rc->merging_rsv_size += rc->nodes_relocated; 4208 4209 if (rc->merge_reloc_tree) { 4210 ret = btrfs_block_rsv_migrate(&pending->block_rsv, 4211 rc->block_rsv, 4212 rc->nodes_relocated, true); 4213 if (ret) 4214 return ret; 4215 } 4216 4217 new_root = pending->snap; 4218 reloc_root = create_reloc_root(trans, root->reloc_root, 4219 new_root->root_key.objectid); 4220 if (IS_ERR(reloc_root)) 4221 return PTR_ERR(reloc_root); 4222 4223 ret = __add_reloc_root(reloc_root); 4224 BUG_ON(ret < 0); 4225 new_root->reloc_root = btrfs_grab_root(reloc_root); 4226 4227 if (rc->create_reloc_tree) 4228 ret = clone_backref_node(trans, rc, root, reloc_root); 4229 return ret; 4230 } 4231