/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "btrfs_inode.h"
#include "async-thread.h"
#include "free-space-cache.h"
#include "inode-map.h"

/*
 * backref_node, mapping_node and tree_block start with this
 */
struct tree_entry {
        struct rb_node rb_node;
        u64 bytenr;
};

/*
 * represent a tree block in the backref cache
 */
struct backref_node {
        struct rb_node rb_node;
        u64 bytenr;

        u64 new_bytenr;
        /* objectid of tree block owner, may not be up to date */
        u64 owner;
        /* link to pending, changed or detached list */
        struct list_head list;
        /* list of upper level blocks that reference this block */
        struct list_head upper;
        /* list of child blocks in the cache */
        struct list_head lower;
        /* NULL if this node is not a tree root */
        struct btrfs_root *root;
        /* extent buffer obtained by COWing the block */
        struct extent_buffer *eb;
        /* level of tree block */
        unsigned int level:8;
        /* is the block in a non-reference-counted tree */
        unsigned int cowonly:1;
        /* 1 if no child node is in the cache */
        unsigned int lowest:1;
        /* is the extent buffer locked */
        unsigned int locked:1;
        /* has the block been processed */
        unsigned int processed:1;
        /* have backrefs of this block been checked */
        unsigned int checked:1;
        /*
         * 1 if corresponding block has been COWed but some upper
         * level block pointers may not point to the new location
         */
        unsigned int pending:1;
        /*
         * 1 if the backref node isn't connected to any other
         * backref node.
         */
        unsigned int detached:1;
};

/*
 * represent a block pointer in the backref cache
 */
struct backref_edge {
        struct list_head list[2];
        struct backref_node *node[2];
};

#define LOWER   0
#define UPPER   1

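/*
 * a backref_edge connects two backref_nodes: node[LOWER] is the child
 * block and node[UPPER] the parent block that references it.  list[LOWER]
 * is linked into node[LOWER]->upper and list[UPPER] into
 * node[UPPER]->lower by the code below.
 */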
struct backref_cache {
        /* red black tree of all backref nodes in the cache */
        struct rb_root rb_root;
        /* for passing backref nodes to btrfs_reloc_cow_block */
        struct backref_node *path[BTRFS_MAX_LEVEL];
        /*
         * list of blocks that have been COWed but some block
         * pointers in upper level blocks may not reflect the
         * new location
         */
        struct list_head pending[BTRFS_MAX_LEVEL];
        /* list of backref nodes with no child node */
        struct list_head leaves;
        /* list of blocks that have been COWed in current transaction */
        struct list_head changed;
        /* list of detached backref nodes */
        struct list_head detached;

        u64 last_trans;

        int nr_nodes;
        int nr_edges;
};

/*
 * map address of tree root to tree
 */
struct mapping_node {
        struct rb_node rb_node;
        u64 bytenr;
        void *data;
};

struct mapping_tree {
        struct rb_root rb_root;
        spinlock_t lock;
};

/*
 * represent a tree block to process
 */
struct tree_block {
        struct rb_node rb_node;
        u64 bytenr;
        struct btrfs_key key;
        unsigned int level:8;
        unsigned int key_ready:1;
};

#define MAX_EXTENTS 128

struct file_extent_cluster {
        u64 start;
        u64 end;
        u64 boundary[MAX_EXTENTS];
        unsigned int nr;
};

struct reloc_control {
        /* block group to relocate */
        struct btrfs_block_group_cache *block_group;
        /* extent tree */
        struct btrfs_root *extent_root;
        /* inode for moving data */
        struct inode *data_inode;

        struct btrfs_block_rsv *block_rsv;

        struct backref_cache backref_cache;

        struct file_extent_cluster cluster;
        /* tree blocks that have been processed */
        struct extent_io_tree processed_blocks;
        /* map start of tree root to corresponding reloc tree */
        struct mapping_tree reloc_root_tree;
        /* list of reloc trees */
        struct list_head reloc_roots;
        /* size of metadata reservation for merging reloc trees */
        u64 merging_rsv_size;
        /* size of relocated tree nodes */
        u64 nodes_relocated;

        u64 search_start;
        u64 extents_found;

        unsigned int stage:8;
        unsigned int create_reloc_tree:1;
        unsigned int merge_reloc_tree:1;
        unsigned int found_file_extent:1;
        unsigned int commit_transaction:1;
};

/* stages of data relocation */
#define MOVE_DATA_EXTENTS       0
#define UPDATE_DATA_PTRS        1

static void remove_backref_node(struct backref_cache *cache,
                                struct backref_node *node);
static void __mark_block_processed(struct reloc_control *rc,
                                   struct backref_node *node);

static void mapping_tree_init(struct mapping_tree *tree)
{
        tree->rb_root = RB_ROOT;
        spin_lock_init(&tree->lock);
}

static void backref_cache_init(struct backref_cache *cache)
{
        int i;
        cache->rb_root = RB_ROOT;
        for (i = 0; i < BTRFS_MAX_LEVEL; i++)
                INIT_LIST_HEAD(&cache->pending[i]);
        INIT_LIST_HEAD(&cache->changed);
        INIT_LIST_HEAD(&cache->detached);
        INIT_LIST_HEAD(&cache->leaves);
}

static void backref_cache_cleanup(struct backref_cache *cache)
{
        struct backref_node *node;
        int i;

        while (!list_empty(&cache->detached)) {
                node = list_entry(cache->detached.next,
                                  struct backref_node, list);
                remove_backref_node(cache, node);
        }

        while (!list_empty(&cache->leaves)) {
                node = list_entry(cache->leaves.next,
                                  struct backref_node, lower);
                remove_backref_node(cache, node);
        }

        cache->last_trans = 0;

        for (i = 0; i < BTRFS_MAX_LEVEL; i++)
                BUG_ON(!list_empty(&cache->pending[i]));
        BUG_ON(!list_empty(&cache->changed));
        BUG_ON(!list_empty(&cache->detached));
        BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
        BUG_ON(cache->nr_nodes);
        BUG_ON(cache->nr_edges);
}

static struct backref_node *alloc_backref_node(struct backref_cache *cache)
{
        struct backref_node *node;

        node = kzalloc(sizeof(*node), GFP_NOFS);
        if (node) {
                INIT_LIST_HEAD(&node->list);
                INIT_LIST_HEAD(&node->upper);
                INIT_LIST_HEAD(&node->lower);
                RB_CLEAR_NODE(&node->rb_node);
                cache->nr_nodes++;
        }
        return node;
}

static void free_backref_node(struct backref_cache *cache,
                              struct backref_node *node)
{
        if (node) {
                cache->nr_nodes--;
                kfree(node);
        }
}

static struct backref_edge *alloc_backref_edge(struct backref_cache *cache)
{
        struct backref_edge *edge;

        edge = kzalloc(sizeof(*edge), GFP_NOFS);
        if (edge)
                cache->nr_edges++;
        return edge;
}

static void free_backref_edge(struct backref_cache *cache,
                              struct backref_edge *edge)
{
        if (edge) {
                cache->nr_edges--;
                kfree(edge);
        }
}

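/*
 * rb tree helpers keyed by bytenr, usable for any structure that starts
 * with struct tree_entry.  tree_insert() returns NULL on success, or the
 * conflicting node if the key already exists; tree_search() returns the
 * matching node or NULL.
 */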
static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
                                   struct rb_node *node)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct tree_entry *entry;

        while (*p) {
                parent = *p;
                entry = rb_entry(parent, struct tree_entry, rb_node);

                if (bytenr < entry->bytenr)
                        p = &(*p)->rb_left;
                else if (bytenr > entry->bytenr)
                        p = &(*p)->rb_right;
                else
                        return parent;
        }

        rb_link_node(node, parent, p);
        rb_insert_color(node, root);
        return NULL;
}

static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
{
        struct rb_node *n = root->rb_node;
        struct tree_entry *entry;

        while (n) {
                entry = rb_entry(n, struct tree_entry, rb_node);

                if (bytenr < entry->bytenr)
                        n = n->rb_left;
                else if (bytenr > entry->bytenr)
                        n = n->rb_right;
                else
                        return n;
        }
        return NULL;
}

void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr)
{
        struct btrfs_fs_info *fs_info = NULL;
        struct backref_node *bnode = rb_entry(rb_node, struct backref_node,
                                              rb_node);
        if (bnode->root)
                fs_info = bnode->root->fs_info;
        btrfs_panic(fs_info, errno, "Inconsistency in backref cache "
                    "found at offset %llu\n", (unsigned long long)bytenr);
}

/*
 * walk up backref nodes until reaching the node that represents the
 * tree root
 */
static struct backref_node *walk_up_backref(struct backref_node *node,
                                            struct backref_edge *edges[],
                                            int *index)
{
        struct backref_edge *edge;
        int idx = *index;

        while (!list_empty(&node->upper)) {
                edge = list_entry(node->upper.next,
                                  struct backref_edge, list[LOWER]);
                edges[idx++] = edge;
                node = edge->node[UPPER];
        }
        BUG_ON(node->detached);
        *index = idx;
        return node;
}

/*
 * walk down backref nodes to find the start of the next reference path
 */
static struct backref_node *walk_down_backref(struct backref_edge *edges[],
                                              int *index)
{
        struct backref_edge *edge;
        struct backref_node *lower;
        int idx = *index;

        while (idx > 0) {
                edge = edges[idx - 1];
                lower = edge->node[LOWER];
                if (list_is_last(&edge->list[LOWER], &lower->upper)) {
                        idx--;
                        continue;
                }
                edge = list_entry(edge->list[LOWER].next,
                                  struct backref_edge, list[LOWER]);
                edges[idx - 1] = edge;
                *index = idx;
                return edge->node[UPPER];
        }
        *index = 0;
        return NULL;
}

static void unlock_node_buffer(struct backref_node *node)
{
        if (node->locked) {
                btrfs_tree_unlock(node->eb);
                node->locked = 0;
        }
}

static void drop_node_buffer(struct backref_node *node)
{
        if (node->eb) {
                unlock_node_buffer(node);
                free_extent_buffer(node->eb);
                node->eb = NULL;
        }
}

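/*
 * unlink a backref node from the cache's rb tree and lists, release its
 * extent buffer and free the node.  all edges to upper level nodes must
 * already be detached.
 */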
static void drop_backref_node(struct backref_cache *tree,
                              struct backref_node *node)
{
        BUG_ON(!list_empty(&node->upper));

        drop_node_buffer(node);
        list_del(&node->list);
        list_del(&node->lower);
        if (!RB_EMPTY_NODE(&node->rb_node))
                rb_erase(&node->rb_node, &tree->rb_root);
        free_backref_node(tree, node);
}

/*
 * remove a backref node from the backref cache
 */
static void remove_backref_node(struct backref_cache *cache,
                                struct backref_node *node)
{
        struct backref_node *upper;
        struct backref_edge *edge;

        if (!node)
                return;

        BUG_ON(!node->lowest && !node->detached);
        while (!list_empty(&node->upper)) {
                edge = list_entry(node->upper.next, struct backref_edge,
                                  list[LOWER]);
                upper = edge->node[UPPER];
                list_del(&edge->list[LOWER]);
                list_del(&edge->list[UPPER]);
                free_backref_edge(cache, edge);

                if (RB_EMPTY_NODE(&upper->rb_node)) {
                        BUG_ON(!list_empty(&node->upper));
                        drop_backref_node(cache, node);
                        node = upper;
                        node->lowest = 1;
                        continue;
                }
                /*
                 * add the node to the leaf node list if no other
                 * child block is cached.
                 */
                if (list_empty(&upper->lower)) {
                        list_add_tail(&upper->lower, &cache->leaves);
                        upper->lowest = 1;
                }
        }

        drop_backref_node(cache, node);
}

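/*
 * re-key a cached backref node after its block has moved to @bytenr.
 * panics if a node with the new key already exists in the cache.
 */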
static void update_backref_node(struct backref_cache *cache,
                                struct backref_node *node, u64 bytenr)
{
        struct rb_node *rb_node;
        rb_erase(&node->rb_node, &cache->rb_root);
        node->bytenr = bytenr;
        rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
        if (rb_node)
                backref_tree_panic(rb_node, -EEXIST, bytenr);
}

/*
 * update backref cache after a transaction commit
 */
static int update_backref_cache(struct btrfs_trans_handle *trans,
                                struct backref_cache *cache)
{
        struct backref_node *node;
        int level = 0;

        if (cache->last_trans == 0) {
                cache->last_trans = trans->transid;
                return 0;
        }

        if (cache->last_trans == trans->transid)
                return 0;

        /*
         * detached nodes are used to avoid unnecessary backref
         * lookups. a transaction commit changes the extent tree,
         * so the detached nodes are no longer useful.
         */
        while (!list_empty(&cache->detached)) {
                node = list_entry(cache->detached.next,
                                  struct backref_node, list);
                remove_backref_node(cache, node);
        }

        while (!list_empty(&cache->changed)) {
                node = list_entry(cache->changed.next,
                                  struct backref_node, list);
                list_del_init(&node->list);
                BUG_ON(node->pending);
                update_backref_node(cache, node, node->new_bytenr);
        }

        /*
         * some nodes can be left in the pending list if there were
         * errors during processing of the pending nodes.
         */
        for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
                list_for_each_entry(node, &cache->pending[level], list) {
                        BUG_ON(!node->pending);
                        if (node->bytenr == node->new_bytenr)
                                continue;
                        update_backref_node(cache, node, node->new_bytenr);
                }
        }

        cache->last_trans = 0;
        return 1;
}


static int should_ignore_root(struct btrfs_root *root)
{
        struct btrfs_root *reloc_root;

        if (!root->ref_cows)
                return 0;

        reloc_root = root->reloc_root;
        if (!reloc_root)
                return 0;

        if (btrfs_root_last_snapshot(&reloc_root->root_item) ==
            root->fs_info->running_transaction->transid - 1)
                return 0;
        /*
         * if there is a reloc tree and it was created in a previous
         * transaction, backref lookup can find the reloc tree, so
         * the backref node for the fs tree root is useless for
         * relocation.
         */
        return 1;
}

/*
 * find reloc tree by address of tree root
 */
static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
                                          u64 bytenr)
{
        struct rb_node *rb_node;
        struct mapping_node *node;
        struct btrfs_root *root = NULL;

        spin_lock(&rc->reloc_root_tree.lock);
        rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
        if (rb_node) {
                node = rb_entry(rb_node, struct mapping_node, rb_node);
                root = (struct btrfs_root *)node->data;
        }
        spin_unlock(&rc->reloc_root_tree.lock);
        return root;
}

static int is_cowonly_root(u64 root_objectid)
{
        if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
            root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
            root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
            root_objectid == BTRFS_DEV_TREE_OBJECTID ||
            root_objectid == BTRFS_TREE_LOG_OBJECTID ||
            root_objectid == BTRFS_CSUM_TREE_OBJECTID)
                return 1;
        return 0;
}

static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
                                       u64 root_objectid)
{
        struct btrfs_key key;

        key.objectid = root_objectid;
        key.type = BTRFS_ROOT_ITEM_KEY;
        if (is_cowonly_root(root_objectid))
                key.offset = 0;
        else
                key.offset = (u64)-1;

        return btrfs_read_fs_root_no_name(fs_info, &key);
}

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static noinline_for_stack
struct btrfs_root *find_tree_root(struct reloc_control *rc,
                                  struct extent_buffer *leaf,
                                  struct btrfs_extent_ref_v0 *ref0)
{
        struct btrfs_root *root;
        u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
        u64 generation = btrfs_ref_generation_v0(leaf, ref0);

        BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);

        root = read_fs_root(rc->extent_root->fs_info, root_objectid);
        BUG_ON(IS_ERR(root));

        if (root->ref_cows &&
            generation != btrfs_root_generation(&root->root_item))
                return NULL;

        return root;
}
#endif

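/*
 * find the inline backref area of a tree block's extent item.
 * returns 0 and sets *ptr/*end to the range of the inline backrefs,
 * or 1 if the extent item has no inline backrefs.
 */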
static noinline_for_stack
int find_inline_backref(struct extent_buffer *leaf, int slot,
                        unsigned long *ptr, unsigned long *end)
{
        struct btrfs_extent_item *ei;
        struct btrfs_tree_block_info *bi;
        u32 item_size;

        item_size = btrfs_item_size_nr(leaf, slot);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
        if (item_size < sizeof(*ei)) {
                WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
                return 1;
        }
#endif
        ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
        WARN_ON(!(btrfs_extent_flags(leaf, ei) &
                  BTRFS_EXTENT_FLAG_TREE_BLOCK));

        if (item_size <= sizeof(*ei) + sizeof(*bi)) {
                WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
                return 1;
        }

        bi = (struct btrfs_tree_block_info *)(ei + 1);
        *ptr = (unsigned long)(bi + 1);
        *end = (unsigned long)ei + item_size;
        return 0;
}

/*
 * build backref tree for a given tree block. root of the backref tree
 * corresponds to the tree block, leaves of the backref tree correspond
 * to roots of b-trees that reference the tree block.
 *
 * the basic idea of this function is to check backrefs of a given block
 * to find upper level blocks that reference the block, and then check
 * backrefs of these upper level blocks recursively. the recursion stops
 * when the tree root is reached or backrefs for the block are cached.
 *
 * NOTE: if we find that backrefs for a block are cached, we know backrefs
 * for all upper level blocks that directly/indirectly reference the
 * block are also cached.
 */
static noinline_for_stack
struct backref_node *build_backref_tree(struct reloc_control *rc,
                                        struct btrfs_key *node_key,
                                        int level, u64 bytenr)
{
        struct backref_cache *cache = &rc->backref_cache;
        struct btrfs_path *path1;
        struct btrfs_path *path2;
        struct extent_buffer *eb;
        struct btrfs_root *root;
        struct backref_node *cur;
        struct backref_node *upper;
        struct backref_node *lower;
        struct backref_node *node = NULL;
        struct backref_node *exist = NULL;
        struct backref_edge *edge;
        struct rb_node *rb_node;
        struct btrfs_key key;
        unsigned long end;
        unsigned long ptr;
        LIST_HEAD(list);
        LIST_HEAD(useless);
        int cowonly;
        int ret;
        int err = 0;

        path1 = btrfs_alloc_path();
        path2 = btrfs_alloc_path();
        if (!path1 || !path2) {
                err = -ENOMEM;
                goto out;
        }
        path1->reada = 1;
        path2->reada = 2;

        node = alloc_backref_node(cache);
        if (!node) {
                err = -ENOMEM;
                goto out;
        }

        node->bytenr = bytenr;
        node->level = level;
        node->lowest = 1;
        cur = node;
again:
        end = 0;
        ptr = 0;
        key.objectid = cur->bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = (u64)-1;

        path1->search_commit_root = 1;
        path1->skip_locking = 1;
        ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
                                0, 0);
        if (ret < 0) {
                err = ret;
                goto out;
        }
        BUG_ON(!ret || !path1->slots[0]);

        path1->slots[0]--;

        WARN_ON(cur->checked);
        if (!list_empty(&cur->upper)) {
                /*
                 * the backref was added previously when processing
                 * backref of type BTRFS_TREE_BLOCK_REF_KEY
                 */
                BUG_ON(!list_is_singular(&cur->upper));
                edge = list_entry(cur->upper.next, struct backref_edge,
                                  list[LOWER]);
                BUG_ON(!list_empty(&edge->list[UPPER]));
                exist = edge->node[UPPER];
                /*
                 * add the upper level block to pending list if we need
                 * to check its backrefs
                 */
                if (!exist->checked)
                        list_add_tail(&edge->list[UPPER], &list);
        } else {
                exist = NULL;
        }

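        /*
         * iterate over all backref items of the current block, the
         * inline backrefs of the extent item first, then the keyed
         * backref items that follow it in the extent tree.
         */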
        while (1) {
                cond_resched();
                eb = path1->nodes[0];

                if (ptr >= end) {
                        if (path1->slots[0] >= btrfs_header_nritems(eb)) {
                                ret = btrfs_next_leaf(rc->extent_root, path1);
                                if (ret < 0) {
                                        err = ret;
                                        goto out;
                                }
                                if (ret > 0)
                                        break;
                                eb = path1->nodes[0];
                        }

                        btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
                        if (key.objectid != cur->bytenr) {
                                WARN_ON(exist);
                                break;
                        }

                        if (key.type == BTRFS_EXTENT_ITEM_KEY) {
                                ret = find_inline_backref(eb, path1->slots[0],
                                                          &ptr, &end);
                                if (ret)
                                        goto next;
                        }
                }

                if (ptr < end) {
                        /* update key for inline backref */
                        struct btrfs_extent_inline_ref *iref;
                        iref = (struct btrfs_extent_inline_ref *)ptr;
                        key.type = btrfs_extent_inline_ref_type(eb, iref);
                        key.offset = btrfs_extent_inline_ref_offset(eb, iref);
                        WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
                                key.type != BTRFS_SHARED_BLOCK_REF_KEY);
                }

                if (exist &&
                    ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
                      exist->owner == key.offset) ||
                     (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
                      exist->bytenr == key.offset))) {
                        exist = NULL;
                        goto next;
                }

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
                    key.type == BTRFS_EXTENT_REF_V0_KEY) {
                        if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
                                struct btrfs_extent_ref_v0 *ref0;
                                ref0 = btrfs_item_ptr(eb, path1->slots[0],
                                                struct btrfs_extent_ref_v0);
                                if (key.objectid == key.offset) {
                                        root = find_tree_root(rc, eb, ref0);
                                        if (root && !should_ignore_root(root))
                                                cur->root = root;
                                        else
                                                list_add(&cur->list, &useless);
                                        break;
                                }
                                if (is_cowonly_root(btrfs_ref_root_v0(eb,
                                                                      ref0)))
                                        cur->cowonly = 1;
                        }
#else
                BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
                if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
#endif
                        if (key.objectid == key.offset) {
                                /*
                                 * only root blocks of reloc trees use
                                 * backref of this type.
                                 */
                                root = find_reloc_root(rc, cur->bytenr);
                                BUG_ON(!root);
                                cur->root = root;
                                break;
                        }

                        edge = alloc_backref_edge(cache);
                        if (!edge) {
                                err = -ENOMEM;
                                goto out;
                        }
                        rb_node = tree_search(&cache->rb_root, key.offset);
                        if (!rb_node) {
                                upper = alloc_backref_node(cache);
                                if (!upper) {
                                        free_backref_edge(cache, edge);
                                        err = -ENOMEM;
                                        goto out;
                                }
                                upper->bytenr = key.offset;
                                upper->level = cur->level + 1;
                                /*
                                 * backrefs for the upper level block aren't
                                 * cached, add the block to pending list
                                 */
                                list_add_tail(&edge->list[UPPER], &list);
                        } else {
                                upper = rb_entry(rb_node, struct backref_node,
                                                 rb_node);
                                BUG_ON(!upper->checked);
                                INIT_LIST_HEAD(&edge->list[UPPER]);
                        }
                        list_add_tail(&edge->list[LOWER], &cur->upper);
                        edge->node[LOWER] = cur;
                        edge->node[UPPER] = upper;

                        goto next;
                } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
                        goto next;
                }

                /* key.type == BTRFS_TREE_BLOCK_REF_KEY */
                root = read_fs_root(rc->extent_root->fs_info, key.offset);
                if (IS_ERR(root)) {
                        err = PTR_ERR(root);
                        goto out;
                }

                if (!root->ref_cows)
                        cur->cowonly = 1;

                if (btrfs_root_level(&root->root_item) == cur->level) {
                        /* tree root */
                        BUG_ON(btrfs_root_bytenr(&root->root_item) !=
                               cur->bytenr);
                        if (should_ignore_root(root))
                                list_add(&cur->list, &useless);
                        else
                                cur->root = root;
                        break;
                }

                level = cur->level + 1;

                /*
                 * search the tree to find upper level blocks that
                 * reference the block.
                 */
                path2->search_commit_root = 1;
                path2->skip_locking = 1;
                path2->lowest_level = level;
                ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
                path2->lowest_level = 0;
                if (ret < 0) {
                        err = ret;
                        goto out;
                }
                if (ret > 0 && path2->slots[level] > 0)
                        path2->slots[level]--;

                eb = path2->nodes[level];
                WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
                        cur->bytenr);

                lower = cur;
                for (; level < BTRFS_MAX_LEVEL; level++) {
                        if (!path2->nodes[level]) {
                                BUG_ON(btrfs_root_bytenr(&root->root_item) !=
                                       lower->bytenr);
                                if (should_ignore_root(root))
                                        list_add(&lower->list, &useless);
                                else
                                        lower->root = root;
                                break;
                        }

                        edge = alloc_backref_edge(cache);
                        if (!edge) {
                                err = -ENOMEM;
                                goto out;
                        }

                        eb = path2->nodes[level];
                        rb_node = tree_search(&cache->rb_root, eb->start);
                        if (!rb_node) {
                                upper = alloc_backref_node(cache);
                                if (!upper) {
                                        free_backref_edge(cache, edge);
                                        err = -ENOMEM;
                                        goto out;
                                }
                                upper->bytenr = eb->start;
                                upper->owner = btrfs_header_owner(eb);
                                upper->level = lower->level + 1;
                                if (!root->ref_cows)
                                        upper->cowonly = 1;

                                /*
                                 * if we know the block isn't shared
                                 * we can avoid checking its backrefs.
                                 */
                                if (btrfs_block_can_be_shared(root, eb))
                                        upper->checked = 0;
                                else
                                        upper->checked = 1;

                                /*
                                 * add the block to the pending list if we
                                 * need to check its backrefs. only blocks
                                 * at 'cur->level + 1' are added to the
                                 * tail of the pending list. this guarantees
                                 * we check backrefs from lower level
                                 * blocks to upper level blocks.
                                 */
                                if (!upper->checked &&
                                    level == cur->level + 1) {
                                        list_add_tail(&edge->list[UPPER],
                                                      &list);
                                } else
                                        INIT_LIST_HEAD(&edge->list[UPPER]);
                        } else {
                                upper = rb_entry(rb_node, struct backref_node,
                                                 rb_node);
                                BUG_ON(!upper->checked);
                                INIT_LIST_HEAD(&edge->list[UPPER]);
                                if (!upper->owner)
                                        upper->owner = btrfs_header_owner(eb);
                        }
                        list_add_tail(&edge->list[LOWER], &lower->upper);
                        edge->node[LOWER] = lower;
                        edge->node[UPPER] = upper;

                        if (rb_node)
                                break;
                        lower = upper;
                        upper = NULL;
                }
                btrfs_release_path(path2);
next:
                if (ptr < end) {
                        ptr += btrfs_extent_inline_ref_size(key.type);
                        if (ptr >= end) {
                                WARN_ON(ptr > end);
                                ptr = 0;
                                end = 0;
                        }
                }
                if (ptr >= end)
                        path1->slots[0]++;
        }
        btrfs_release_path(path1);

        cur->checked = 1;
        WARN_ON(exist);

        /* the pending list isn't empty, take the first block to process */
        if (!list_empty(&list)) {
                edge = list_entry(list.next, struct backref_edge, list[UPPER]);
                list_del_init(&edge->list[UPPER]);
                cur = edge->node[UPPER];
                goto again;
        }

        /*
         * everything went well, connect backref nodes and insert backref
         * nodes into the cache.
         */
        BUG_ON(!node->checked);
        cowonly = node->cowonly;
        if (!cowonly) {
                rb_node = tree_insert(&cache->rb_root, node->bytenr,
                                      &node->rb_node);
                if (rb_node)
                        backref_tree_panic(rb_node, -EEXIST, node->bytenr);
                list_add_tail(&node->lower, &cache->leaves);
        }

        list_for_each_entry(edge, &node->upper, list[LOWER])
                list_add_tail(&edge->list[UPPER], &list);

        while (!list_empty(&list)) {
                edge = list_entry(list.next, struct backref_edge, list[UPPER]);
                list_del_init(&edge->list[UPPER]);
                upper = edge->node[UPPER];
                if (upper->detached) {
                        list_del(&edge->list[LOWER]);
                        lower = edge->node[LOWER];
                        free_backref_edge(cache, edge);
                        if (list_empty(&lower->upper))
                                list_add(&lower->list, &useless);
                        continue;
                }

                if (!RB_EMPTY_NODE(&upper->rb_node)) {
                        if (upper->lowest) {
                                list_del_init(&upper->lower);
                                upper->lowest = 0;
                        }

                        list_add_tail(&edge->list[UPPER], &upper->lower);
                        continue;
                }

                BUG_ON(!upper->checked);
                BUG_ON(cowonly != upper->cowonly);
                if (!cowonly) {
                        rb_node = tree_insert(&cache->rb_root, upper->bytenr,
                                              &upper->rb_node);
                        if (rb_node)
                                backref_tree_panic(rb_node, -EEXIST,
                                                   upper->bytenr);
                }

                list_add_tail(&edge->list[UPPER], &upper->lower);

                list_for_each_entry(edge, &upper->upper, list[LOWER])
                        list_add_tail(&edge->list[UPPER], &list);
        }
        /*
         * process useless backref nodes. backref nodes for tree leaves
         * are deleted from the cache. backref nodes for upper level
         * tree blocks are left in the cache to avoid unnecessary backref
         * lookup.
         */
        while (!list_empty(&useless)) {
                upper = list_entry(useless.next, struct backref_node, list);
                list_del_init(&upper->list);
                BUG_ON(!list_empty(&upper->upper));
                if (upper == node)
                        node = NULL;
                if (upper->lowest) {
                        list_del_init(&upper->lower);
                        upper->lowest = 0;
                }
                while (!list_empty(&upper->lower)) {
                        edge = list_entry(upper->lower.next,
                                          struct backref_edge, list[UPPER]);
                        list_del(&edge->list[UPPER]);
                        list_del(&edge->list[LOWER]);
                        lower = edge->node[LOWER];
                        free_backref_edge(cache, edge);

                        if (list_empty(&lower->upper))
                                list_add(&lower->list, &useless);
                }
                __mark_block_processed(rc, upper);
                if (upper->level > 0) {
                        list_add(&upper->list, &cache->detached);
                        upper->detached = 1;
                } else {
                        rb_erase(&upper->rb_node, &cache->rb_root);
                        free_backref_node(cache, upper);
                }
        }

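        /*
         * common exit. if an error occurred, walk upward from 'node' and
         * free all backref nodes and edges that were never inserted into
         * the cache's rb tree.
         */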
out:
        btrfs_free_path(path1);
        btrfs_free_path(path2);
        if (err) {
                while (!list_empty(&useless)) {
                        lower = list_entry(useless.next,
                                           struct backref_node, upper);
                        list_del_init(&lower->upper);
                }
                upper = node;
                INIT_LIST_HEAD(&list);
                while (upper) {
                        if (RB_EMPTY_NODE(&upper->rb_node)) {
                                list_splice_tail(&upper->upper, &list);
                                free_backref_node(cache, upper);
                        }

                        if (list_empty(&list))
                                break;

                        edge = list_entry(list.next, struct backref_edge,
                                          list[LOWER]);
                        list_del(&edge->list[LOWER]);
                        upper = edge->node[UPPER];
                        free_backref_edge(cache, edge);
                }
                return ERR_PTR(err);
        }
        BUG_ON(node && node->detached);
        return node;
}

/*
 * helper to add a backref node for the newly created snapshot.
 * the backref node is created by cloning the backref node that
 * corresponds to the root of the source tree.
 */
static int clone_backref_node(struct btrfs_trans_handle *trans,
                              struct reloc_control *rc,
                              struct btrfs_root *src,
                              struct btrfs_root *dest)
{
        struct btrfs_root *reloc_root = src->reloc_root;
        struct backref_cache *cache = &rc->backref_cache;
        struct backref_node *node = NULL;
        struct backref_node *new_node;
        struct backref_edge *edge;
        struct backref_edge *new_edge;
        struct rb_node *rb_node;

        if (cache->last_trans > 0)
                update_backref_cache(trans, cache);

        rb_node = tree_search(&cache->rb_root, src->commit_root->start);
        if (rb_node) {
                node = rb_entry(rb_node, struct backref_node, rb_node);
                if (node->detached)
                        node = NULL;
                else
                        BUG_ON(node->new_bytenr != reloc_root->node->start);
        }

        if (!node) {
                rb_node = tree_search(&cache->rb_root,
                                      reloc_root->commit_root->start);
                if (rb_node) {
                        node = rb_entry(rb_node, struct backref_node,
                                        rb_node);
                        BUG_ON(node->detached);
                }
        }

        if (!node)
                return 0;

        new_node = alloc_backref_node(cache);
        if (!new_node)
                return -ENOMEM;

        new_node->bytenr = dest->node->start;
        new_node->level = node->level;
        new_node->lowest = node->lowest;
        new_node->checked = 1;
        new_node->root = dest;

        if (!node->lowest) {
                list_for_each_entry(edge, &node->lower, list[UPPER]) {
                        new_edge = alloc_backref_edge(cache);
                        if (!new_edge)
                                goto fail;

                        new_edge->node[UPPER] = new_node;
                        new_edge->node[LOWER] = edge->node[LOWER];
                        list_add_tail(&new_edge->list[UPPER],
                                      &new_node->lower);
                }
        } else {
                list_add_tail(&new_node->lower, &cache->leaves);
        }

        rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
                              &new_node->rb_node);
        if (rb_node)
                backref_tree_panic(rb_node, -EEXIST, new_node->bytenr);

        if (!new_node->lowest) {
                list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
                        list_add_tail(&new_edge->list[LOWER],
                                      &new_edge->node[LOWER]->upper);
                }
        }
        return 0;
fail:
        while (!list_empty(&new_node->lower)) {
                new_edge = list_entry(new_node->lower.next,
                                      struct backref_edge, list[UPPER]);
                list_del(&new_edge->list[UPPER]);
                free_backref_edge(cache, new_edge);
        }
        free_backref_node(cache, new_node);
        return -ENOMEM;
}

/*
 * helper to add 'address of tree root -> reloc tree' mapping
 */
static int __must_check __add_reloc_root(struct btrfs_root *root)
{
        struct rb_node *rb_node;
        struct mapping_node *node;
        struct reloc_control *rc = root->fs_info->reloc_ctl;

        node = kmalloc(sizeof(*node), GFP_NOFS);
        if (!node)
                return -ENOMEM;

        node->bytenr = root->node->start;
        node->data = root;

        spin_lock(&rc->reloc_root_tree.lock);
        rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
                              node->bytenr, &node->rb_node);
        spin_unlock(&rc->reloc_root_tree.lock);
        if (rb_node) {
                btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found "
                            "for start=%llu while inserting into relocation "
                            "tree\n", node->bytenr);
                kfree(node);
                return -EEXIST;
        }

        list_add_tail(&root->root_list, &rc->reloc_roots);
        return 0;
}

/*
 * helper to update/delete the 'address of tree root -> reloc tree'
 * mapping
 */
static int __update_reloc_root(struct btrfs_root *root, int del)
{
        struct rb_node *rb_node;
        struct mapping_node *node = NULL;
        struct reloc_control *rc = root->fs_info->reloc_ctl;

        spin_lock(&rc->reloc_root_tree.lock);
        rb_node = tree_search(&rc->reloc_root_tree.rb_root,
                              root->commit_root->start);
        if (rb_node) {
                node = rb_entry(rb_node, struct mapping_node, rb_node);
                rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
        }
        spin_unlock(&rc->reloc_root_tree.lock);

        if (!node)
                return 0;
        BUG_ON((struct btrfs_root *)node->data != root);

        if (!del) {
                spin_lock(&rc->reloc_root_tree.lock);
                node->bytenr = root->node->start;
                rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
                                      node->bytenr, &node->rb_node);
                spin_unlock(&rc->reloc_root_tree.lock);
                if (rb_node)
                        backref_tree_panic(rb_node, -EEXIST, node->bytenr);
        } else {
                spin_lock(&root->fs_info->trans_lock);
                list_del_init(&root->root_list);
                spin_unlock(&root->fs_info->trans_lock);
                kfree(node);
        }
        return 0;
}

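/*
 * create the root item and tree of a reloc tree for @root: COW either
 * the commit root (when called through btrfs_init_reloc_root for the
 * fs tree itself) or the current root node (when called through
 * btrfs_reloc_post_snapshot_hook) and insert the new root item into
 * the tree of tree roots.
 */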
static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
                                            struct btrfs_root *root,
                                            u64 objectid)
{
        struct btrfs_root *reloc_root;
        struct extent_buffer *eb;
        struct btrfs_root_item *root_item;
        struct btrfs_key root_key;
        int ret;

        root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
        BUG_ON(!root_item);

        root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
        root_key.type = BTRFS_ROOT_ITEM_KEY;
        root_key.offset = objectid;

        if (root->root_key.objectid == objectid) {
                /* called by btrfs_init_reloc_root */
                ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
                                      BTRFS_TREE_RELOC_OBJECTID);
                BUG_ON(ret);

                btrfs_set_root_last_snapshot(&root->root_item,
                                             trans->transid - 1);
        } else {
                /*
                 * called by btrfs_reloc_post_snapshot_hook.
                 * the source tree is a reloc tree, all tree blocks
                 * modified after it was created have the RELOC flag
                 * set in their headers. so it's OK to not update
                 * the 'last_snapshot'.
                 */
                ret = btrfs_copy_root(trans, root, root->node, &eb,
                                      BTRFS_TREE_RELOC_OBJECTID);
                BUG_ON(ret);
        }

        memcpy(root_item, &root->root_item, sizeof(*root_item));
        btrfs_set_root_bytenr(root_item, eb->start);
        btrfs_set_root_level(root_item, btrfs_header_level(eb));
        btrfs_set_root_generation(root_item, trans->transid);

        if (root->root_key.objectid == objectid) {
                btrfs_set_root_refs(root_item, 0);
                memset(&root_item->drop_progress, 0,
                       sizeof(struct btrfs_disk_key));
                root_item->drop_level = 0;
        }

        btrfs_tree_unlock(eb);
        free_extent_buffer(eb);

        ret = btrfs_insert_root(trans, root->fs_info->tree_root,
                                &root_key, root_item);
        BUG_ON(ret);
        kfree(root_item);

        reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
                                                 &root_key);
        BUG_ON(IS_ERR(reloc_root));
        reloc_root->last_trans = trans->transid;
        return reloc_root;
}

/*
 * create reloc tree for a given fs tree. the reloc tree is just a
 * snapshot of the fs tree with a special root objectid.
 */
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root)
{
        struct btrfs_root *reloc_root;
        struct reloc_control *rc = root->fs_info->reloc_ctl;
        int clear_rsv = 0;
        int ret;

        if (root->reloc_root) {
                reloc_root = root->reloc_root;
                reloc_root->last_trans = trans->transid;
                return 0;
        }

        if (!rc || !rc->create_reloc_tree ||
            root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
                return 0;

        if (!trans->block_rsv) {
                trans->block_rsv = rc->block_rsv;
                clear_rsv = 1;
        }
        reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
        if (clear_rsv)
                trans->block_rsv = NULL;

        ret = __add_reloc_root(reloc_root);
        BUG_ON(ret < 0);
        root->reloc_root = reloc_root;
        return 0;
}

/*
 * update root item of reloc tree
 */
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root)
{
        struct btrfs_root *reloc_root;
        struct btrfs_root_item *root_item;
        int del = 0;
        int ret;

        if (!root->reloc_root)
                goto out;

        reloc_root = root->reloc_root;
        root_item = &reloc_root->root_item;

        if (root->fs_info->reloc_ctl->merge_reloc_tree &&
            btrfs_root_refs(root_item) == 0) {
                root->reloc_root = NULL;
                del = 1;
        }

        __update_reloc_root(reloc_root, del);

        if (reloc_root->commit_root != reloc_root->node) {
                btrfs_set_root_node(root_item, reloc_root->node);
                free_extent_buffer(reloc_root->commit_root);
                reloc_root->commit_root = btrfs_root_node(reloc_root);
        }

        ret = btrfs_update_root(trans, root->fs_info->tree_root,
                                &reloc_root->root_key, root_item);
        BUG_ON(ret);

out:
        return 0;
}

/*
 * helper to find the first cached inode with inode number >= objectid
 * in a subvolume
 */
static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
{
        struct rb_node *node;
        struct rb_node *prev;
        struct btrfs_inode *entry;
        struct inode *inode;

        spin_lock(&root->inode_lock);
again:
        node = root->inode_tree.rb_node;
        prev = NULL;
        while (node) {
                prev = node;
                entry = rb_entry(node, struct btrfs_inode, rb_node);

                if (objectid < btrfs_ino(&entry->vfs_inode))
                        node = node->rb_left;
                else if (objectid > btrfs_ino(&entry->vfs_inode))
                        node = node->rb_right;
                else
                        break;
        }
        if (!node) {
                while (prev) {
                        entry = rb_entry(prev, struct btrfs_inode, rb_node);
                        if (objectid <= btrfs_ino(&entry->vfs_inode)) {
                                node = prev;
                                break;
                        }
                        prev = rb_next(prev);
                }
        }
        while (node) {
                entry = rb_entry(node, struct btrfs_inode, rb_node);
                inode = igrab(&entry->vfs_inode);
                if (inode) {
                        spin_unlock(&root->inode_lock);
                        return inode;
                }

                objectid = btrfs_ino(&entry->vfs_inode) + 1;
                if (cond_resched_lock(&root->inode_lock))
                        goto again;

                node = rb_next(node);
        }
        spin_unlock(&root->inode_lock);
        return NULL;
}

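/*
 * return 1 if @bytenr lies inside the block group being relocated,
 * 0 otherwise
 */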
static int in_block_group(u64 bytenr,
                          struct btrfs_block_group_cache *block_group)
{
        if (bytenr >= block_group->key.objectid &&
            bytenr < block_group->key.objectid + block_group->key.offset)
                return 1;
        return 0;
}

/*
 * get new location of data
 */
static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
                            u64 bytenr, u64 num_bytes)
{
        struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
        struct btrfs_path *path;
        struct btrfs_file_extent_item *fi;
        struct extent_buffer *leaf;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        bytenr -= BTRFS_I(reloc_inode)->index_cnt;
        ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode),
                                       bytenr, 0);
        if (ret < 0)
                goto out;
        if (ret > 0) {
                ret = -ENOENT;
                goto out;
        }

        leaf = path->nodes[0];
        fi = btrfs_item_ptr(leaf, path->slots[0],
                            struct btrfs_file_extent_item);

        BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
               btrfs_file_extent_compression(leaf, fi) ||
               btrfs_file_extent_encryption(leaf, fi) ||
               btrfs_file_extent_other_encoding(leaf, fi));

        if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
                ret = 1;
                goto out;
        }

        *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
        ret = 0;
out:
        btrfs_free_path(path);
        return ret;
}

/*
 * update file extent items in the tree leaf to point to
 * the new locations.
 */
static noinline_for_stack
int replace_file_extents(struct btrfs_trans_handle *trans,
                         struct reloc_control *rc,
                         struct btrfs_root *root,
                         struct extent_buffer *leaf)
{
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        struct inode *inode = NULL;
        u64 parent;
        u64 bytenr;
        u64 new_bytenr = 0;
        u64 num_bytes;
        u64 end;
        u32 nritems;
        u32 i;
        int ret;
        int first = 1;
        int dirty = 0;

        if (rc->stage != UPDATE_DATA_PTRS)
                return 0;

        /* reloc trees always use full backref */
        if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
                parent = leaf->start;
        else
                parent = 0;

        nritems = btrfs_header_nritems(leaf);
        for (i = 0; i < nritems; i++) {
                cond_resched();
                btrfs_item_key_to_cpu(leaf, &key, i);
                if (key.type != BTRFS_EXTENT_DATA_KEY)
                        continue;
                fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
                if (btrfs_file_extent_type(leaf, fi) ==
                    BTRFS_FILE_EXTENT_INLINE)
                        continue;
                bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
                num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
                if (bytenr == 0)
                        continue;
                if (!in_block_group(bytenr, rc->block_group))
                        continue;

                /*
                 * if we are modifying a block in the fs tree, wait for
                 * readpage to complete and drop the extent cache
                 */
                if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
                        if (first) {
                                inode = find_next_inode(root, key.objectid);
                                first = 0;
                        } else if (inode && btrfs_ino(inode) < key.objectid) {
                                btrfs_add_delayed_iput(inode);
                                inode = find_next_inode(root, key.objectid);
                        }
                        if (inode && btrfs_ino(inode) == key.objectid) {
                                end = key.offset +
                                      btrfs_file_extent_num_bytes(leaf, fi);
                                WARN_ON(!IS_ALIGNED(key.offset,
                                                    root->sectorsize));
                                WARN_ON(!IS_ALIGNED(end, root->sectorsize));
                                end--;
                                ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
                                                      key.offset, end);
                                if (!ret)
                                        continue;

                                btrfs_drop_extent_cache(inode, key.offset, end,
                                                        1);
                                unlock_extent(&BTRFS_I(inode)->io_tree,
                                              key.offset, end);
                        }
                }

                ret = get_new_location(rc->data_inode, &new_bytenr,
                                       bytenr, num_bytes);
                if (ret > 0) {
                        WARN_ON(1);
                        continue;
                }
                BUG_ON(ret < 0);

                btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
                dirty = 1;

                key.offset -= btrfs_file_extent_offset(leaf, fi);
                ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
                                           num_bytes, parent,
                                           btrfs_header_owner(leaf),
                                           key.objectid, key.offset, 1);
                BUG_ON(ret);

                ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
                                        parent, btrfs_header_owner(leaf),
                                        key.objectid, key.offset, 1);
                BUG_ON(ret);
        }
        if (dirty)
                btrfs_mark_buffer_dirty(leaf);
        if (inode)
                btrfs_add_delayed_iput(inode);
        return 0;
}

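/*
 * compare the key of the pointer in @eb at @slot with the key at the
 * current slot of @path at @level; returns 0 if the keys are equal
 */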
static noinline_for_stack
int memcmp_node_keys(struct extent_buffer *eb, int slot,
                     struct btrfs_path *path, int level)
{
        struct btrfs_disk_key key1;
        struct btrfs_disk_key key2;
        btrfs_node_key(eb, &key1, slot);
        btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
        return memcmp(&key1, &key2, sizeof(key1));
}

/*
 * try to replace tree blocks in fs tree with the new blocks
 * in reloc tree. tree blocks that haven't been modified since the
 * reloc tree was created can be replaced.
 *
 * if a block was replaced, level of the block + 1 is returned.
 * if no block got replaced, 0 is returned. if there are other
 * errors, a negative error number is returned.
 */
static noinline_for_stack
int replace_path(struct btrfs_trans_handle *trans,
                 struct btrfs_root *dest, struct btrfs_root *src,
                 struct btrfs_path *path, struct btrfs_key *next_key,
                 int lowest_level, int max_level)
{
        struct extent_buffer *eb;
        struct extent_buffer *parent;
        struct btrfs_key key;
        u64 old_bytenr;
        u64 new_bytenr;
        u64 old_ptr_gen;
        u64 new_ptr_gen;
        u64 last_snapshot;
        u32 blocksize;
        int cow = 0;
        int level;
        int ret;
        int slot;

        BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
        BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);

        last_snapshot = btrfs_root_last_snapshot(&src->root_item);
again:
        slot = path->slots[lowest_level];
        btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);

        eb = btrfs_lock_root_node(dest);
        btrfs_set_lock_blocking(eb);
        level = btrfs_header_level(eb);

        if (level < lowest_level) {
                btrfs_tree_unlock(eb);
                free_extent_buffer(eb);
                return 0;
        }

        if (cow) {
                ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
                BUG_ON(ret);
        }
        btrfs_set_lock_blocking(eb);

        if (next_key) {
                next_key->objectid = (u64)-1;
                next_key->type = (u8)-1;
                next_key->offset = (u64)-1;
        }

        parent = eb;
        while (1) {
                level = btrfs_header_level(parent);
                BUG_ON(level < lowest_level);

                ret = btrfs_bin_search(parent, &key, level, &slot);
                if (ret && slot > 0)
                        slot--;

                if (next_key && slot + 1 < btrfs_header_nritems(parent))
                        btrfs_node_key_to_cpu(parent, next_key, slot + 1);

                old_bytenr = btrfs_node_blockptr(parent, slot);
                blocksize = btrfs_level_size(dest, level - 1);
                old_ptr_gen = btrfs_node_ptr_generation(parent, slot);

                if (level <= max_level) {
                        eb = path->nodes[level];
                        new_bytenr = btrfs_node_blockptr(eb,
                                                         path->slots[level]);
                        new_ptr_gen = btrfs_node_ptr_generation(eb,
                                                        path->slots[level]);
                } else {
                        new_bytenr = 0;
                        new_ptr_gen = 0;
                }

                if (new_bytenr > 0 && new_bytenr == old_bytenr) {
                        WARN_ON(1);
                        ret = level;
                        break;
                }

                if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
                    memcmp_node_keys(parent, slot, path, level)) {
                        if (level <= lowest_level) {
                                ret = 0;
                                break;
                        }

                        eb = read_tree_block(dest, old_bytenr, blocksize,
                                             old_ptr_gen);
                        BUG_ON(!eb);
                        btrfs_tree_lock(eb);
                        if (cow) {
                                ret = btrfs_cow_block(trans, dest, eb, parent,
                                                      slot, &eb);
                                BUG_ON(ret);
                        }
                        btrfs_set_lock_blocking(eb);

                        btrfs_tree_unlock(parent);
                        free_extent_buffer(parent);

                        parent = eb;
                        continue;
                }

                if (!cow) {
                        btrfs_tree_unlock(parent);
                        free_extent_buffer(parent);
                        cow = 1;
                        goto again;
                }

                btrfs_node_key_to_cpu(path->nodes[level], &key,
                                      path->slots[level]);
                btrfs_release_path(path);

                path->lowest_level = level;
                ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
                path->lowest_level = 0;
                BUG_ON(ret);

                /*
                 * swap blocks in fs tree and reloc tree.
                 */
                btrfs_set_node_blockptr(parent, slot, new_bytenr);
                btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
                btrfs_mark_buffer_dirty(parent);

                btrfs_set_node_blockptr(path->nodes[level],
                                        path->slots[level], old_bytenr);
                btrfs_set_node_ptr_generation(path->nodes[level],
                                              path->slots[level], old_ptr_gen);
                btrfs_mark_buffer_dirty(path->nodes[level]);

                ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
                                           path->nodes[level]->start,
                                           src->root_key.objectid, level - 1, 0,
                                           1);
                BUG_ON(ret);
                ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
                                           0, dest->root_key.objectid, level - 1,
                                           0, 1);
                BUG_ON(ret);

                ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
                                        path->nodes[level]->start,
                                        src->root_key.objectid, level - 1, 0,
                                        1);
                BUG_ON(ret);

                ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
                                        0, dest->root_key.objectid, level - 1,
                                        0, 1);
                BUG_ON(ret);

                btrfs_unlock_up_safe(path, 0);

                ret = level;
                break;
        }
        btrfs_tree_unlock(parent);
        free_extent_buffer(parent);
        return ret;
}

/*
 * helper to find the next relocated block in reloc tree
 */
static noinline_for_stack
int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
                       int *level)
{
        struct extent_buffer *eb;
        int i;
        u64 last_snapshot;
        u32 nritems;

        last_snapshot = btrfs_root_last_snapshot(&root->root_item);

        for (i = 0; i < *level; i++) {
                free_extent_buffer(path->nodes[i]);
                path->nodes[i] = NULL;
        }

        for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
                eb = path->nodes[i];
                nritems = btrfs_header_nritems(eb);
                while (path->slots[i] + 1 < nritems) {
                        path->slots[i]++;
                        if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
                            last_snapshot)
                                continue;

                        *level = i;
                        return 0;
                }
                free_extent_buffer(path->nodes[i]);
                path->nodes[i] = NULL;
        }
        return 1;
}

/*
 * walk down reloc tree to find relocated block of lowest level
 */
static noinline_for_stack
int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
                         int *level)
{
        struct extent_buffer *eb = NULL;
        int i;
        u64 bytenr;
        u64 ptr_gen = 0;
        u64 last_snapshot;
        u32 blocksize;
        u32 nritems;

        last_snapshot = btrfs_root_last_snapshot(&root->root_item);

        for (i = *level; i > 0; i--) {
                eb = path->nodes[i];
                nritems = btrfs_header_nritems(eb);
                while (path->slots[i] < nritems) {
                        ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
                        if (ptr_gen > last_snapshot)
                                break;
                        path->slots[i]++;
                }
                if (path->slots[i] >= nritems) {
                        if (i == *level)
                                break;
                        *level = i + 1;
                        return 0;
                }
                if (i == 1) {
                        *level = i;
                        return 0;
                }

                bytenr = btrfs_node_blockptr(eb, path->slots[i]);
                blocksize = btrfs_level_size(root, i - 1);
                eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
                BUG_ON(btrfs_header_level(eb) != i - 1);
                path->nodes[i - 1] = eb;
                path->slots[i - 1] = 0;
        }
        return 1;
}

/*
 * invalidate extent cache for file extents whose key is in the range
 * [min_key, max_key)
 */
static int invalidate_extent_cache(struct btrfs_root *root,
                                   struct btrfs_key *min_key,
                                   struct btrfs_key *max_key)
{
        struct inode *inode = NULL;
        u64 objectid;
        u64 start, end;
        u64 ino;

        objectid = min_key->objectid;
        while (1) {
                cond_resched();
                iput(inode);

                if (objectid > max_key->objectid)
                        break;

                inode = find_next_inode(root, objectid);
                if (!inode)
                        break;
                ino = btrfs_ino(inode);

                if (ino > max_key->objectid) {
                        iput(inode);
                        break;
                }

                objectid = ino + 1;
                if (!S_ISREG(inode->i_mode))
                        continue;

                if (unlikely(min_key->objectid == ino)) {
                        if (min_key->type > BTRFS_EXTENT_DATA_KEY)
                                continue;
                        if (min_key->type < BTRFS_EXTENT_DATA_KEY)
                                start = 0;
                        else {
                                start = min_key->offset;
                                WARN_ON(!IS_ALIGNED(start, root->sectorsize));
                        }
                } else {
                        start = 0;
                }

                if (unlikely(max_key->objectid == ino)) {
                        if (max_key->type < BTRFS_EXTENT_DATA_KEY)
                                continue;
                        if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
                                end = (u64)-1;
                        } else {
                                if (max_key->offset == 0)
                                        continue;
                                end = max_key->offset;
                                WARN_ON(!IS_ALIGNED(end, root->sectorsize));
                                end--;
                        }
                } else {
                        end = (u64)-1;
                }

                /* the lock_extent waits for readpage to complete */
                lock_extent(&BTRFS_I(inode)->io_tree, start, end);
                btrfs_drop_extent_cache(inode, start, end, 1);
                unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
        }
        return 0;
}

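/*
 * copy the key of the block pointer following path->slots[level] into
 * @key; returns 0 on success, 1 if there is no next key at or above
 * @level
 */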
static int find_next_key(struct btrfs_path *path, int level,
                         struct btrfs_key *key)
{
        while (level < BTRFS_MAX_LEVEL) {
                if (!path->nodes[level])
                        break;
                if (path->slots[level] + 1 <
                    btrfs_header_nritems(path->nodes[level])) {
                        btrfs_node_key_to_cpu(path->nodes[level], key,
                                              path->slots[level] + 1);
                        return 0;
                }
                level++;
        }
        return 1;
}

/*
 * merge the relocated tree blocks in reloc tree with corresponding
 * fs tree.
 */
static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
                                               struct btrfs_root *root)
{
        LIST_HEAD(inode_list);
        struct btrfs_key key;
        struct btrfs_key next_key;
        struct btrfs_trans_handle *trans;
        struct btrfs_root *reloc_root;
        struct btrfs_root_item *root_item;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        int level;
        int max_level;
        int replaced = 0;
        int ret;
        int err = 0;
        u32 min_reserved;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        path->reada = 1;

        reloc_root = root->reloc_root;
        root_item = &reloc_root->root_item;

        if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
                level = btrfs_root_level(root_item);
                extent_buffer_get(reloc_root->node);
                path->nodes[level] = reloc_root->node;
                path->slots[level] = 0;
        } else {
                btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);

                level = root_item->drop_level;
                BUG_ON(level == 0);
                path->lowest_level = level;
                ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
                path->lowest_level = 0;
                if (ret < 0) {
                        btrfs_free_path(path);
                        return ret;
                }

                btrfs_node_key_to_cpu(path->nodes[level], &next_key,
                                      path->slots[level]);
                WARN_ON(memcmp(&key, &next_key, sizeof(key)));

                btrfs_unlock_up_safe(path, 0);
        }

        min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
        memset(&next_key, 0, sizeof(next_key));

        while (1) {
                trans = btrfs_start_transaction(root, 0);
                BUG_ON(IS_ERR(trans));
                trans->block_rsv = rc->block_rsv;

                ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
                                             BTRFS_RESERVE_FLUSH_ALL);
                if (ret) {
                        BUG_ON(ret != -EAGAIN);
                        ret = btrfs_commit_transaction(trans, root);
                        BUG_ON(ret);
                        continue;
                }

                replaced = 0;
                max_level = level;

                ret = walk_down_reloc_tree(reloc_root, path, &level);
                if (ret < 0) {
                        err = ret;
                        goto out;
                }
                if (ret > 0)
                        break;

                if (!find_next_key(path, level, &key) &&
                    btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
                        ret = 0;
                } else {
                        ret = replace_path(trans, root, reloc_root, path,
                                           &next_key, level, max_level);
                }
                if (ret < 0) {
                        err = ret;
                        goto out;
                }

                if (ret > 0) {
                        level = ret;
                        btrfs_node_key_to_cpu(path->nodes[level], &key,
                                              path->slots[level]);
                        replaced = 1;
                }

                ret = walk_up_reloc_tree(reloc_root, path, &level);
                if (ret > 0)
                        break;

                BUG_ON(level == 0);
                /*
                 * save the merging progress in the drop_progress.
                 * this is OK since root refs == 1 in this case.
                 */
                btrfs_node_key(path->nodes[level], &root_item->drop_progress,
                               path->slots[level]);
                root_item->drop_level = level;

                btrfs_end_transaction_throttle(trans, root);

                btrfs_btree_balance_dirty(root);

                if (replaced && rc->stage == UPDATE_DATA_PTRS)
                        invalidate_extent_cache(root, &key, &next_key);
        }

        /*
         * handle the case where only one block in the fs tree needs to
         * be relocated and that block is the tree root.
         */
        leaf = btrfs_lock_root_node(root);
        ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
        btrfs_tree_unlock(leaf);
        free_extent_buffer(leaf);
        if (ret < 0)
                err = ret;
out:
        btrfs_free_path(path);

        if (err == 0) {
                memset(&root_item->drop_progress, 0,
                       sizeof(root_item->drop_progress));
                root_item->drop_level = 0;
                btrfs_set_root_refs(root_item, 0);
                btrfs_update_reloc_root(trans, root);
        }

        btrfs_end_transaction_throttle(trans, root);

        btrfs_btree_balance_dirty(root);

        if (replaced && rc->stage == UPDATE_DATA_PTRS)
                invalidate_extent_cache(root, &key, &next_key);

        return err;
}

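/*
 * reserve metadata space for merging the reloc trees and set the root
 * refs of every reloc tree to 1 so that btrfs_recover_relocation knows
 * merging must be resumed if relocation is interrupted
 */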
static noinline_for_stack
int prepare_to_merge(struct reloc_control *rc, int err)
{
        struct btrfs_root *root = rc->extent_root;
        struct btrfs_root *reloc_root;
        struct btrfs_trans_handle *trans;
        LIST_HEAD(reloc_roots);
        u64 num_bytes = 0;
        int ret;

        mutex_lock(&root->fs_info->reloc_mutex);
        rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
        rc->merging_rsv_size += rc->nodes_relocated * 2;
        mutex_unlock(&root->fs_info->reloc_mutex);

again:
        if (!err) {
                num_bytes = rc->merging_rsv_size;
                ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
                                          BTRFS_RESERVE_FLUSH_ALL);
                if (ret)
                        err = ret;
        }

        trans = btrfs_join_transaction(rc->extent_root);
        if (IS_ERR(trans)) {
                if (!err)
                        btrfs_block_rsv_release(rc->extent_root,
                                                rc->block_rsv, num_bytes);
                return PTR_ERR(trans);
        }

        if (!err) {
                if (num_bytes != rc->merging_rsv_size) {
                        btrfs_end_transaction(trans, rc->extent_root);
                        btrfs_block_rsv_release(rc->extent_root,
                                                rc->block_rsv, num_bytes);
                        goto again;
                }
        }

        rc->merge_reloc_tree = 1;

        while (!list_empty(&rc->reloc_roots)) {
                reloc_root = list_entry(rc->reloc_roots.next,
                                        struct btrfs_root, root_list);
                list_del_init(&reloc_root->root_list);

                root = read_fs_root(reloc_root->fs_info,
                                    reloc_root->root_key.offset);
                BUG_ON(IS_ERR(root));
                BUG_ON(root->reloc_root != reloc_root);

                /*
                 * set reference count to 1, so btrfs_recover_relocation
                 * knows it should resume merging
                 */
                if (!err)
                        btrfs_set_root_refs(&reloc_root->root_item, 1);
                btrfs_update_reloc_root(trans, root);

                list_add(&reloc_root->root_list, &reloc_roots);
        }

        list_splice(&reloc_roots, &rc->reloc_roots);

        if (!err)
                btrfs_commit_transaction(trans, rc->extent_root);
        else
                btrfs_end_transaction(trans, rc->extent_root);
        return err;
}

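/*
 * unmap every reloc root on @list, release its root node and commit
 * root extent buffers and free the in-memory root structure
 */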
static noinline_for_stack
void free_reloc_roots(struct list_head *list)
{
        struct btrfs_root *reloc_root;

        while (!list_empty(list)) {
                reloc_root = list_entry(list->next, struct btrfs_root,
                                        root_list);
                __update_reloc_root(reloc_root, 1);
                free_extent_buffer(reloc_root->node);
                free_extent_buffer(reloc_root->commit_root);
                kfree(reloc_root);
        }
}

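/*
 * merge all reloc trees on rc->reloc_roots into their fs trees and then
 * drop the reloc trees; loops because new reloc roots may be added to
 * the list while merging is in progress
 */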
	leaf = btrfs_lock_root_node(root);
	ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
	btrfs_tree_unlock(leaf);
	free_extent_buffer(leaf);
	if (ret < 0)
		err = ret;
out:
	btrfs_free_path(path);

	if (err == 0) {
		memset(&root_item->drop_progress, 0,
		       sizeof(root_item->drop_progress));
		root_item->drop_level = 0;
		btrfs_set_root_refs(root_item, 0);
		btrfs_update_reloc_root(trans, root);
	}

	btrfs_end_transaction_throttle(trans, root);

	btrfs_btree_balance_dirty(root);

	if (replaced && rc->stage == UPDATE_DATA_PTRS)
		invalidate_extent_cache(root, &key, &next_key);

	return err;
}

static noinline_for_stack
int prepare_to_merge(struct reloc_control *rc, int err)
{
	struct btrfs_root *root = rc->extent_root;
	struct btrfs_root *reloc_root;
	struct btrfs_trans_handle *trans;
	LIST_HEAD(reloc_roots);
	u64 num_bytes = 0;
	int ret;

	mutex_lock(&root->fs_info->reloc_mutex);
	rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
	rc->merging_rsv_size += rc->nodes_relocated * 2;
	mutex_unlock(&root->fs_info->reloc_mutex);

again:
	if (!err) {
		num_bytes = rc->merging_rsv_size;
		ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
					  BTRFS_RESERVE_FLUSH_ALL);
		if (ret)
			err = ret;
	}

	trans = btrfs_join_transaction(rc->extent_root);
	if (IS_ERR(trans)) {
		if (!err)
			btrfs_block_rsv_release(rc->extent_root,
						rc->block_rsv, num_bytes);
		return PTR_ERR(trans);
	}

	if (!err) {
		if (num_bytes != rc->merging_rsv_size) {
			btrfs_end_transaction(trans, rc->extent_root);
			btrfs_block_rsv_release(rc->extent_root,
						rc->block_rsv, num_bytes);
			goto again;
		}
	}

	rc->merge_reloc_tree = 1;

	while (!list_empty(&rc->reloc_roots)) {
		reloc_root = list_entry(rc->reloc_roots.next,
					struct btrfs_root, root_list);
		list_del_init(&reloc_root->root_list);

		root = read_fs_root(reloc_root->fs_info,
				    reloc_root->root_key.offset);
		BUG_ON(IS_ERR(root));
		BUG_ON(root->reloc_root != reloc_root);

		/*
		 * set the reference count to 1, so btrfs_recover_relocation
		 * knows it should resume merging
		 */
		if (!err)
			btrfs_set_root_refs(&reloc_root->root_item, 1);
		btrfs_update_reloc_root(trans, root);

		list_add(&reloc_root->root_list, &reloc_roots);
	}

	list_splice(&reloc_roots, &rc->reloc_roots);

	if (!err)
		btrfs_commit_transaction(trans, rc->extent_root);
	else
		btrfs_end_transaction(trans, rc->extent_root);
	return err;
}

static noinline_for_stack
void free_reloc_roots(struct list_head *list)
{
	struct btrfs_root *reloc_root;

	while (!list_empty(list)) {
		reloc_root = list_entry(list->next, struct btrfs_root,
					root_list);
		__update_reloc_root(reloc_root, 1);
		free_extent_buffer(reloc_root->node);
		free_extent_buffer(reloc_root->commit_root);
		kfree(reloc_root);
	}
}
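/*
 * Worked example (added for exposition, not part of the original file;
 * the figures assume a 16KiB nodesize and BTRFS_MAX_LEVEL == 8): each
 * call to prepare_to_merge() above grows merging_rsv_size by
 *
 *	16KiB * (8 - 1) * 2 = 224KiB
 *
 * of path-COW slack, plus twice the bytes of already relocated tree
 * nodes, covering the worst case of cowing both the reloc tree and the
 * fs tree during the merge.
 */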
static noinline_for_stack
int merge_reloc_roots(struct reloc_control *rc)
{
	struct btrfs_root *root;
	struct btrfs_root *reloc_root;
	LIST_HEAD(reloc_roots);
	int found = 0;
	int ret = 0;
again:
	root = rc->extent_root;

	/*
	 * this serializes us with btrfs_record_root_in_transaction;
	 * we have to make sure nobody is in the middle of adding their
	 * roots to the list while we are doing this splice
	 */
	mutex_lock(&root->fs_info->reloc_mutex);
	list_splice_init(&rc->reloc_roots, &reloc_roots);
	mutex_unlock(&root->fs_info->reloc_mutex);

	while (!list_empty(&reloc_roots)) {
		found = 1;
		reloc_root = list_entry(reloc_roots.next,
					struct btrfs_root, root_list);

		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
			root = read_fs_root(reloc_root->fs_info,
					    reloc_root->root_key.offset);
			BUG_ON(IS_ERR(root));
			BUG_ON(root->reloc_root != reloc_root);

			ret = merge_reloc_root(rc, root);
			if (ret)
				goto out;
		} else {
			list_del_init(&reloc_root->root_list);
		}
		ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
		if (ret < 0) {
			if (list_empty(&reloc_root->root_list))
				list_add_tail(&reloc_root->root_list,
					      &reloc_roots);
			goto out;
		}
	}

	if (found) {
		found = 0;
		goto again;
	}
out:
	if (ret) {
		btrfs_std_error(root->fs_info, ret);
		if (!list_empty(&reloc_roots))
			free_reloc_roots(&reloc_roots);
	}

	BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
	return ret;
}

static void free_block_list(struct rb_root *blocks)
{
	struct tree_block *block;
	struct rb_node *rb_node;
	while ((rb_node = rb_first(blocks))) {
		block = rb_entry(rb_node, struct tree_block, rb_node);
		rb_erase(rb_node, blocks);
		kfree(block);
	}
}

static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
				      struct btrfs_root *reloc_root)
{
	struct btrfs_root *root;

	if (reloc_root->last_trans == trans->transid)
		return 0;

	root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
	BUG_ON(IS_ERR(root));
	BUG_ON(root->reloc_root != reloc_root);

	return btrfs_record_root_in_trans(trans, root);
}

static noinline_for_stack
struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
				     struct reloc_control *rc,
				     struct backref_node *node,
				     struct backref_edge *edges[], int *nr)
{
	struct backref_node *next;
	struct btrfs_root *root;
	int index = 0;

	next = node;
	while (1) {
		cond_resched();
		next = walk_up_backref(next, edges, &index);
		root = next->root;
		BUG_ON(!root);
		BUG_ON(!root->ref_cows);

		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
			record_reloc_root_in_trans(trans, root);
			break;
		}

		btrfs_record_root_in_trans(trans, root);
		root = root->reloc_root;

		if (next->new_bytenr != root->node->start) {
			BUG_ON(next->new_bytenr);
			BUG_ON(!list_empty(&next->list));
			next->new_bytenr = root->node->start;
			next->root = root;
			list_add_tail(&next->list,
				      &rc->backref_cache.changed);
			__mark_block_processed(rc, next);
			break;
		}

		WARN_ON(1);
		root = NULL;
		next = walk_down_backref(edges, &index);
		if (!next || next->level <= node->level)
			break;
	}
	if (!root)
		return NULL;

	*nr = index;
	next = node;
	/* setup backref node path for btrfs_reloc_cow_block */
	while (1) {
		rc->backref_cache.path[next->level] = next;
		if (--index < 0)
			break;
		next = edges[index]->node[UPPER];
	}
	return root;
}
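/*
 * A note added for exposition (not part of the original file): several
 * helpers below share one traversal idiom over the backref cache.  A
 * minimal sketch, assuming the walk_up_backref()/walk_down_backref()
 * helpers defined earlier in this file:
 *
 *	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
 *	struct backref_node *next = node;
 *	int index = 0;
 *
 *	while (next) {
 *		next = walk_up_backref(next, edges, &index);
 *		...				// act on the node reached
 *		next = walk_down_backref(edges, &index);
 *	}
 */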
/*
 * select a tree root for relocation. return NULL if the block is
 * reference counted. we should use do_relocation() in this case.
 * return a tree root pointer if the block isn't reference counted.
 * return -ENOENT if the block is the root of a reloc tree.
 */
static noinline_for_stack
struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
				   struct backref_node *node)
{
	struct backref_node *next;
	struct btrfs_root *root;
	struct btrfs_root *fs_root = NULL;
	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	int index = 0;

	next = node;
	while (1) {
		cond_resched();
		next = walk_up_backref(next, edges, &index);
		root = next->root;
		BUG_ON(!root);

		/* no other choice for non-reference counted trees */
		if (!root->ref_cows)
			return root;

		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
			fs_root = root;

		if (next != node)
			return NULL;

		next = walk_down_backref(edges, &index);
		if (!next || next->level <= node->level)
			break;
	}

	if (!fs_root)
		return ERR_PTR(-ENOENT);
	return fs_root;
}

static noinline_for_stack
u64 calcu_metadata_size(struct reloc_control *rc,
			struct backref_node *node, int reserve)
{
	struct backref_node *next = node;
	struct backref_edge *edge;
	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	u64 num_bytes = 0;
	int index = 0;

	BUG_ON(reserve && node->processed);

	while (next) {
		cond_resched();
		while (1) {
			if (next->processed && (reserve || next != node))
				break;

			num_bytes += btrfs_level_size(rc->extent_root,
						      next->level);

			if (list_empty(&next->upper))
				break;

			edge = list_entry(next->upper.next,
					  struct backref_edge, list[LOWER]);
			edges[index++] = edge;
			next = edge->node[UPPER];
		}
		next = walk_down_backref(edges, &index);
	}
	return num_bytes;
}

static int reserve_metadata_space(struct btrfs_trans_handle *trans,
				  struct reloc_control *rc,
				  struct backref_node *node)
{
	struct btrfs_root *root = rc->extent_root;
	u64 num_bytes;
	int ret;

	num_bytes = calcu_metadata_size(rc, node, 1) * 2;

	trans->block_rsv = rc->block_rsv;
	ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes,
				  BTRFS_RESERVE_FLUSH_ALL);
	if (ret) {
		if (ret == -EAGAIN)
			rc->commit_transaction = 1;
		return ret;
	}

	return 0;
}

static void release_metadata_space(struct reloc_control *rc,
				   struct backref_node *node)
{
	u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2;
	btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes);
}

/*
 * relocate a tree block, and then update pointers in upper level
 * blocks that reference the block to point to the new location.
 *
 * if called by link_to_upper, the block has already been relocated.
 * in that case this function just updates pointers.
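 *
 * (Illustrative summary added for exposition, not in the original
 * file: after a block at bytenr A has been cowed to bytenr B, the
 * loop below visits every cached upper node that pointed at A and
 * either cows the child in place via btrfs_cow_block(), or rewrites
 * the block pointer A -> B, takes an extra reference on B with
 * btrfs_inc_extent_ref() and drops the old subtree with
 * btrfs_drop_subtree().)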
2515 */ 2516 static int do_relocation(struct btrfs_trans_handle *trans, 2517 struct reloc_control *rc, 2518 struct backref_node *node, 2519 struct btrfs_key *key, 2520 struct btrfs_path *path, int lowest) 2521 { 2522 struct backref_node *upper; 2523 struct backref_edge *edge; 2524 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2525 struct btrfs_root *root; 2526 struct extent_buffer *eb; 2527 u32 blocksize; 2528 u64 bytenr; 2529 u64 generation; 2530 int nr; 2531 int slot; 2532 int ret; 2533 int err = 0; 2534 2535 BUG_ON(lowest && node->eb); 2536 2537 path->lowest_level = node->level + 1; 2538 rc->backref_cache.path[node->level] = node; 2539 list_for_each_entry(edge, &node->upper, list[LOWER]) { 2540 cond_resched(); 2541 2542 upper = edge->node[UPPER]; 2543 root = select_reloc_root(trans, rc, upper, edges, &nr); 2544 BUG_ON(!root); 2545 2546 if (upper->eb && !upper->locked) { 2547 if (!lowest) { 2548 ret = btrfs_bin_search(upper->eb, key, 2549 upper->level, &slot); 2550 BUG_ON(ret); 2551 bytenr = btrfs_node_blockptr(upper->eb, slot); 2552 if (node->eb->start == bytenr) 2553 goto next; 2554 } 2555 drop_node_buffer(upper); 2556 } 2557 2558 if (!upper->eb) { 2559 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2560 if (ret < 0) { 2561 err = ret; 2562 break; 2563 } 2564 BUG_ON(ret > 0); 2565 2566 if (!upper->eb) { 2567 upper->eb = path->nodes[upper->level]; 2568 path->nodes[upper->level] = NULL; 2569 } else { 2570 BUG_ON(upper->eb != path->nodes[upper->level]); 2571 } 2572 2573 upper->locked = 1; 2574 path->locks[upper->level] = 0; 2575 2576 slot = path->slots[upper->level]; 2577 btrfs_release_path(path); 2578 } else { 2579 ret = btrfs_bin_search(upper->eb, key, upper->level, 2580 &slot); 2581 BUG_ON(ret); 2582 } 2583 2584 bytenr = btrfs_node_blockptr(upper->eb, slot); 2585 if (lowest) { 2586 BUG_ON(bytenr != node->bytenr); 2587 } else { 2588 if (node->eb->start == bytenr) 2589 goto next; 2590 } 2591 2592 blocksize = btrfs_level_size(root, node->level); 2593 generation = btrfs_node_ptr_generation(upper->eb, slot); 2594 eb = read_tree_block(root, bytenr, blocksize, generation); 2595 if (!eb) { 2596 err = -EIO; 2597 goto next; 2598 } 2599 btrfs_tree_lock(eb); 2600 btrfs_set_lock_blocking(eb); 2601 2602 if (!node->eb) { 2603 ret = btrfs_cow_block(trans, root, eb, upper->eb, 2604 slot, &eb); 2605 btrfs_tree_unlock(eb); 2606 free_extent_buffer(eb); 2607 if (ret < 0) { 2608 err = ret; 2609 goto next; 2610 } 2611 BUG_ON(node->eb != eb); 2612 } else { 2613 btrfs_set_node_blockptr(upper->eb, slot, 2614 node->eb->start); 2615 btrfs_set_node_ptr_generation(upper->eb, slot, 2616 trans->transid); 2617 btrfs_mark_buffer_dirty(upper->eb); 2618 2619 ret = btrfs_inc_extent_ref(trans, root, 2620 node->eb->start, blocksize, 2621 upper->eb->start, 2622 btrfs_header_owner(upper->eb), 2623 node->level, 0, 1); 2624 BUG_ON(ret); 2625 2626 ret = btrfs_drop_subtree(trans, root, eb, upper->eb); 2627 BUG_ON(ret); 2628 } 2629 next: 2630 if (!upper->pending) 2631 drop_node_buffer(upper); 2632 else 2633 unlock_node_buffer(upper); 2634 if (err) 2635 break; 2636 } 2637 2638 if (!err && node->pending) { 2639 drop_node_buffer(node); 2640 list_move_tail(&node->list, &rc->backref_cache.changed); 2641 node->pending = 0; 2642 } 2643 2644 path->lowest_level = 0; 2645 BUG_ON(err == -ENOSPC); 2646 return err; 2647 } 2648 2649 static int link_to_upper(struct btrfs_trans_handle *trans, 2650 struct reloc_control *rc, 2651 struct backref_node *node, 2652 struct btrfs_path *path) 2653 { 2654 struct btrfs_key key; 2655 2656 
	btrfs_node_key_to_cpu(node->eb, &key, 0);
	return do_relocation(trans, rc, node, &key, path, 0);
}

static int finish_pending_nodes(struct btrfs_trans_handle *trans,
				struct reloc_control *rc,
				struct btrfs_path *path, int err)
{
	LIST_HEAD(list);
	struct backref_cache *cache = &rc->backref_cache;
	struct backref_node *node;
	int level;
	int ret;

	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
		while (!list_empty(&cache->pending[level])) {
			node = list_entry(cache->pending[level].next,
					  struct backref_node, list);
			list_move_tail(&node->list, &list);
			BUG_ON(!node->pending);

			if (!err) {
				ret = link_to_upper(trans, rc, node, path);
				if (ret < 0)
					err = ret;
			}
		}
		list_splice_init(&list, &cache->pending[level]);
	}
	return err;
}

static void mark_block_processed(struct reloc_control *rc,
				 u64 bytenr, u32 blocksize)
{
	set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1,
			EXTENT_DIRTY, GFP_NOFS);
}

static void __mark_block_processed(struct reloc_control *rc,
				   struct backref_node *node)
{
	u32 blocksize;
	if (node->level == 0 ||
	    in_block_group(node->bytenr, rc->block_group)) {
		blocksize = btrfs_level_size(rc->extent_root, node->level);
		mark_block_processed(rc, node->bytenr, blocksize);
	}
	node->processed = 1;
}

/*
 * mark a block and all blocks that directly/indirectly reference the
 * block as processed.
 */
static void update_processed_blocks(struct reloc_control *rc,
				    struct backref_node *node)
{
	struct backref_node *next = node;
	struct backref_edge *edge;
	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	int index = 0;

	while (next) {
		cond_resched();
		while (1) {
			if (next->processed)
				break;

			__mark_block_processed(rc, next);

			if (list_empty(&next->upper))
				break;

			edge = list_entry(next->upper.next,
					  struct backref_edge, list[LOWER]);
			edges[index++] = edge;
			next = edge->node[UPPER];
		}
		next = walk_down_backref(edges, &index);
	}
}

static int tree_block_processed(u64 bytenr, u32 blocksize,
				struct reloc_control *rc)
{
	if (test_range_bit(&rc->processed_blocks, bytenr,
			   bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
		return 1;
	return 0;
}

static int get_tree_block_key(struct reloc_control *rc,
			      struct tree_block *block)
{
	struct extent_buffer *eb;

	BUG_ON(block->key_ready);
	eb = read_tree_block(rc->extent_root, block->bytenr,
			     block->key.objectid, block->key.offset);
	BUG_ON(!eb);
	WARN_ON(btrfs_header_level(eb) != block->level);
	if (block->level == 0)
		btrfs_item_key_to_cpu(eb, &block->key, 0);
	else
		btrfs_node_key_to_cpu(eb, &block->key, 0);
	free_extent_buffer(eb);
	block->key_ready = 1;
	return 0;
}

static int reada_tree_block(struct reloc_control *rc,
			    struct tree_block *block)
{
	BUG_ON(block->key_ready);
	readahead_tree_block(rc->extent_root, block->bytenr,
			     block->key.objectid, block->key.offset);
	return 0;
}

/*
 * helper function to relocate a tree block
 */
static int relocate_tree_block(struct btrfs_trans_handle *trans,
			       struct reloc_control *rc,
			       struct backref_node *node,
			       struct btrfs_key *key,
			       struct
btrfs_path *path) 2784 { 2785 struct btrfs_root *root; 2786 int release = 0; 2787 int ret = 0; 2788 2789 if (!node) 2790 return 0; 2791 2792 BUG_ON(node->processed); 2793 root = select_one_root(trans, node); 2794 if (root == ERR_PTR(-ENOENT)) { 2795 update_processed_blocks(rc, node); 2796 goto out; 2797 } 2798 2799 if (!root || root->ref_cows) { 2800 ret = reserve_metadata_space(trans, rc, node); 2801 if (ret) 2802 goto out; 2803 release = 1; 2804 } 2805 2806 if (root) { 2807 if (root->ref_cows) { 2808 BUG_ON(node->new_bytenr); 2809 BUG_ON(!list_empty(&node->list)); 2810 btrfs_record_root_in_trans(trans, root); 2811 root = root->reloc_root; 2812 node->new_bytenr = root->node->start; 2813 node->root = root; 2814 list_add_tail(&node->list, &rc->backref_cache.changed); 2815 } else { 2816 path->lowest_level = node->level; 2817 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2818 btrfs_release_path(path); 2819 if (ret > 0) 2820 ret = 0; 2821 } 2822 if (!ret) 2823 update_processed_blocks(rc, node); 2824 } else { 2825 ret = do_relocation(trans, rc, node, key, path, 1); 2826 } 2827 out: 2828 if (ret || node->level == 0 || node->cowonly) { 2829 if (release) 2830 release_metadata_space(rc, node); 2831 remove_backref_node(&rc->backref_cache, node); 2832 } 2833 return ret; 2834 } 2835 2836 /* 2837 * relocate a list of blocks 2838 */ 2839 static noinline_for_stack 2840 int relocate_tree_blocks(struct btrfs_trans_handle *trans, 2841 struct reloc_control *rc, struct rb_root *blocks) 2842 { 2843 struct backref_node *node; 2844 struct btrfs_path *path; 2845 struct tree_block *block; 2846 struct rb_node *rb_node; 2847 int ret; 2848 int err = 0; 2849 2850 path = btrfs_alloc_path(); 2851 if (!path) { 2852 err = -ENOMEM; 2853 goto out_path; 2854 } 2855 2856 rb_node = rb_first(blocks); 2857 while (rb_node) { 2858 block = rb_entry(rb_node, struct tree_block, rb_node); 2859 if (!block->key_ready) 2860 reada_tree_block(rc, block); 2861 rb_node = rb_next(rb_node); 2862 } 2863 2864 rb_node = rb_first(blocks); 2865 while (rb_node) { 2866 block = rb_entry(rb_node, struct tree_block, rb_node); 2867 if (!block->key_ready) 2868 get_tree_block_key(rc, block); 2869 rb_node = rb_next(rb_node); 2870 } 2871 2872 rb_node = rb_first(blocks); 2873 while (rb_node) { 2874 block = rb_entry(rb_node, struct tree_block, rb_node); 2875 2876 node = build_backref_tree(rc, &block->key, 2877 block->level, block->bytenr); 2878 if (IS_ERR(node)) { 2879 err = PTR_ERR(node); 2880 goto out; 2881 } 2882 2883 ret = relocate_tree_block(trans, rc, node, &block->key, 2884 path); 2885 if (ret < 0) { 2886 if (ret != -EAGAIN || rb_node == rb_first(blocks)) 2887 err = ret; 2888 goto out; 2889 } 2890 rb_node = rb_next(rb_node); 2891 } 2892 out: 2893 err = finish_pending_nodes(trans, rc, path, err); 2894 2895 btrfs_free_path(path); 2896 out_path: 2897 free_block_list(blocks); 2898 return err; 2899 } 2900 2901 static noinline_for_stack 2902 int prealloc_file_extent_cluster(struct inode *inode, 2903 struct file_extent_cluster *cluster) 2904 { 2905 u64 alloc_hint = 0; 2906 u64 start; 2907 u64 end; 2908 u64 offset = BTRFS_I(inode)->index_cnt; 2909 u64 num_bytes; 2910 int nr = 0; 2911 int ret = 0; 2912 2913 BUG_ON(cluster->start != cluster->boundary[0]); 2914 mutex_lock(&inode->i_mutex); 2915 2916 ret = btrfs_check_data_free_space(inode, cluster->end + 2917 1 - cluster->start); 2918 if (ret) 2919 goto out; 2920 2921 while (nr < cluster->nr) { 2922 start = cluster->boundary[nr] - offset; 2923 if (nr + 1 < cluster->nr) 2924 end = cluster->boundary[nr + 1] 
- 1 - offset; 2925 else 2926 end = cluster->end - offset; 2927 2928 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 2929 num_bytes = end + 1 - start; 2930 ret = btrfs_prealloc_file_range(inode, 0, start, 2931 num_bytes, num_bytes, 2932 end + 1, &alloc_hint); 2933 unlock_extent(&BTRFS_I(inode)->io_tree, start, end); 2934 if (ret) 2935 break; 2936 nr++; 2937 } 2938 btrfs_free_reserved_data_space(inode, cluster->end + 2939 1 - cluster->start); 2940 out: 2941 mutex_unlock(&inode->i_mutex); 2942 return ret; 2943 } 2944 2945 static noinline_for_stack 2946 int setup_extent_mapping(struct inode *inode, u64 start, u64 end, 2947 u64 block_start) 2948 { 2949 struct btrfs_root *root = BTRFS_I(inode)->root; 2950 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 2951 struct extent_map *em; 2952 int ret = 0; 2953 2954 em = alloc_extent_map(); 2955 if (!em) 2956 return -ENOMEM; 2957 2958 em->start = start; 2959 em->len = end + 1 - start; 2960 em->block_len = em->len; 2961 em->block_start = block_start; 2962 em->bdev = root->fs_info->fs_devices->latest_bdev; 2963 set_bit(EXTENT_FLAG_PINNED, &em->flags); 2964 2965 lock_extent(&BTRFS_I(inode)->io_tree, start, end); 2966 while (1) { 2967 write_lock(&em_tree->lock); 2968 ret = add_extent_mapping(em_tree, em); 2969 write_unlock(&em_tree->lock); 2970 if (ret != -EEXIST) { 2971 free_extent_map(em); 2972 break; 2973 } 2974 btrfs_drop_extent_cache(inode, start, end, 0); 2975 } 2976 unlock_extent(&BTRFS_I(inode)->io_tree, start, end); 2977 return ret; 2978 } 2979 2980 static int relocate_file_extent_cluster(struct inode *inode, 2981 struct file_extent_cluster *cluster) 2982 { 2983 u64 page_start; 2984 u64 page_end; 2985 u64 offset = BTRFS_I(inode)->index_cnt; 2986 unsigned long index; 2987 unsigned long last_index; 2988 struct page *page; 2989 struct file_ra_state *ra; 2990 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 2991 int nr = 0; 2992 int ret = 0; 2993 2994 if (!cluster->nr) 2995 return 0; 2996 2997 ra = kzalloc(sizeof(*ra), GFP_NOFS); 2998 if (!ra) 2999 return -ENOMEM; 3000 3001 ret = prealloc_file_extent_cluster(inode, cluster); 3002 if (ret) 3003 goto out; 3004 3005 file_ra_state_init(ra, inode->i_mapping); 3006 3007 ret = setup_extent_mapping(inode, cluster->start - offset, 3008 cluster->end - offset, cluster->start); 3009 if (ret) 3010 goto out; 3011 3012 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; 3013 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; 3014 while (index <= last_index) { 3015 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); 3016 if (ret) 3017 goto out; 3018 3019 page = find_lock_page(inode->i_mapping, index); 3020 if (!page) { 3021 page_cache_sync_readahead(inode->i_mapping, 3022 ra, NULL, index, 3023 last_index + 1 - index); 3024 page = find_or_create_page(inode->i_mapping, index, 3025 mask); 3026 if (!page) { 3027 btrfs_delalloc_release_metadata(inode, 3028 PAGE_CACHE_SIZE); 3029 ret = -ENOMEM; 3030 goto out; 3031 } 3032 } 3033 3034 if (PageReadahead(page)) { 3035 page_cache_async_readahead(inode->i_mapping, 3036 ra, NULL, page, index, 3037 last_index + 1 - index); 3038 } 3039 3040 if (!PageUptodate(page)) { 3041 btrfs_readpage(NULL, page); 3042 lock_page(page); 3043 if (!PageUptodate(page)) { 3044 unlock_page(page); 3045 page_cache_release(page); 3046 btrfs_delalloc_release_metadata(inode, 3047 PAGE_CACHE_SIZE); 3048 ret = -EIO; 3049 goto out; 3050 } 3051 } 3052 3053 page_start = page_offset(page); 3054 page_end = page_start + PAGE_CACHE_SIZE - 1; 3055 3056 
lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); 3057 3058 set_page_extent_mapped(page); 3059 3060 if (nr < cluster->nr && 3061 page_start + offset == cluster->boundary[nr]) { 3062 set_extent_bits(&BTRFS_I(inode)->io_tree, 3063 page_start, page_end, 3064 EXTENT_BOUNDARY, GFP_NOFS); 3065 nr++; 3066 } 3067 3068 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); 3069 set_page_dirty(page); 3070 3071 unlock_extent(&BTRFS_I(inode)->io_tree, 3072 page_start, page_end); 3073 unlock_page(page); 3074 page_cache_release(page); 3075 3076 index++; 3077 balance_dirty_pages_ratelimited(inode->i_mapping); 3078 btrfs_throttle(BTRFS_I(inode)->root); 3079 } 3080 WARN_ON(nr != cluster->nr); 3081 out: 3082 kfree(ra); 3083 return ret; 3084 } 3085 3086 static noinline_for_stack 3087 int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, 3088 struct file_extent_cluster *cluster) 3089 { 3090 int ret; 3091 3092 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) { 3093 ret = relocate_file_extent_cluster(inode, cluster); 3094 if (ret) 3095 return ret; 3096 cluster->nr = 0; 3097 } 3098 3099 if (!cluster->nr) 3100 cluster->start = extent_key->objectid; 3101 else 3102 BUG_ON(cluster->nr >= MAX_EXTENTS); 3103 cluster->end = extent_key->objectid + extent_key->offset - 1; 3104 cluster->boundary[cluster->nr] = extent_key->objectid; 3105 cluster->nr++; 3106 3107 if (cluster->nr >= MAX_EXTENTS) { 3108 ret = relocate_file_extent_cluster(inode, cluster); 3109 if (ret) 3110 return ret; 3111 cluster->nr = 0; 3112 } 3113 return 0; 3114 } 3115 3116 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3117 static int get_ref_objectid_v0(struct reloc_control *rc, 3118 struct btrfs_path *path, 3119 struct btrfs_key *extent_key, 3120 u64 *ref_objectid, int *path_change) 3121 { 3122 struct btrfs_key key; 3123 struct extent_buffer *leaf; 3124 struct btrfs_extent_ref_v0 *ref0; 3125 int ret; 3126 int slot; 3127 3128 leaf = path->nodes[0]; 3129 slot = path->slots[0]; 3130 while (1) { 3131 if (slot >= btrfs_header_nritems(leaf)) { 3132 ret = btrfs_next_leaf(rc->extent_root, path); 3133 if (ret < 0) 3134 return ret; 3135 BUG_ON(ret > 0); 3136 leaf = path->nodes[0]; 3137 slot = path->slots[0]; 3138 if (path_change) 3139 *path_change = 1; 3140 } 3141 btrfs_item_key_to_cpu(leaf, &key, slot); 3142 if (key.objectid != extent_key->objectid) 3143 return -ENOENT; 3144 3145 if (key.type != BTRFS_EXTENT_REF_V0_KEY) { 3146 slot++; 3147 continue; 3148 } 3149 ref0 = btrfs_item_ptr(leaf, slot, 3150 struct btrfs_extent_ref_v0); 3151 *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0); 3152 break; 3153 } 3154 return 0; 3155 } 3156 #endif 3157 3158 /* 3159 * helper to add a tree block to the list. 
 * the major work is getting the generation and level of the block
 */
static int add_tree_block(struct reloc_control *rc,
			  struct btrfs_key *extent_key,
			  struct btrfs_path *path,
			  struct rb_root *blocks)
{
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct btrfs_tree_block_info *bi;
	struct tree_block *block;
	struct rb_node *rb_node;
	u32 item_size;
	int level = -1;
	int generation;

	eb = path->nodes[0];
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (item_size >= sizeof(*ei) + sizeof(*bi)) {
		ei = btrfs_item_ptr(eb, path->slots[0],
				    struct btrfs_extent_item);
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		generation = btrfs_extent_generation(eb, ei);
		level = btrfs_tree_block_level(eb, bi);
	} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		u64 ref_owner;
		int ret;

		BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
		ret = get_ref_objectid_v0(rc, path, extent_key,
					  &ref_owner, NULL);
		if (ret < 0)
			return ret;
		BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
		level = (int)ref_owner;
		/* FIXME: get real generation */
		generation = 0;
#else
		BUG();
#endif
	}

	btrfs_release_path(path);

	BUG_ON(level == -1);

	block = kmalloc(sizeof(*block), GFP_NOFS);
	if (!block)
		return -ENOMEM;

	block->bytenr = extent_key->objectid;
	block->key.objectid = extent_key->offset;
	block->key.offset = generation;
	block->level = level;
	block->key_ready = 0;

	rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
	if (rb_node)
		backref_tree_panic(rb_node, -EEXIST, block->bytenr);

	return 0;
}

/*
 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
 */
static int __add_tree_block(struct reloc_control *rc,
			    u64 bytenr, u32 blocksize,
			    struct rb_root *blocks)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	if (tree_block_processed(bytenr, blocksize, rc))
		return 0;

	if (tree_search(blocks, bytenr))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = blocksize;

	path->search_commit_root = 1;
	path->skip_locking = 1;
	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret);

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	ret = add_tree_block(rc, &key, path, blocks);
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * helper to check if the block uses full backrefs for pointers in it
 */
static int block_use_full_backref(struct reloc_control *rc,
				  struct extent_buffer *eb)
{
	u64 flags;
	int ret;

	if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
	    btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
		return 1;

	ret = btrfs_lookup_extent_info(NULL, rc->extent_root,
				       eb->start, eb->len, NULL, &flags);
	BUG_ON(ret);

	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
		ret = 1;
	else
		ret = 0;
	return ret;
}
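/*
 * Illustrative note (added for exposition, not part of the original
 * file): for a leaf L holding a reference to data extent E, the extent
 * tree records either
 *
 *	a normal backref:  (E, BTRFS_EXTENT_DATA_REF_KEY, root/ino/offset)
 *	a shared backref:  (E, BTRFS_SHARED_DATA_REF_KEY, L->start)
 *
 * Shared backrefs name the tree block directly, so add_data_references()
 * can feed key.offset straight into __add_tree_block(); only normal
 * backrefs require the fs-tree scan done by find_data_references() below.
 */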
static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
				    struct inode *inode, u64 ino)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_trans_handle *trans;
	int ret = 0;

	if (inode)
		goto truncate;

	key.objectid = ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
	if (IS_ERR(inode) || is_bad_inode(inode)) {
		if (!IS_ERR(inode))
			iput(inode);
		return -ENOENT;
	}

truncate:
	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		ret = PTR_ERR(trans);
		goto out;
	}

	ret = btrfs_truncate_free_space_cache(root, trans, path, inode);

	btrfs_free_path(path);
	btrfs_end_transaction(trans, root);
	btrfs_btree_balance_dirty(root);
out:
	iput(inode);
	return ret;
}

/*
 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
 * this function scans the fs tree to find blocks that reference the
 * data extent
 */
static int find_data_references(struct reloc_control *rc,
				struct btrfs_key *extent_key,
				struct extent_buffer *leaf,
				struct btrfs_extent_data_ref *ref,
				struct rb_root *blocks)
{
	struct btrfs_path *path;
	struct tree_block *block;
	struct btrfs_root *root;
	struct btrfs_file_extent_item *fi;
	struct rb_node *rb_node;
	struct btrfs_key key;
	u64 ref_root;
	u64 ref_objectid;
	u64 ref_offset;
	u32 ref_count;
	u32 nritems;
	int err = 0;
	int added = 0;
	int counted;
	int ret;

	ref_root = btrfs_extent_data_ref_root(leaf, ref);
	ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
	ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
	ref_count = btrfs_extent_data_ref_count(leaf, ref);

	/*
	 * This is an extent belonging to the free space cache, let's just
	 * delete it and redo the search.
3369 */ 3370 if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { 3371 ret = delete_block_group_cache(rc->extent_root->fs_info, 3372 NULL, ref_objectid); 3373 if (ret != -ENOENT) 3374 return ret; 3375 ret = 0; 3376 } 3377 3378 path = btrfs_alloc_path(); 3379 if (!path) 3380 return -ENOMEM; 3381 path->reada = 1; 3382 3383 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3384 if (IS_ERR(root)) { 3385 err = PTR_ERR(root); 3386 goto out; 3387 } 3388 3389 key.objectid = ref_objectid; 3390 key.type = BTRFS_EXTENT_DATA_KEY; 3391 if (ref_offset > ((u64)-1 << 32)) 3392 key.offset = 0; 3393 else 3394 key.offset = ref_offset; 3395 3396 path->search_commit_root = 1; 3397 path->skip_locking = 1; 3398 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3399 if (ret < 0) { 3400 err = ret; 3401 goto out; 3402 } 3403 3404 leaf = path->nodes[0]; 3405 nritems = btrfs_header_nritems(leaf); 3406 /* 3407 * the references in tree blocks that use full backrefs 3408 * are not counted in 3409 */ 3410 if (block_use_full_backref(rc, leaf)) 3411 counted = 0; 3412 else 3413 counted = 1; 3414 rb_node = tree_search(blocks, leaf->start); 3415 if (rb_node) { 3416 if (counted) 3417 added = 1; 3418 else 3419 path->slots[0] = nritems; 3420 } 3421 3422 while (ref_count > 0) { 3423 while (path->slots[0] >= nritems) { 3424 ret = btrfs_next_leaf(root, path); 3425 if (ret < 0) { 3426 err = ret; 3427 goto out; 3428 } 3429 if (ret > 0) { 3430 WARN_ON(1); 3431 goto out; 3432 } 3433 3434 leaf = path->nodes[0]; 3435 nritems = btrfs_header_nritems(leaf); 3436 added = 0; 3437 3438 if (block_use_full_backref(rc, leaf)) 3439 counted = 0; 3440 else 3441 counted = 1; 3442 rb_node = tree_search(blocks, leaf->start); 3443 if (rb_node) { 3444 if (counted) 3445 added = 1; 3446 else 3447 path->slots[0] = nritems; 3448 } 3449 } 3450 3451 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3452 if (key.objectid != ref_objectid || 3453 key.type != BTRFS_EXTENT_DATA_KEY) { 3454 WARN_ON(1); 3455 break; 3456 } 3457 3458 fi = btrfs_item_ptr(leaf, path->slots[0], 3459 struct btrfs_file_extent_item); 3460 3461 if (btrfs_file_extent_type(leaf, fi) == 3462 BTRFS_FILE_EXTENT_INLINE) 3463 goto next; 3464 3465 if (btrfs_file_extent_disk_bytenr(leaf, fi) != 3466 extent_key->objectid) 3467 goto next; 3468 3469 key.offset -= btrfs_file_extent_offset(leaf, fi); 3470 if (key.offset != ref_offset) 3471 goto next; 3472 3473 if (counted) 3474 ref_count--; 3475 if (added) 3476 goto next; 3477 3478 if (!tree_block_processed(leaf->start, leaf->len, rc)) { 3479 block = kmalloc(sizeof(*block), GFP_NOFS); 3480 if (!block) { 3481 err = -ENOMEM; 3482 break; 3483 } 3484 block->bytenr = leaf->start; 3485 btrfs_item_key_to_cpu(leaf, &block->key, 0); 3486 block->level = 0; 3487 block->key_ready = 1; 3488 rb_node = tree_insert(blocks, block->bytenr, 3489 &block->rb_node); 3490 if (rb_node) 3491 backref_tree_panic(rb_node, -EEXIST, 3492 block->bytenr); 3493 } 3494 if (counted) 3495 added = 1; 3496 else 3497 path->slots[0] = nritems; 3498 next: 3499 path->slots[0]++; 3500 3501 } 3502 out: 3503 btrfs_free_path(path); 3504 return err; 3505 } 3506 3507 /* 3508 * helper to find all tree blocks that reference a given data extent 3509 */ 3510 static noinline_for_stack 3511 int add_data_references(struct reloc_control *rc, 3512 struct btrfs_key *extent_key, 3513 struct btrfs_path *path, 3514 struct rb_root *blocks) 3515 { 3516 struct btrfs_key key; 3517 struct extent_buffer *eb; 3518 struct btrfs_extent_data_ref *dref; 3519 struct btrfs_extent_inline_ref *iref; 3520 unsigned long ptr; 
3521 unsigned long end; 3522 u32 blocksize = btrfs_level_size(rc->extent_root, 0); 3523 int ret; 3524 int err = 0; 3525 3526 eb = path->nodes[0]; 3527 ptr = btrfs_item_ptr_offset(eb, path->slots[0]); 3528 end = ptr + btrfs_item_size_nr(eb, path->slots[0]); 3529 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3530 if (ptr + sizeof(struct btrfs_extent_item_v0) == end) 3531 ptr = end; 3532 else 3533 #endif 3534 ptr += sizeof(struct btrfs_extent_item); 3535 3536 while (ptr < end) { 3537 iref = (struct btrfs_extent_inline_ref *)ptr; 3538 key.type = btrfs_extent_inline_ref_type(eb, iref); 3539 if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 3540 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 3541 ret = __add_tree_block(rc, key.offset, blocksize, 3542 blocks); 3543 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 3544 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 3545 ret = find_data_references(rc, extent_key, 3546 eb, dref, blocks); 3547 } else { 3548 BUG(); 3549 } 3550 ptr += btrfs_extent_inline_ref_size(key.type); 3551 } 3552 WARN_ON(ptr > end); 3553 3554 while (1) { 3555 cond_resched(); 3556 eb = path->nodes[0]; 3557 if (path->slots[0] >= btrfs_header_nritems(eb)) { 3558 ret = btrfs_next_leaf(rc->extent_root, path); 3559 if (ret < 0) { 3560 err = ret; 3561 break; 3562 } 3563 if (ret > 0) 3564 break; 3565 eb = path->nodes[0]; 3566 } 3567 3568 btrfs_item_key_to_cpu(eb, &key, path->slots[0]); 3569 if (key.objectid != extent_key->objectid) 3570 break; 3571 3572 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3573 if (key.type == BTRFS_SHARED_DATA_REF_KEY || 3574 key.type == BTRFS_EXTENT_REF_V0_KEY) { 3575 #else 3576 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); 3577 if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 3578 #endif 3579 ret = __add_tree_block(rc, key.offset, blocksize, 3580 blocks); 3581 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 3582 dref = btrfs_item_ptr(eb, path->slots[0], 3583 struct btrfs_extent_data_ref); 3584 ret = find_data_references(rc, extent_key, 3585 eb, dref, blocks); 3586 } else { 3587 ret = 0; 3588 } 3589 if (ret) { 3590 err = ret; 3591 break; 3592 } 3593 path->slots[0]++; 3594 } 3595 btrfs_release_path(path); 3596 if (err) 3597 free_block_list(blocks); 3598 return err; 3599 } 3600 3601 /* 3602 * helper to find next unprocessed extent 3603 */ 3604 static noinline_for_stack 3605 int find_next_extent(struct btrfs_trans_handle *trans, 3606 struct reloc_control *rc, struct btrfs_path *path, 3607 struct btrfs_key *extent_key) 3608 { 3609 struct btrfs_key key; 3610 struct extent_buffer *leaf; 3611 u64 start, end, last; 3612 int ret; 3613 3614 last = rc->block_group->key.objectid + rc->block_group->key.offset; 3615 while (1) { 3616 cond_resched(); 3617 if (rc->search_start >= last) { 3618 ret = 1; 3619 break; 3620 } 3621 3622 key.objectid = rc->search_start; 3623 key.type = BTRFS_EXTENT_ITEM_KEY; 3624 key.offset = 0; 3625 3626 path->search_commit_root = 1; 3627 path->skip_locking = 1; 3628 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 3629 0, 0); 3630 if (ret < 0) 3631 break; 3632 next: 3633 leaf = path->nodes[0]; 3634 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 3635 ret = btrfs_next_leaf(rc->extent_root, path); 3636 if (ret != 0) 3637 break; 3638 leaf = path->nodes[0]; 3639 } 3640 3641 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3642 if (key.objectid >= last) { 3643 ret = 1; 3644 break; 3645 } 3646 3647 if (key.type != BTRFS_EXTENT_ITEM_KEY || 3648 key.objectid + key.offset <= rc->search_start) { 3649 path->slots[0]++; 3650 goto next; 3651 } 3652 3653 
		ret = find_first_extent_bit(&rc->processed_blocks,
					    key.objectid, &start, &end,
					    EXTENT_DIRTY, NULL);

		if (ret == 0 && start <= key.objectid) {
			btrfs_release_path(path);
			rc->search_start = end + 1;
		} else {
			rc->search_start = key.objectid + key.offset;
			memcpy(extent_key, &key, sizeof(key));
			return 0;
		}
	}
	btrfs_release_path(path);
	return ret;
}

static void set_reloc_control(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;

	mutex_lock(&fs_info->reloc_mutex);
	fs_info->reloc_ctl = rc;
	mutex_unlock(&fs_info->reloc_mutex);
}

static void unset_reloc_control(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;

	mutex_lock(&fs_info->reloc_mutex);
	fs_info->reloc_ctl = NULL;
	mutex_unlock(&fs_info->reloc_mutex);
}

static int check_extent_flags(u64 flags)
{
	if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
	    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
		return 1;
	if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
	    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
		return 1;
	if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
	    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
		return 1;
	return 0;
}

static noinline_for_stack
int prepare_to_relocate(struct reloc_control *rc)
{
	struct btrfs_trans_handle *trans;
	int ret;

	rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root,
					      BTRFS_BLOCK_RSV_TEMP);
	if (!rc->block_rsv)
		return -ENOMEM;

	/*
	 * reserve some space for creating reloc trees.
	 * btrfs_init_reloc_root will use it when there
	 * is no reservation in the transaction handle.
	 */
	ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv,
				  rc->extent_root->nodesize * 256,
				  BTRFS_RESERVE_FLUSH_ALL);
	if (ret)
		return ret;

	memset(&rc->cluster, 0, sizeof(rc->cluster));
	rc->search_start = rc->block_group->key.objectid;
	rc->extents_found = 0;
	rc->nodes_relocated = 0;
	rc->merging_rsv_size = 0;

	rc->create_reloc_tree = 1;
	set_reloc_control(rc);

	trans = btrfs_join_transaction(rc->extent_root);
	if (IS_ERR(trans)) {
		unset_reloc_control(rc);
		/*
		 * the extent tree is not a ref_cow tree and has no
		 * reloc_root to clean up, and callers are responsible
		 * for freeing the above block rsv.
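		 * (For reference: relocate_block_group() below does this
		 * with btrfs_free_block_rsv() on its out_free path.)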
3740 */ 3741 return PTR_ERR(trans); 3742 } 3743 btrfs_commit_transaction(trans, rc->extent_root); 3744 return 0; 3745 } 3746 3747 static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 3748 { 3749 struct rb_root blocks = RB_ROOT; 3750 struct btrfs_key key; 3751 struct btrfs_trans_handle *trans = NULL; 3752 struct btrfs_path *path; 3753 struct btrfs_extent_item *ei; 3754 u64 flags; 3755 u32 item_size; 3756 int ret; 3757 int err = 0; 3758 int progress = 0; 3759 3760 path = btrfs_alloc_path(); 3761 if (!path) 3762 return -ENOMEM; 3763 path->reada = 1; 3764 3765 ret = prepare_to_relocate(rc); 3766 if (ret) { 3767 err = ret; 3768 goto out_free; 3769 } 3770 3771 while (1) { 3772 progress++; 3773 trans = btrfs_start_transaction(rc->extent_root, 0); 3774 if (IS_ERR(trans)) { 3775 err = PTR_ERR(trans); 3776 trans = NULL; 3777 break; 3778 } 3779 restart: 3780 if (update_backref_cache(trans, &rc->backref_cache)) { 3781 btrfs_end_transaction(trans, rc->extent_root); 3782 continue; 3783 } 3784 3785 ret = find_next_extent(trans, rc, path, &key); 3786 if (ret < 0) 3787 err = ret; 3788 if (ret != 0) 3789 break; 3790 3791 rc->extents_found++; 3792 3793 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 3794 struct btrfs_extent_item); 3795 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 3796 if (item_size >= sizeof(*ei)) { 3797 flags = btrfs_extent_flags(path->nodes[0], ei); 3798 ret = check_extent_flags(flags); 3799 BUG_ON(ret); 3800 3801 } else { 3802 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3803 u64 ref_owner; 3804 int path_change = 0; 3805 3806 BUG_ON(item_size != 3807 sizeof(struct btrfs_extent_item_v0)); 3808 ret = get_ref_objectid_v0(rc, path, &key, &ref_owner, 3809 &path_change); 3810 if (ref_owner < BTRFS_FIRST_FREE_OBJECTID) 3811 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK; 3812 else 3813 flags = BTRFS_EXTENT_FLAG_DATA; 3814 3815 if (path_change) { 3816 btrfs_release_path(path); 3817 3818 path->search_commit_root = 1; 3819 path->skip_locking = 1; 3820 ret = btrfs_search_slot(NULL, rc->extent_root, 3821 &key, path, 0, 0); 3822 if (ret < 0) { 3823 err = ret; 3824 break; 3825 } 3826 BUG_ON(ret > 0); 3827 } 3828 #else 3829 BUG(); 3830 #endif 3831 } 3832 3833 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 3834 ret = add_tree_block(rc, &key, path, &blocks); 3835 } else if (rc->stage == UPDATE_DATA_PTRS && 3836 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3837 ret = add_data_references(rc, &key, path, &blocks); 3838 } else { 3839 btrfs_release_path(path); 3840 ret = 0; 3841 } 3842 if (ret < 0) { 3843 err = ret; 3844 break; 3845 } 3846 3847 if (!RB_EMPTY_ROOT(&blocks)) { 3848 ret = relocate_tree_blocks(trans, rc, &blocks); 3849 if (ret < 0) { 3850 if (ret != -EAGAIN) { 3851 err = ret; 3852 break; 3853 } 3854 rc->extents_found--; 3855 rc->search_start = key.objectid; 3856 } 3857 } 3858 3859 ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5); 3860 if (ret < 0) { 3861 if (ret != -ENOSPC) { 3862 err = ret; 3863 WARN_ON(1); 3864 break; 3865 } 3866 rc->commit_transaction = 1; 3867 } 3868 3869 if (rc->commit_transaction) { 3870 rc->commit_transaction = 0; 3871 ret = btrfs_commit_transaction(trans, rc->extent_root); 3872 BUG_ON(ret); 3873 } else { 3874 btrfs_end_transaction_throttle(trans, rc->extent_root); 3875 btrfs_btree_balance_dirty(rc->extent_root); 3876 } 3877 trans = NULL; 3878 3879 if (rc->stage == MOVE_DATA_EXTENTS && 3880 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3881 rc->found_file_extent = 1; 3882 ret = relocate_data_extent(rc->data_inode, 3883 &key, &rc->cluster); 3884 if (ret < 
0) { 3885 err = ret; 3886 break; 3887 } 3888 } 3889 } 3890 if (trans && progress && err == -ENOSPC) { 3891 ret = btrfs_force_chunk_alloc(trans, rc->extent_root, 3892 rc->block_group->flags); 3893 if (ret == 0) { 3894 err = 0; 3895 progress = 0; 3896 goto restart; 3897 } 3898 } 3899 3900 btrfs_release_path(path); 3901 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3902 GFP_NOFS); 3903 3904 if (trans) { 3905 btrfs_end_transaction_throttle(trans, rc->extent_root); 3906 btrfs_btree_balance_dirty(rc->extent_root); 3907 } 3908 3909 if (!err) { 3910 ret = relocate_file_extent_cluster(rc->data_inode, 3911 &rc->cluster); 3912 if (ret < 0) 3913 err = ret; 3914 } 3915 3916 rc->create_reloc_tree = 0; 3917 set_reloc_control(rc); 3918 3919 backref_cache_cleanup(&rc->backref_cache); 3920 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); 3921 3922 err = prepare_to_merge(rc, err); 3923 3924 merge_reloc_roots(rc); 3925 3926 rc->merge_reloc_tree = 0; 3927 unset_reloc_control(rc); 3928 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); 3929 3930 /* get rid of pinned extents */ 3931 trans = btrfs_join_transaction(rc->extent_root); 3932 if (IS_ERR(trans)) 3933 err = PTR_ERR(trans); 3934 else 3935 btrfs_commit_transaction(trans, rc->extent_root); 3936 out_free: 3937 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); 3938 btrfs_free_path(path); 3939 return err; 3940 } 3941 3942 static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 3943 struct btrfs_root *root, u64 objectid) 3944 { 3945 struct btrfs_path *path; 3946 struct btrfs_inode_item *item; 3947 struct extent_buffer *leaf; 3948 int ret; 3949 3950 path = btrfs_alloc_path(); 3951 if (!path) 3952 return -ENOMEM; 3953 3954 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 3955 if (ret) 3956 goto out; 3957 3958 leaf = path->nodes[0]; 3959 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 3960 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 3961 btrfs_set_inode_generation(leaf, item, 1); 3962 btrfs_set_inode_size(leaf, item, 0); 3963 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 3964 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | 3965 BTRFS_INODE_PREALLOC); 3966 btrfs_mark_buffer_dirty(leaf); 3967 btrfs_release_path(path); 3968 out: 3969 btrfs_free_path(path); 3970 return ret; 3971 } 3972 3973 /* 3974 * helper to create inode for data relocation. 
3975 * the inode is in data relocation tree and its link count is 0 3976 */ 3977 static noinline_for_stack 3978 struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, 3979 struct btrfs_block_group_cache *group) 3980 { 3981 struct inode *inode = NULL; 3982 struct btrfs_trans_handle *trans; 3983 struct btrfs_root *root; 3984 struct btrfs_key key; 3985 u64 objectid = BTRFS_FIRST_FREE_OBJECTID; 3986 int err = 0; 3987 3988 root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID); 3989 if (IS_ERR(root)) 3990 return ERR_CAST(root); 3991 3992 trans = btrfs_start_transaction(root, 6); 3993 if (IS_ERR(trans)) 3994 return ERR_CAST(trans); 3995 3996 err = btrfs_find_free_objectid(root, &objectid); 3997 if (err) 3998 goto out; 3999 4000 err = __insert_orphan_inode(trans, root, objectid); 4001 BUG_ON(err); 4002 4003 key.objectid = objectid; 4004 key.type = BTRFS_INODE_ITEM_KEY; 4005 key.offset = 0; 4006 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); 4007 BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); 4008 BTRFS_I(inode)->index_cnt = group->key.objectid; 4009 4010 err = btrfs_orphan_add(trans, inode); 4011 out: 4012 btrfs_end_transaction(trans, root); 4013 btrfs_btree_balance_dirty(root); 4014 if (err) { 4015 if (inode) 4016 iput(inode); 4017 inode = ERR_PTR(err); 4018 } 4019 return inode; 4020 } 4021 4022 static struct reloc_control *alloc_reloc_control(void) 4023 { 4024 struct reloc_control *rc; 4025 4026 rc = kzalloc(sizeof(*rc), GFP_NOFS); 4027 if (!rc) 4028 return NULL; 4029 4030 INIT_LIST_HEAD(&rc->reloc_roots); 4031 backref_cache_init(&rc->backref_cache); 4032 mapping_tree_init(&rc->reloc_root_tree); 4033 extent_io_tree_init(&rc->processed_blocks, NULL); 4034 return rc; 4035 } 4036 4037 /* 4038 * function to relocate all extents in a block group. 
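 *
 * (overview added for exposition, not part of the original file: the
 * group is first set read-only, its free space cache inode is dropped,
 * then relocate_block_group() runs in stage MOVE_DATA_EXTENTS and, if
 * any file extents were found, again in stage UPDATE_DATA_PTRS, looping
 * until no unprocessed extents remain.)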
4039 */ 4040 int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) 4041 { 4042 struct btrfs_fs_info *fs_info = extent_root->fs_info; 4043 struct reloc_control *rc; 4044 struct inode *inode; 4045 struct btrfs_path *path; 4046 int ret; 4047 int rw = 0; 4048 int err = 0; 4049 4050 rc = alloc_reloc_control(); 4051 if (!rc) 4052 return -ENOMEM; 4053 4054 rc->extent_root = extent_root; 4055 4056 rc->block_group = btrfs_lookup_block_group(fs_info, group_start); 4057 BUG_ON(!rc->block_group); 4058 4059 if (!rc->block_group->ro) { 4060 ret = btrfs_set_block_group_ro(extent_root, rc->block_group); 4061 if (ret) { 4062 err = ret; 4063 goto out; 4064 } 4065 rw = 1; 4066 } 4067 4068 path = btrfs_alloc_path(); 4069 if (!path) { 4070 err = -ENOMEM; 4071 goto out; 4072 } 4073 4074 inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group, 4075 path); 4076 btrfs_free_path(path); 4077 4078 if (!IS_ERR(inode)) 4079 ret = delete_block_group_cache(fs_info, inode, 0); 4080 else 4081 ret = PTR_ERR(inode); 4082 4083 if (ret && ret != -ENOENT) { 4084 err = ret; 4085 goto out; 4086 } 4087 4088 rc->data_inode = create_reloc_inode(fs_info, rc->block_group); 4089 if (IS_ERR(rc->data_inode)) { 4090 err = PTR_ERR(rc->data_inode); 4091 rc->data_inode = NULL; 4092 goto out; 4093 } 4094 4095 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", 4096 (unsigned long long)rc->block_group->key.objectid, 4097 (unsigned long long)rc->block_group->flags); 4098 4099 ret = btrfs_start_delalloc_inodes(fs_info->tree_root, 0); 4100 if (ret < 0) { 4101 err = ret; 4102 goto out; 4103 } 4104 btrfs_wait_ordered_extents(fs_info->tree_root, 0); 4105 4106 while (1) { 4107 mutex_lock(&fs_info->cleaner_mutex); 4108 4109 btrfs_clean_old_snapshots(fs_info->tree_root); 4110 ret = relocate_block_group(rc); 4111 4112 mutex_unlock(&fs_info->cleaner_mutex); 4113 if (ret < 0) { 4114 err = ret; 4115 goto out; 4116 } 4117 4118 if (rc->extents_found == 0) 4119 break; 4120 4121 printk(KERN_INFO "btrfs: found %llu extents\n", 4122 (unsigned long long)rc->extents_found); 4123 4124 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { 4125 btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); 4126 invalidate_mapping_pages(rc->data_inode->i_mapping, 4127 0, -1); 4128 rc->stage = UPDATE_DATA_PTRS; 4129 } 4130 } 4131 4132 filemap_write_and_wait_range(fs_info->btree_inode->i_mapping, 4133 rc->block_group->key.objectid, 4134 rc->block_group->key.objectid + 4135 rc->block_group->key.offset - 1); 4136 4137 WARN_ON(rc->block_group->pinned > 0); 4138 WARN_ON(rc->block_group->reserved > 0); 4139 WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); 4140 out: 4141 if (err && rw) 4142 btrfs_set_block_group_rw(extent_root, rc->block_group); 4143 iput(rc->data_inode); 4144 btrfs_put_block_group(rc->block_group); 4145 kfree(rc); 4146 return err; 4147 } 4148 4149 static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) 4150 { 4151 struct btrfs_trans_handle *trans; 4152 int ret, err; 4153 4154 trans = btrfs_start_transaction(root->fs_info->tree_root, 0); 4155 if (IS_ERR(trans)) 4156 return PTR_ERR(trans); 4157 4158 memset(&root->root_item.drop_progress, 0, 4159 sizeof(root->root_item.drop_progress)); 4160 root->root_item.drop_level = 0; 4161 btrfs_set_root_refs(&root->root_item, 0); 4162 ret = btrfs_update_root(trans, root->fs_info->tree_root, 4163 &root->root_key, &root->root_item); 4164 4165 err = btrfs_end_transaction(trans, root->fs_info->tree_root); 4166 if (err) 4167 return err; 4168 
return ret; 4169 } 4170 4171 /* 4172 * recover relocation interrupted by system crash. 4173 * 4174 * this function resumes merging reloc trees with corresponding fs trees. 4175 * this is important for keeping the sharing of tree blocks 4176 */ 4177 int btrfs_recover_relocation(struct btrfs_root *root) 4178 { 4179 LIST_HEAD(reloc_roots); 4180 struct btrfs_key key; 4181 struct btrfs_root *fs_root; 4182 struct btrfs_root *reloc_root; 4183 struct btrfs_path *path; 4184 struct extent_buffer *leaf; 4185 struct reloc_control *rc = NULL; 4186 struct btrfs_trans_handle *trans; 4187 int ret; 4188 int err = 0; 4189 4190 path = btrfs_alloc_path(); 4191 if (!path) 4192 return -ENOMEM; 4193 path->reada = -1; 4194 4195 key.objectid = BTRFS_TREE_RELOC_OBJECTID; 4196 key.type = BTRFS_ROOT_ITEM_KEY; 4197 key.offset = (u64)-1; 4198 4199 while (1) { 4200 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, 4201 path, 0, 0); 4202 if (ret < 0) { 4203 err = ret; 4204 goto out; 4205 } 4206 if (ret > 0) { 4207 if (path->slots[0] == 0) 4208 break; 4209 path->slots[0]--; 4210 } 4211 leaf = path->nodes[0]; 4212 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4213 btrfs_release_path(path); 4214 4215 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || 4216 key.type != BTRFS_ROOT_ITEM_KEY) 4217 break; 4218 4219 reloc_root = btrfs_read_fs_root_no_radix(root, &key); 4220 if (IS_ERR(reloc_root)) { 4221 err = PTR_ERR(reloc_root); 4222 goto out; 4223 } 4224 4225 list_add(&reloc_root->root_list, &reloc_roots); 4226 4227 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 4228 fs_root = read_fs_root(root->fs_info, 4229 reloc_root->root_key.offset); 4230 if (IS_ERR(fs_root)) { 4231 ret = PTR_ERR(fs_root); 4232 if (ret != -ENOENT) { 4233 err = ret; 4234 goto out; 4235 } 4236 ret = mark_garbage_root(reloc_root); 4237 if (ret < 0) { 4238 err = ret; 4239 goto out; 4240 } 4241 } 4242 } 4243 4244 if (key.offset == 0) 4245 break; 4246 4247 key.offset--; 4248 } 4249 btrfs_release_path(path); 4250 4251 if (list_empty(&reloc_roots)) 4252 goto out; 4253 4254 rc = alloc_reloc_control(); 4255 if (!rc) { 4256 err = -ENOMEM; 4257 goto out; 4258 } 4259 4260 rc->extent_root = root->fs_info->extent_root; 4261 4262 set_reloc_control(rc); 4263 4264 trans = btrfs_join_transaction(rc->extent_root); 4265 if (IS_ERR(trans)) { 4266 unset_reloc_control(rc); 4267 err = PTR_ERR(trans); 4268 goto out_free; 4269 } 4270 4271 rc->merge_reloc_tree = 1; 4272 4273 while (!list_empty(&reloc_roots)) { 4274 reloc_root = list_entry(reloc_roots.next, 4275 struct btrfs_root, root_list); 4276 list_del(&reloc_root->root_list); 4277 4278 if (btrfs_root_refs(&reloc_root->root_item) == 0) { 4279 list_add_tail(&reloc_root->root_list, 4280 &rc->reloc_roots); 4281 continue; 4282 } 4283 4284 fs_root = read_fs_root(root->fs_info, 4285 reloc_root->root_key.offset); 4286 if (IS_ERR(fs_root)) { 4287 err = PTR_ERR(fs_root); 4288 goto out_free; 4289 } 4290 4291 err = __add_reloc_root(reloc_root); 4292 BUG_ON(err < 0); /* -ENOMEM or logic error */ 4293 fs_root->reloc_root = reloc_root; 4294 } 4295 4296 err = btrfs_commit_transaction(trans, rc->extent_root); 4297 if (err) 4298 goto out_free; 4299 4300 merge_reloc_roots(rc); 4301 4302 unset_reloc_control(rc); 4303 4304 trans = btrfs_join_transaction(rc->extent_root); 4305 if (IS_ERR(trans)) 4306 err = PTR_ERR(trans); 4307 else 4308 err = btrfs_commit_transaction(trans, rc->extent_root); 4309 out_free: 4310 kfree(rc); 4311 out: 4312 if (!list_empty(&reloc_roots)) 4313 free_reloc_roots(&reloc_roots); 4314 4315 
/*
 * helper to add ordered checksums for data relocation.
 *
 * cloning the checksums properly handles the nodatasum extents.
 * it also saves the CPU time that would otherwise be spent
 * re-calculating the checksums.
 */
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
{
	struct btrfs_ordered_sum *sums;
	struct btrfs_sector_sum *sector_sum;
	struct btrfs_ordered_extent *ordered;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	size_t offset;
	int ret;
	u64 disk_bytenr;
	LIST_HEAD(list);

	ordered = btrfs_lookup_ordered_extent(inode, file_pos);
	BUG_ON(ordered->file_offset != file_pos || ordered->len != len);

	disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
	ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
				       disk_bytenr + len - 1, &list, 0);
	if (ret)
		goto out;

	while (!list_empty(&list)) {
		sums = list_entry(list.next, struct btrfs_ordered_sum, list);
		list_del_init(&sums->list);

		sector_sum = sums->sums;
		sums->bytenr = ordered->start;

		offset = 0;
		while (offset < sums->len) {
			sector_sum->bytenr += ordered->start - disk_bytenr;
			sector_sum++;
			offset += root->sectorsize;
		}

		btrfs_add_ordered_sum(inode, ordered, sums);
	}
out:
	btrfs_put_ordered_extent(ordered);
	return ret;
}

void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, struct extent_buffer *buf,
			   struct extent_buffer *cow)
{
	struct reloc_control *rc;
	struct backref_node *node;
	int first_cow = 0;
	int level;
	int ret;

	rc = root->fs_info->reloc_ctl;
	if (!rc)
		return;

	BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
	       root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);

	level = btrfs_header_level(buf);
	if (btrfs_header_generation(buf) <=
	    btrfs_root_last_snapshot(&root->root_item))
		first_cow = 1;

	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
	    rc->create_reloc_tree) {
		WARN_ON(!first_cow && level == 0);

		node = rc->backref_cache.path[level];
		BUG_ON(node->bytenr != buf->start &&
		       node->new_bytenr != buf->start);

		drop_node_buffer(node);
		extent_buffer_get(cow);
		node->eb = cow;
		node->new_bytenr = cow->start;

		if (!node->pending) {
			list_move_tail(&node->list,
				       &rc->backref_cache.pending[level]);
			node->pending = 1;
		}

		if (first_cow)
			__mark_block_processed(rc, node);

		if (first_cow && level > 0)
			rc->nodes_relocated += buf->len;
	}

	if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) {
		ret = replace_file_extents(trans, rc, root, cow);
		BUG_ON(ret);
	}
}
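/*
 * btrfs_reloc_cow_block() above is the hook the tree COW path is
 * expected to call for every block cowed while relocation is active;
 * it keeps the backref cache pointing at the new copy of each block
 * and, during the UPDATE_DATA_PTRS stage, rewrites file extent
 * pointers in the freshly cowed leaf.
 */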
/*
 * called before creating snapshot. it calculates metadata reservation
 * required for relocating tree blocks in the snapshot
 */
void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
			      struct btrfs_pending_snapshot *pending,
			      u64 *bytes_to_reserve)
{
	struct btrfs_root *root;
	struct reloc_control *rc;

	root = pending->root;
	if (!root->reloc_root)
		return;

	rc = root->fs_info->reloc_ctl;
	if (!rc->merge_reloc_tree)
		return;

	root = root->reloc_root;
	BUG_ON(btrfs_root_refs(&root->root_item) == 0);
	/*
	 * relocation is in the stage of merging trees. the space
	 * used by merging a reloc tree is twice the size of
	 * relocated tree nodes in the worst case. half for cowing
	 * the reloc tree, half for cowing the fs tree. the space
	 * used by cowing the reloc tree will be freed after the
	 * tree is dropped. if we create a snapshot, cowing the fs
	 * tree may use more space than it frees, so we need to
	 * reserve extra space.
	 */
	*bytes_to_reserve += rc->nodes_relocated;
}

/*
 * called after snapshot is created. migrate block reservation
 * and create reloc root for the newly created snapshot
 */
int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
			      struct btrfs_pending_snapshot *pending)
{
	struct btrfs_root *root = pending->root;
	struct btrfs_root *reloc_root;
	struct btrfs_root *new_root;
	struct reloc_control *rc;
	int ret;

	if (!root->reloc_root)
		return 0;

	rc = root->fs_info->reloc_ctl;
	rc->merging_rsv_size += rc->nodes_relocated;

	if (rc->merge_reloc_tree) {
		ret = btrfs_block_rsv_migrate(&pending->block_rsv,
					      rc->block_rsv,
					      rc->nodes_relocated);
		if (ret)
			return ret;
	}

	new_root = pending->snap;
	reloc_root = create_reloc_root(trans, root->reloc_root,
				       new_root->root_key.objectid);
	if (IS_ERR(reloc_root))
		return PTR_ERR(reloc_root);

	ret = __add_reloc_root(reloc_root);
	BUG_ON(ret < 0);
	new_root->reloc_root = reloc_root;

	if (rc->create_reloc_tree)
		ret = clone_backref_node(trans, rc, root, reloc_root);
	return ret;
}
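/*
 * worked example for the snapshot reservation above (numbers are
 * illustrative only, not taken from the source): if rc->nodes_relocated
 * is 8MiB when a snapshot is created mid-merge,
 * btrfs_reloc_pre_snapshot() asks the snapshot code to reserve an extra
 * 8MiB, and btrfs_reloc_post_snapshot() then migrates those 8MiB from
 * pending->block_rsv into rc->block_rsv, covering the worst case where
 * cowing the fs tree during the merge frees less space than it uses.
 */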