/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "btrfs_inode.h"
#include "async-thread.h"

/*
 * backref_node, mapping_node and tree_block start with this
 */
struct tree_entry {
	struct rb_node rb_node;
	u64 bytenr;
};

/*
 * represent a tree block in the backref cache
 */
struct backref_node {
	struct rb_node rb_node;
	u64 bytenr;
	/* objectid of tree block owner */
	u64 owner;
	/* list of upper level blocks that reference this block */
	struct list_head upper;
	/* list of child blocks in the cache */
	struct list_head lower;
	/* NULL if this node is not a tree root */
	struct btrfs_root *root;
	/* extent buffer obtained by COWing the block */
	struct extent_buffer *eb;
	/* level of tree block */
	unsigned int level:8;
	/* 1 if the block is root of an old snapshot */
	unsigned int old_root:1;
	/* 1 if no child blocks are in the cache */
	unsigned int lowest:1;
	/* is the extent buffer locked */
	unsigned int locked:1;
	/* has the block been processed */
	unsigned int processed:1;
	/* have backrefs of this block been checked */
	unsigned int checked:1;
};

/*
 * represent a block pointer in the backref cache
 */
struct backref_edge {
	struct list_head list[2];
	struct backref_node *node[2];
	u64 blockptr;
};

#define LOWER	0
#define UPPER	1

struct backref_cache {
	/* red black tree of all backref nodes in the cache */
	struct rb_root rb_root;
	/* list of backref nodes with no child block in the cache */
	struct list_head pending[BTRFS_MAX_LEVEL];
	spinlock_t lock;
};

/*
 * map address of tree root to tree
 */
struct mapping_node {
	struct rb_node rb_node;
	u64 bytenr;
	void *data;
};

struct mapping_tree {
	struct rb_root rb_root;
	spinlock_t lock;
};

/*
 * represent a tree block to process
 */
struct tree_block {
	struct rb_node rb_node;
	u64 bytenr;
	struct btrfs_key key;
	unsigned int level:8;
	unsigned int key_ready:1;
};

/* inode vector */
#define INODEVEC_SIZE 16

struct inodevec {
	struct list_head list;
	struct inode *inode[INODEVEC_SIZE];
	int nr;
};

#define MAX_EXTENTS 128

struct file_extent_cluster {
	u64 start;
	u64 end;
	u64 boundary[MAX_EXTENTS];
	unsigned int nr;
};

struct reloc_control {
	/* block group to relocate */
	struct btrfs_block_group_cache *block_group;
	/* extent tree */
	struct btrfs_root *extent_root;
	/* inode for moving data */
	struct inode *data_inode;
	struct btrfs_workers workers;
	/* tree blocks that have been processed */
	struct extent_io_tree processed_blocks;
	/* map start of tree root to corresponding reloc tree */
	struct mapping_tree reloc_root_tree;
	/* list of reloc trees */
	struct list_head reloc_roots;
	u64 search_start;
	u64 extents_found;
	u64 extents_skipped;
	int stage;
	int create_reloc_root;
	unsigned int found_file_extent:1;
	unsigned int found_old_snapshot:1;
};
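/*
 * Note: backref_node, mapping_node and tree_block all begin with the
 * same { rb_node, bytenr } pair as struct tree_entry, so the generic
 * tree_insert()/tree_search() helpers below can index any of them by
 * bytenr, e.g.:
 *
 *	rb_node = tree_search(&cache->rb_root, bytenr);
 *	if (rb_node)
 *		node = rb_entry(rb_node, struct backref_node, rb_node);
 */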
/* stages of data relocation */
#define MOVE_DATA_EXTENTS	0
#define UPDATE_DATA_PTRS	1

/*
 * merge reloc tree to corresponding fs tree in worker threads
 */
struct async_merge {
	struct btrfs_work work;
	struct reloc_control *rc;
	struct btrfs_root *root;
	struct completion *done;
	atomic_t *num_pending;
};

static void mapping_tree_init(struct mapping_tree *tree)
{
	tree->rb_root.rb_node = NULL;
	spin_lock_init(&tree->lock);
}

static void backref_cache_init(struct backref_cache *cache)
{
	int i;
	cache->rb_root.rb_node = NULL;
	for (i = 0; i < BTRFS_MAX_LEVEL; i++)
		INIT_LIST_HEAD(&cache->pending[i]);
	spin_lock_init(&cache->lock);
}

static void backref_node_init(struct backref_node *node)
{
	memset(node, 0, sizeof(*node));
	INIT_LIST_HEAD(&node->upper);
	INIT_LIST_HEAD(&node->lower);
	RB_CLEAR_NODE(&node->rb_node);
}

static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (bytenr < entry->bytenr)
			p = &(*p)->rb_left;
		else if (bytenr > entry->bytenr)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
{
	struct rb_node *n = root->rb_node;
	struct tree_entry *entry;

	while (n) {
		entry = rb_entry(n, struct tree_entry, rb_node);

		if (bytenr < entry->bytenr)
			n = n->rb_left;
		else if (bytenr > entry->bytenr)
			n = n->rb_right;
		else
			return n;
	}
	return NULL;
}

/*
 * walk up backref nodes until reaching the node that represents a
 * tree root
 */
static struct backref_node *walk_up_backref(struct backref_node *node,
					    struct backref_edge *edges[],
					    int *index)
{
	struct backref_edge *edge;
	int idx = *index;

	while (!list_empty(&node->upper)) {
		edge = list_entry(node->upper.next,
				  struct backref_edge, list[LOWER]);
		edges[idx++] = edge;
		node = edge->node[UPPER];
	}
	*index = idx;
	return node;
}

/*
 * walk down backref nodes to find start of next reference path
 */
static struct backref_node *walk_down_backref(struct backref_edge *edges[],
					      int *index)
{
	struct backref_edge *edge;
	struct backref_node *lower;
	int idx = *index;

	while (idx > 0) {
		edge = edges[idx - 1];
		lower = edge->node[LOWER];
		if (list_is_last(&edge->list[LOWER], &lower->upper)) {
			idx--;
			continue;
		}
		edge = list_entry(edge->list[LOWER].next,
				  struct backref_edge, list[LOWER]);
		edges[idx - 1] = edge;
		*index = idx;
		return edge->node[UPPER];
	}
	*index = 0;
	return NULL;
}
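/*
 * walk_up_backref() and walk_down_backref() together perform a
 * depth-first traversal of the reference paths above a block: walk_up
 * follows the first upper edge at each level, recording the edges taken
 * in edges[], until it reaches a node with no parents; walk_down then
 * backtracks to the deepest level that still has an unvisited sibling
 * edge.  Callers alternate the two until walk_down returns NULL.
 */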
static void drop_node_buffer(struct backref_node *node)
{
	if (node->eb) {
		if (node->locked) {
			btrfs_tree_unlock(node->eb);
			node->locked = 0;
		}
		free_extent_buffer(node->eb);
		node->eb = NULL;
	}
}

static void drop_backref_node(struct backref_cache *tree,
			      struct backref_node *node)
{
	BUG_ON(!node->lowest);
	BUG_ON(!list_empty(&node->upper));

	drop_node_buffer(node);
	list_del(&node->lower);

	rb_erase(&node->rb_node, &tree->rb_root);
	kfree(node);
}

/*
 * remove a backref node from the backref cache
 */
static void remove_backref_node(struct backref_cache *cache,
				struct backref_node *node)
{
	struct backref_node *upper;
	struct backref_edge *edge;

	if (!node)
		return;

	BUG_ON(!node->lowest);
	while (!list_empty(&node->upper)) {
		edge = list_entry(node->upper.next, struct backref_edge,
				  list[LOWER]);
		upper = edge->node[UPPER];
		list_del(&edge->list[LOWER]);
		list_del(&edge->list[UPPER]);
		kfree(edge);
		/*
		 * add the node to pending list if no other
		 * child block is cached.
		 */
		if (list_empty(&upper->lower)) {
			list_add_tail(&upper->lower,
				      &cache->pending[upper->level]);
			upper->lowest = 1;
		}
	}
	drop_backref_node(cache, node);
}

/*
 * find reloc tree by address of tree root
 */
static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
					  u64 bytenr)
{
	struct rb_node *rb_node;
	struct mapping_node *node;
	struct btrfs_root *root = NULL;

	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
	if (rb_node) {
		node = rb_entry(rb_node, struct mapping_node, rb_node);
		root = (struct btrfs_root *)node->data;
	}
	spin_unlock(&rc->reloc_root_tree.lock);
	return root;
}

static int is_cowonly_root(u64 root_objectid)
{
	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
	    root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
	    root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
	    root_objectid == BTRFS_DEV_TREE_OBJECTID ||
	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
	    root_objectid == BTRFS_CSUM_TREE_OBJECTID)
		return 1;
	return 0;
}

static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
				       u64 root_objectid)
{
	struct btrfs_key key;

	key.objectid = root_objectid;
	key.type = BTRFS_ROOT_ITEM_KEY;
	if (is_cowonly_root(root_objectid))
		key.offset = 0;
	else
		key.offset = (u64)-1;

	return btrfs_read_fs_root_no_name(fs_info, &key);
}

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static noinline_for_stack
struct btrfs_root *find_tree_root(struct reloc_control *rc,
				  struct extent_buffer *leaf,
				  struct btrfs_extent_ref_v0 *ref0)
{
	struct btrfs_root *root;
	u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
	u64 generation = btrfs_ref_generation_v0(leaf, ref0);

	BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);

	root = read_fs_root(rc->extent_root->fs_info, root_objectid);
	BUG_ON(IS_ERR(root));

	if (root->ref_cows &&
	    generation != btrfs_root_generation(&root->root_item))
		return NULL;

	return root;
}
#endif
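/*
 * For a tree block, the extent item may be followed by a
 * btrfs_tree_block_info and a run of inline backrefs.
 * find_inline_backref() returns the [ptr, end) byte range of those
 * inline refs within the leaf, or 1 if there are no inline refs to
 * walk (e.g. a v0 extent item).
 */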
static noinline_for_stack
int find_inline_backref(struct extent_buffer *leaf, int slot,
			unsigned long *ptr, unsigned long *end)
{
	struct btrfs_extent_item *ei;
	struct btrfs_tree_block_info *bi;
	u32 item_size;

	item_size = btrfs_item_size_nr(leaf, slot);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
		return 1;
	}
#endif
	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	WARN_ON(!(btrfs_extent_flags(leaf, ei) &
		  BTRFS_EXTENT_FLAG_TREE_BLOCK));

	if (item_size <= sizeof(*ei) + sizeof(*bi)) {
		WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
		return 1;
	}

	bi = (struct btrfs_tree_block_info *)(ei + 1);
	*ptr = (unsigned long)(bi + 1);
	*end = (unsigned long)ei + item_size;
	return 0;
}
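/*
 * The cache built by build_backref_tree() is an inverted view of the
 * b-trees: the block being relocated is the lowest node, edges point
 * from each node to the upper level blocks that reference it, and
 * nodes whose 'root' field is set correspond to tree roots:
 *
 *	root A       root B
 *	    \         /
 *	   upper   upper
 *	       \   /
 *	       lower	<- block passed to build_backref_tree()
 */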
/*
 * build backref tree for a given tree block. root of the backref tree
 * corresponds to the tree block, leaves of the backref tree correspond
 * to roots of b-trees that reference the tree block.
 *
 * the basic idea of this function is to check backrefs of a given block
 * to find upper level blocks that reference the block, and then check
 * backrefs of these upper level blocks recursively. the recursion stops
 * when a tree root is reached or backrefs for the block are cached.
 *
 * NOTE: if we find that backrefs for a block are cached, we know backrefs
 * for all upper level blocks that directly/indirectly reference the
 * block are also cached.
 */
static struct backref_node *build_backref_tree(struct reloc_control *rc,
					       struct backref_cache *cache,
					       struct btrfs_key *node_key,
					       int level, u64 bytenr)
{
	struct btrfs_path *path1;
	struct btrfs_path *path2;
	struct extent_buffer *eb;
	struct btrfs_root *root;
	struct backref_node *cur;
	struct backref_node *upper;
	struct backref_node *lower;
	struct backref_node *node = NULL;
	struct backref_node *exist = NULL;
	struct backref_edge *edge;
	struct rb_node *rb_node;
	struct btrfs_key key;
	unsigned long end;
	unsigned long ptr;
	LIST_HEAD(list);
	int ret;
	int err = 0;

	path1 = btrfs_alloc_path();
	path2 = btrfs_alloc_path();
	if (!path1 || !path2) {
		err = -ENOMEM;
		goto out;
	}

	node = kmalloc(sizeof(*node), GFP_NOFS);
	if (!node) {
		err = -ENOMEM;
		goto out;
	}

	backref_node_init(node);
	node->bytenr = bytenr;
	node->owner = 0;
	node->level = level;
	node->lowest = 1;
	cur = node;
again:
	end = 0;
	ptr = 0;
	key.objectid = cur->bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = (u64)-1;

	path1->search_commit_root = 1;
	path1->skip_locking = 1;
	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
				0, 0);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	BUG_ON(!ret || !path1->slots[0]);

	path1->slots[0]--;

	WARN_ON(cur->checked);
	if (!list_empty(&cur->upper)) {
		/*
		 * the backref was added previously when processing
		 * backref of type BTRFS_TREE_BLOCK_REF_KEY
		 */
		BUG_ON(!list_is_singular(&cur->upper));
		edge = list_entry(cur->upper.next, struct backref_edge,
				  list[LOWER]);
		BUG_ON(!list_empty(&edge->list[UPPER]));
		exist = edge->node[UPPER];
		/*
		 * add the upper level block to pending list if we need
		 * to check its backrefs
		 */
		if (!exist->checked)
			list_add_tail(&edge->list[UPPER], &list);
	} else {
		exist = NULL;
	}

	while (1) {
		cond_resched();
		eb = path1->nodes[0];

		if (ptr >= end) {
			if (path1->slots[0] >= btrfs_header_nritems(eb)) {
				ret = btrfs_next_leaf(rc->extent_root, path1);
				if (ret < 0) {
					err = ret;
					goto out;
				}
				if (ret > 0)
					break;
				eb = path1->nodes[0];
			}

			btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
			if (key.objectid != cur->bytenr) {
				WARN_ON(exist);
				break;
			}

			if (key.type == BTRFS_EXTENT_ITEM_KEY) {
				ret = find_inline_backref(eb, path1->slots[0],
							  &ptr, &end);
				if (ret)
					goto next;
			}
		}

		if (ptr < end) {
			/* update key for inline back ref */
			struct btrfs_extent_inline_ref *iref;
			iref = (struct btrfs_extent_inline_ref *)ptr;
			key.type = btrfs_extent_inline_ref_type(eb, iref);
			key.offset = btrfs_extent_inline_ref_offset(eb, iref);
			WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
				key.type != BTRFS_SHARED_BLOCK_REF_KEY);
		}

		if (exist &&
		    ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
		      exist->owner == key.offset) ||
		     (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
		      exist->bytenr == key.offset))) {
			exist = NULL;
			goto next;
		}

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
		    key.type == BTRFS_EXTENT_REF_V0_KEY) {
			if (key.objectid == key.offset &&
			    key.type == BTRFS_EXTENT_REF_V0_KEY) {
				struct btrfs_extent_ref_v0 *ref0;
				ref0 = btrfs_item_ptr(eb, path1->slots[0],
						struct btrfs_extent_ref_v0);
				root = find_tree_root(rc, eb, ref0);
				if (root)
					cur->root = root;
				else
					cur->old_root = 1;
				break;
			}
#else
		BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
		if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
#endif
			if (key.objectid == key.offset) {
				/*
				 * only root blocks of reloc trees use
				 * backref of this type.
				 */
				root = find_reloc_root(rc, cur->bytenr);
				BUG_ON(!root);
				cur->root = root;
				break;
			}

			edge = kzalloc(sizeof(*edge), GFP_NOFS);
			if (!edge) {
				err = -ENOMEM;
				goto out;
			}
			rb_node = tree_search(&cache->rb_root, key.offset);
			if (!rb_node) {
				upper = kmalloc(sizeof(*upper), GFP_NOFS);
				if (!upper) {
					kfree(edge);
					err = -ENOMEM;
					goto out;
				}
				backref_node_init(upper);
				upper->bytenr = key.offset;
				upper->owner = 0;
				upper->level = cur->level + 1;
				/*
				 * backrefs for the upper level block aren't
				 * cached, add the block to pending list
				 */
				list_add_tail(&edge->list[UPPER], &list);
			} else {
				upper = rb_entry(rb_node, struct backref_node,
						 rb_node);
				INIT_LIST_HEAD(&edge->list[UPPER]);
			}
			list_add(&edge->list[LOWER], &cur->upper);
			edge->node[UPPER] = upper;
			edge->node[LOWER] = cur;

			goto next;
		} else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
			goto next;
		}

		/* key.type == BTRFS_TREE_BLOCK_REF_KEY */
		root = read_fs_root(rc->extent_root->fs_info, key.offset);
		if (IS_ERR(root)) {
			err = PTR_ERR(root);
			goto out;
		}

		if (btrfs_root_level(&root->root_item) == cur->level) {
			/* tree root */
			BUG_ON(btrfs_root_bytenr(&root->root_item) !=
			       cur->bytenr);
			cur->root = root;
			break;
		}

		level = cur->level + 1;

		/*
		 * search the tree to find upper level blocks
		 * that reference the block.
		 */
		path2->search_commit_root = 1;
		path2->skip_locking = 1;
		path2->lowest_level = level;
		ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
		path2->lowest_level = 0;
		if (ret < 0) {
			err = ret;
			goto out;
		}
		if (ret > 0 && path2->slots[level] > 0)
			path2->slots[level]--;

		eb = path2->nodes[level];
		WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
			cur->bytenr);

		lower = cur;
		for (; level < BTRFS_MAX_LEVEL; level++) {
			if (!path2->nodes[level]) {
				BUG_ON(btrfs_root_bytenr(&root->root_item) !=
				       lower->bytenr);
				lower->root = root;
				break;
			}

			edge = kzalloc(sizeof(*edge), GFP_NOFS);
			if (!edge) {
				err = -ENOMEM;
				goto out;
			}

			eb = path2->nodes[level];
			rb_node = tree_search(&cache->rb_root, eb->start);
			if (!rb_node) {
				upper = kmalloc(sizeof(*upper), GFP_NOFS);
				if (!upper) {
					kfree(edge);
					err = -ENOMEM;
					goto out;
				}
				backref_node_init(upper);
				upper->bytenr = eb->start;
				upper->owner = btrfs_header_owner(eb);
				upper->level = lower->level + 1;

				/*
				 * if we know the block isn't shared
				 * we can avoid checking its backrefs.
				 */
				if (btrfs_block_can_be_shared(root, eb))
					upper->checked = 0;
				else
					upper->checked = 1;

				/*
				 * add the block to pending list if we
				 * need to check its backrefs. only blocks
				 * at 'cur->level + 1' are added to the
				 * tail of pending list. this guarantees
				 * we check backrefs from lower level
				 * blocks to upper level blocks.
				 */
				if (!upper->checked &&
				    level == cur->level + 1) {
					list_add_tail(&edge->list[UPPER],
						      &list);
				} else
					INIT_LIST_HEAD(&edge->list[UPPER]);
			} else {
				upper = rb_entry(rb_node, struct backref_node,
						 rb_node);
				BUG_ON(!upper->checked);
				INIT_LIST_HEAD(&edge->list[UPPER]);
			}
			list_add_tail(&edge->list[LOWER], &lower->upper);
			edge->node[UPPER] = upper;
			edge->node[LOWER] = lower;

			if (rb_node)
				break;
			lower = upper;
			upper = NULL;
		}
		btrfs_release_path(root, path2);
next:
		if (ptr < end) {
			ptr += btrfs_extent_inline_ref_size(key.type);
			if (ptr >= end) {
				WARN_ON(ptr > end);
				ptr = 0;
				end = 0;
			}
		}
		if (ptr >= end)
			path1->slots[0]++;
	}
	btrfs_release_path(rc->extent_root, path1);

	cur->checked = 1;
	WARN_ON(exist);

	/* the pending list isn't empty, take the first block to process */
	if (!list_empty(&list)) {
		edge = list_entry(list.next, struct backref_edge, list[UPPER]);
		list_del_init(&edge->list[UPPER]);
		cur = edge->node[UPPER];
		goto again;
	}

	/*
	 * everything goes well, connect backref nodes and insert backref
	 * nodes into the cache.
	 */
	BUG_ON(!node->checked);
	rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
	BUG_ON(rb_node);

	list_for_each_entry(edge, &node->upper, list[LOWER])
		list_add_tail(&edge->list[UPPER], &list);

	while (!list_empty(&list)) {
		edge = list_entry(list.next, struct backref_edge, list[UPPER]);
		list_del_init(&edge->list[UPPER]);
		upper = edge->node[UPPER];

		if (!RB_EMPTY_NODE(&upper->rb_node)) {
			if (upper->lowest) {
				list_del_init(&upper->lower);
				upper->lowest = 0;
			}

			list_add_tail(&edge->list[UPPER], &upper->lower);
			continue;
		}

		BUG_ON(!upper->checked);
		rb_node = tree_insert(&cache->rb_root, upper->bytenr,
				      &upper->rb_node);
		BUG_ON(rb_node);

		list_add_tail(&edge->list[UPPER], &upper->lower);

		list_for_each_entry(edge, &upper->upper, list[LOWER])
			list_add_tail(&edge->list[UPPER], &list);
	}
out:
	btrfs_free_path(path1);
	btrfs_free_path(path2);
	if (err) {
		INIT_LIST_HEAD(&list);
		upper = node;
		while (upper) {
			if (RB_EMPTY_NODE(&upper->rb_node)) {
				list_splice_tail(&upper->upper, &list);
				kfree(upper);
			}

			if (list_empty(&list))
				break;

			edge = list_entry(list.next, struct backref_edge,
					  list[LOWER]);
			upper = edge->node[UPPER];
			kfree(edge);
		}
		return ERR_PTR(err);
	}
	return node;
}
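/*
 * build_backref_tree() runs in two phases: the loop above only collects
 * nodes and edges for every block on the reference paths, and the nodes
 * are linked together and inserted into cache->rb_root only after all
 * backrefs have been checked.  On error, the not-yet-inserted part of
 * the web (nodes whose rb_node is still empty) is torn down by following
 * the upper edges and freeing nodes and edges as they are visited.
 */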
/*
 * helper to add 'address of tree root -> reloc tree' mapping
 */
static int __add_reloc_root(struct btrfs_root *root)
{
	struct rb_node *rb_node;
	struct mapping_node *node;
	struct reloc_control *rc = root->fs_info->reloc_ctl;

	node = kmalloc(sizeof(*node), GFP_NOFS);
	BUG_ON(!node);

	node->bytenr = root->node->start;
	node->data = root;

	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
			      node->bytenr, &node->rb_node);
	spin_unlock(&rc->reloc_root_tree.lock);
	BUG_ON(rb_node);

	list_add_tail(&root->root_list, &rc->reloc_roots);
	return 0;
}

/*
 * helper to update/delete the 'address of tree root -> reloc tree'
 * mapping
 */
static int __update_reloc_root(struct btrfs_root *root, int del)
{
	struct rb_node *rb_node;
	struct mapping_node *node = NULL;
	struct reloc_control *rc = root->fs_info->reloc_ctl;

	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = tree_search(&rc->reloc_root_tree.rb_root,
			      root->commit_root->start);
	if (rb_node) {
		node = rb_entry(rb_node, struct mapping_node, rb_node);
		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
	}
	spin_unlock(&rc->reloc_root_tree.lock);

	BUG_ON((struct btrfs_root *)node->data != root);

	if (!del) {
		spin_lock(&rc->reloc_root_tree.lock);
		node->bytenr = root->node->start;
		rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
				      node->bytenr, &node->rb_node);
		spin_unlock(&rc->reloc_root_tree.lock);
		BUG_ON(rb_node);
	} else {
		list_del_init(&root->root_list);
		kfree(node);
	}
	return 0;
}

/*
 * create reloc tree for a given fs tree. reloc tree is just a
 * snapshot of the fs tree with special root objectid.
 */
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb;
	struct btrfs_root_item *root_item;
	struct btrfs_key root_key;
	int ret;

	if (root->reloc_root) {
		reloc_root = root->reloc_root;
		reloc_root->last_trans = trans->transid;
		return 0;
	}

	if (!root->fs_info->reloc_ctl ||
	    !root->fs_info->reloc_ctl->create_reloc_root ||
	    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		return 0;

	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
	BUG_ON(!root_item);

	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = root->root_key.objectid;

	ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
			      BTRFS_TREE_RELOC_OBJECTID);
	BUG_ON(ret);

	btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1);
	memcpy(root_item, &root->root_item, sizeof(*root_item));
	btrfs_set_root_refs(root_item, 1);
	btrfs_set_root_bytenr(root_item, eb->start);
	btrfs_set_root_level(root_item, btrfs_header_level(eb));
	btrfs_set_root_generation(root_item, trans->transid);
	memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key));
	root_item->drop_level = 0;

	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	ret = btrfs_insert_root(trans, root->fs_info->tree_root,
				&root_key, root_item);
	BUG_ON(ret);
	kfree(root_item);

	reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
						 &root_key);
	BUG_ON(IS_ERR(reloc_root));
	reloc_root->last_trans = trans->transid;

	__add_reloc_root(reloc_root);
	root->reloc_root = reloc_root;
	return 0;
}

/*
 * update root item of reloc tree
 */
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;
	struct btrfs_root_item *root_item;
	int del = 0;
	int ret;

	if (!root->reloc_root)
		return 0;

	reloc_root = root->reloc_root;
	root_item = &reloc_root->root_item;

	if (btrfs_root_refs(root_item) == 0) {
		root->reloc_root = NULL;
		del = 1;
	}

	__update_reloc_root(reloc_root, del);

	if (reloc_root->commit_root != reloc_root->node) {
		btrfs_set_root_node(root_item, reloc_root->node);
		free_extent_buffer(reloc_root->commit_root);
		reloc_root->commit_root = btrfs_root_node(reloc_root);
	}

	ret = btrfs_update_root(trans, root->fs_info->tree_root,
				&reloc_root->root_key, root_item);
	BUG_ON(ret);
	return 0;
}
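/*
 * Reloc tree lifecycle: btrfs_init_reloc_root() snapshots the fs tree
 * under BTRFS_TREE_RELOC_OBJECTID (with the fs tree's objectid stored
 * in root_key.offset) and registers the new root in rc->reloc_root_tree.
 * btrfs_update_reloc_root() re-keys that mapping whenever the reloc
 * root node changes, and removes it once the root's refcount drops to
 * zero.
 */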
/*
 * helper to find first cached inode with inode number >= objectid
 * in a subvolume
 */
static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
{
	struct rb_node *node;
	struct rb_node *prev;
	struct btrfs_inode *entry;
	struct inode *inode;

	spin_lock(&root->inode_lock);
again:
	node = root->inode_tree.rb_node;
	prev = NULL;
	while (node) {
		prev = node;
		entry = rb_entry(node, struct btrfs_inode, rb_node);

		if (objectid < entry->vfs_inode.i_ino)
			node = node->rb_left;
		else if (objectid > entry->vfs_inode.i_ino)
			node = node->rb_right;
		else
			break;
	}
	if (!node) {
		while (prev) {
			entry = rb_entry(prev, struct btrfs_inode, rb_node);
			if (objectid <= entry->vfs_inode.i_ino) {
				node = prev;
				break;
			}
			prev = rb_next(prev);
		}
	}
	while (node) {
		entry = rb_entry(node, struct btrfs_inode, rb_node);
		inode = igrab(&entry->vfs_inode);
		if (inode) {
			spin_unlock(&root->inode_lock);
			return inode;
		}

		objectid = entry->vfs_inode.i_ino + 1;
		if (cond_resched_lock(&root->inode_lock))
			goto again;

		node = rb_next(node);
	}
	spin_unlock(&root->inode_lock);
	return NULL;
}

static int in_block_group(u64 bytenr,
			  struct btrfs_block_group_cache *block_group)
{
	if (bytenr >= block_group->key.objectid &&
	    bytenr < block_group->key.objectid + block_group->key.offset)
		return 1;
	return 0;
}

/*
 * get new location of data
 */
static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
			    u64 bytenr, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *leaf;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	bytenr -= BTRFS_I(reloc_inode)->index_cnt;
	ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
				       bytenr, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);

	BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
	       btrfs_file_extent_compression(leaf, fi) ||
	       btrfs_file_extent_encryption(leaf, fi) ||
	       btrfs_file_extent_other_encoding(leaf, fi));

	if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
		ret = 1;
		goto out;
	}

	if (new_bytenr)
		*new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
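/*
 * get_new_location() relies on the layout of rc->data_inode: the file
 * extents of that inode mirror the data extents being relocated, shifted
 * by BTRFS_I(reloc_inode)->index_cnt, so subtracting index_cnt from the
 * old bytenr gives the file offset at which the relocated copy lives.
 */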
/*
 * update file extent items in the tree leaf to point to
 * the new locations.
 */
static int replace_file_extents(struct btrfs_trans_handle *trans,
				struct reloc_control *rc,
				struct btrfs_root *root,
				struct extent_buffer *leaf,
				struct list_head *inode_list)
{
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	struct inode *inode = NULL;
	struct inodevec *ivec = NULL;
	u64 parent;
	u64 bytenr;
	u64 new_bytenr;
	u64 num_bytes;
	u64 end;
	u32 nritems;
	u32 i;
	int ret;
	int first = 1;
	int dirty = 0;

	if (rc->stage != UPDATE_DATA_PTRS)
		return 0;

	/* reloc trees always use full backref */
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		parent = leaf->start;
	else
		parent = 0;

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		cond_resched();
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		if (bytenr == 0)
			continue;
		if (!in_block_group(bytenr, rc->block_group))
			continue;

		/*
		 * if we are modifying a block in the fs tree, wait for
		 * readpage to complete and drop the extent cache
		 */
		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
			if (!ivec || ivec->nr == INODEVEC_SIZE) {
				ivec = kmalloc(sizeof(*ivec), GFP_NOFS);
				BUG_ON(!ivec);
				ivec->nr = 0;
				list_add_tail(&ivec->list, inode_list);
			}
			if (first) {
				inode = find_next_inode(root, key.objectid);
				if (inode)
					ivec->inode[ivec->nr++] = inode;
				first = 0;
			} else if (inode && inode->i_ino < key.objectid) {
				inode = find_next_inode(root, key.objectid);
				if (inode)
					ivec->inode[ivec->nr++] = inode;
			}
			if (inode && inode->i_ino == key.objectid) {
				end = key.offset +
				      btrfs_file_extent_num_bytes(leaf, fi);
				WARN_ON(!IS_ALIGNED(key.offset,
						    root->sectorsize));
				WARN_ON(!IS_ALIGNED(end, root->sectorsize));
				end--;
				ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
						      key.offset, end,
						      GFP_NOFS);
				if (!ret)
					continue;

				btrfs_drop_extent_cache(inode, key.offset, end,
							1);
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      key.offset, end, GFP_NOFS);
			}
		}

		ret = get_new_location(rc->data_inode, &new_bytenr,
				       bytenr, num_bytes);
		if (ret > 0)
			continue;
		BUG_ON(ret < 0);

		btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
		dirty = 1;

		key.offset -= btrfs_file_extent_offset(leaf, fi);
		ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
					   num_bytes, parent,
					   btrfs_header_owner(leaf),
					   key.objectid, key.offset);
		BUG_ON(ret);

		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
					parent, btrfs_header_owner(leaf),
					key.objectid, key.offset);
		BUG_ON(ret);
	}
	if (dirty)
		btrfs_mark_buffer_dirty(leaf);
	return 0;
}
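/*
 * memcmp_node_keys() compares the raw disk keys, which is enough for an
 * equality test; replace_path() uses it to notice pointers whose keys
 * have diverged between the fs tree and the reloc tree.
 */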
static noinline_for_stack
int memcmp_node_keys(struct extent_buffer *eb, int slot,
		     struct btrfs_path *path, int level)
{
	struct btrfs_disk_key key1;
	struct btrfs_disk_key key2;
	btrfs_node_key(eb, &key1, slot);
	btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
	return memcmp(&key1, &key2, sizeof(key1));
}

/*
 * try to replace tree blocks in fs tree with the new blocks
 * in reloc tree. tree blocks that haven't been modified since the
 * reloc tree was created can be replaced.
 *
 * if a block was replaced, level of the block + 1 is returned.
 * if no block got replaced, 0 is returned. if there are other
 * errors, a negative error number is returned.
 */
static int replace_path(struct btrfs_trans_handle *trans,
			struct btrfs_root *dest, struct btrfs_root *src,
			struct btrfs_path *path, struct btrfs_key *next_key,
			struct extent_buffer **leaf,
			int lowest_level, int max_level)
{
	struct extent_buffer *eb;
	struct extent_buffer *parent;
	struct btrfs_key key;
	u64 old_bytenr;
	u64 new_bytenr;
	u64 old_ptr_gen;
	u64 new_ptr_gen;
	u64 last_snapshot;
	u32 blocksize;
	int level;
	int ret;
	int slot;

	BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
	BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
	BUG_ON(lowest_level > 1 && leaf);

	last_snapshot = btrfs_root_last_snapshot(&src->root_item);

	slot = path->slots[lowest_level];
	btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);

	eb = btrfs_lock_root_node(dest);
	btrfs_set_lock_blocking(eb);
	level = btrfs_header_level(eb);

	if (level < lowest_level) {
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
		return 0;
	}

	ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
	BUG_ON(ret);
	btrfs_set_lock_blocking(eb);

	if (next_key) {
		next_key->objectid = (u64)-1;
		next_key->type = (u8)-1;
		next_key->offset = (u64)-1;
	}

	parent = eb;
	while (1) {
		level = btrfs_header_level(parent);
		BUG_ON(level < lowest_level);

		ret = btrfs_bin_search(parent, &key, level, &slot);
		if (ret && slot > 0)
			slot--;

		if (next_key && slot + 1 < btrfs_header_nritems(parent))
			btrfs_node_key_to_cpu(parent, next_key, slot + 1);

		old_bytenr = btrfs_node_blockptr(parent, slot);
		blocksize = btrfs_level_size(dest, level - 1);
		old_ptr_gen = btrfs_node_ptr_generation(parent, slot);

		if (level <= max_level) {
			eb = path->nodes[level];
			new_bytenr = btrfs_node_blockptr(eb,
							 path->slots[level]);
			new_ptr_gen = btrfs_node_ptr_generation(eb,
							path->slots[level]);
		} else {
			new_bytenr = 0;
			new_ptr_gen = 0;
		}

		if (new_bytenr > 0 && new_bytenr == old_bytenr) {
			WARN_ON(1);
			ret = level;
			break;
		}

		if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
		    memcmp_node_keys(parent, slot, path, level)) {
			if (level <= lowest_level && !leaf) {
				ret = 0;
				break;
			}

			eb = read_tree_block(dest, old_bytenr, blocksize,
					     old_ptr_gen);
			btrfs_tree_lock(eb);
			ret = btrfs_cow_block(trans, dest, eb, parent,
					      slot, &eb);
			BUG_ON(ret);
			btrfs_set_lock_blocking(eb);

			if (level <= lowest_level) {
				*leaf = eb;
				ret = 0;
				break;
			}

			btrfs_tree_unlock(parent);
			free_extent_buffer(parent);

			parent = eb;
			continue;
		}

		btrfs_node_key_to_cpu(path->nodes[level], &key,
				      path->slots[level]);
		btrfs_release_path(src, path);

		path->lowest_level = level;
		ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
		path->lowest_level = 0;
		BUG_ON(ret);

		/*
		 * swap blocks in fs tree and reloc tree.
		 */
		btrfs_set_node_blockptr(parent, slot, new_bytenr);
		btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
		btrfs_mark_buffer_dirty(parent);

		btrfs_set_node_blockptr(path->nodes[level],
					path->slots[level], old_bytenr);
		btrfs_set_node_ptr_generation(path->nodes[level],
					      path->slots[level], old_ptr_gen);
		btrfs_mark_buffer_dirty(path->nodes[level]);

		ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
					   path->nodes[level]->start,
					   src->root_key.objectid, level - 1, 0);
		BUG_ON(ret);
		ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
					   0, dest->root_key.objectid, level - 1,
					   0);
		BUG_ON(ret);

		ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
					path->nodes[level]->start,
					src->root_key.objectid, level - 1, 0);
		BUG_ON(ret);

		ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
					0, dest->root_key.objectid, level - 1,
					0);
		BUG_ON(ret);

		btrfs_unlock_up_safe(path, 0);

		ret = level;
		break;
	}
	btrfs_tree_unlock(parent);
	free_extent_buffer(parent);
	return ret;
}

/*
 * helper to find next relocated block in reloc tree
 */
static noinline_for_stack
int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
		       int *level)
{
	struct extent_buffer *eb;
	int i;
	u64 last_snapshot;
	u32 nritems;

	last_snapshot = btrfs_root_last_snapshot(&root->root_item);

	for (i = 0; i < *level; i++) {
		free_extent_buffer(path->nodes[i]);
		path->nodes[i] = NULL;
	}

	for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
		eb = path->nodes[i];
		nritems = btrfs_header_nritems(eb);
		while (path->slots[i] + 1 < nritems) {
			path->slots[i]++;
			if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
			    last_snapshot)
				continue;

			*level = i;
			return 0;
		}
		free_extent_buffer(path->nodes[i]);
		path->nodes[i] = NULL;
	}
	return 1;
}

/*
 * walk down reloc tree to find relocated block of lowest level
 */
static noinline_for_stack
int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
			 int *level)
{
	struct extent_buffer *eb = NULL;
	int i;
	u64 bytenr;
	u64 ptr_gen = 0;
	u64 last_snapshot;
	u32 blocksize;
	u32 nritems;

	last_snapshot = btrfs_root_last_snapshot(&root->root_item);

	for (i = *level; i > 0; i--) {
		eb = path->nodes[i];
		nritems = btrfs_header_nritems(eb);
		while (path->slots[i] < nritems) {
			ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
			if (ptr_gen > last_snapshot)
				break;
			path->slots[i]++;
		}
		if (path->slots[i] >= nritems) {
			if (i == *level)
				break;
			*level = i + 1;
			return 0;
		}
		if (i == 1) {
			*level = i;
			return 0;
		}

		bytenr = btrfs_node_blockptr(eb, path->slots[i]);
		blocksize = btrfs_level_size(root, i - 1);
		eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
		BUG_ON(btrfs_header_level(eb) != i - 1);
		path->nodes[i - 1] = eb;
		path->slots[i - 1] = 0;
	}
	return 1;
}
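/*
 * walk_down_reloc_tree() and walk_up_reloc_tree() visit exactly those
 * pointers whose generation is newer than the source tree's
 * last_snapshot, i.e. the blocks COWed into the reloc tree after it
 * was created; everything older is by definition still shared with the
 * fs tree and needs no merging.
 */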
/*
 * invalidate extent cache for file extents whose keys are in the range
 * of [min_key, max_key)
 */
static int invalidate_extent_cache(struct btrfs_root *root,
				   struct btrfs_key *min_key,
				   struct btrfs_key *max_key)
{
	struct inode *inode = NULL;
	u64 objectid;
	u64 start, end;

	objectid = min_key->objectid;
	while (1) {
		cond_resched();
		iput(inode);

		if (objectid > max_key->objectid)
			break;

		inode = find_next_inode(root, objectid);
		if (!inode)
			break;

		if (inode->i_ino > max_key->objectid) {
			iput(inode);
			break;
		}

		objectid = inode->i_ino + 1;
		if (!S_ISREG(inode->i_mode))
			continue;

		if (unlikely(min_key->objectid == inode->i_ino)) {
			if (min_key->type > BTRFS_EXTENT_DATA_KEY)
				continue;
			if (min_key->type < BTRFS_EXTENT_DATA_KEY)
				start = 0;
			else {
				start = min_key->offset;
				WARN_ON(!IS_ALIGNED(start, root->sectorsize));
			}
		} else {
			start = 0;
		}

		if (unlikely(max_key->objectid == inode->i_ino)) {
			if (max_key->type < BTRFS_EXTENT_DATA_KEY)
				continue;
			if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
				end = (u64)-1;
			} else {
				if (max_key->offset == 0)
					continue;
				end = max_key->offset;
				WARN_ON(!IS_ALIGNED(end, root->sectorsize));
				end--;
			}
		} else {
			end = (u64)-1;
		}

		/* the lock_extent waits for readpage to complete */
		lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
		btrfs_drop_extent_cache(inode, start, end, 1);
		unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
	}
	return 0;
}

static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)
{
	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 <
		    btrfs_header_nritems(path->nodes[level])) {
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
			return 0;
		}
		level++;
	}
	return 1;
}
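/*
 * merge_reloc_root() below is restartable: the position reached in the
 * reloc tree is saved in root_item->drop_progress/drop_level at every
 * transaction boundary, so an interrupted merge resumes from where it
 * left off rather than starting over.
 */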
/*
 * merge the relocated tree blocks in reloc tree with corresponding
 * fs tree.
 */
static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
					       struct btrfs_root *root)
{
	LIST_HEAD(inode_list);
	struct btrfs_key key;
	struct btrfs_key next_key;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *reloc_root;
	struct btrfs_root_item *root_item;
	struct btrfs_path *path;
	struct extent_buffer *leaf = NULL;
	unsigned long nr;
	int level;
	int max_level;
	int replaced = 0;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	reloc_root = root->reloc_root;
	root_item = &reloc_root->root_item;

	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		level = btrfs_root_level(root_item);
		extent_buffer_get(reloc_root->node);
		path->nodes[level] = reloc_root->node;
		path->slots[level] = 0;
	} else {
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);

		level = root_item->drop_level;
		BUG_ON(level == 0);
		path->lowest_level = level;
		ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
		path->lowest_level = 0;
		if (ret < 0) {
			btrfs_free_path(path);
			return ret;
		}

		btrfs_node_key_to_cpu(path->nodes[level], &next_key,
				      path->slots[level]);
		WARN_ON(memcmp(&key, &next_key, sizeof(key)));

		btrfs_unlock_up_safe(path, 0);
	}

	if (level == 0 && rc->stage == UPDATE_DATA_PTRS) {
		trans = btrfs_start_transaction(root, 1);

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, 0);
		btrfs_release_path(reloc_root, path);

		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0) {
			err = ret;
			goto out;
		}

		leaf = path->nodes[0];
		btrfs_unlock_up_safe(path, 1);
		ret = replace_file_extents(trans, rc, root, leaf,
					   &inode_list);
		if (ret < 0)
			err = ret;
		goto out;
	}

	memset(&next_key, 0, sizeof(next_key));

	while (1) {
		leaf = NULL;
		replaced = 0;
		trans = btrfs_start_transaction(root, 1);
		max_level = level;

		ret = walk_down_reloc_tree(reloc_root, path, &level);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		if (ret > 0)
			break;

		if (!find_next_key(path, level, &key) &&
		    btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
			ret = 0;
		} else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) {
			ret = replace_path(trans, root, reloc_root,
					   path, &next_key, &leaf,
					   level, max_level);
		} else {
			ret = replace_path(trans, root, reloc_root,
					   path, &next_key, NULL,
					   level, max_level);
		}
		if (ret < 0) {
			err = ret;
			goto out;
		}

		if (ret > 0) {
			level = ret;
			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			replaced = 1;
		} else if (leaf) {
			/*
			 * no block got replaced, try replacing file extents
			 */
			btrfs_item_key_to_cpu(leaf, &key, 0);
			ret = replace_file_extents(trans, rc, root, leaf,
						   &inode_list);
			btrfs_tree_unlock(leaf);
			free_extent_buffer(leaf);
			BUG_ON(ret < 0);
		}

		ret = walk_up_reloc_tree(reloc_root, path, &level);
		if (ret > 0)
			break;

		BUG_ON(level == 0);
		/*
		 * save the merging progress in the drop_progress.
		 * this is OK since root refs == 1 in this case.
		 */
		btrfs_node_key(path->nodes[level], &root_item->drop_progress,
			       path->slots[level]);
		root_item->drop_level = level;

		nr = trans->blocks_used;
		btrfs_end_transaction(trans, root);

		btrfs_btree_balance_dirty(root, nr);

		if (replaced && rc->stage == UPDATE_DATA_PTRS)
			invalidate_extent_cache(root, &key, &next_key);
	}

	/*
	 * handle the case where only one block in the fs tree needs to
	 * be relocated and that block is the tree root.
	 */
	leaf = btrfs_lock_root_node(root);
	ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
	btrfs_tree_unlock(leaf);
	free_extent_buffer(leaf);
	if (ret < 0)
		err = ret;
out:
	btrfs_free_path(path);

	if (err == 0) {
		memset(&root_item->drop_progress, 0,
		       sizeof(root_item->drop_progress));
		root_item->drop_level = 0;
		btrfs_set_root_refs(root_item, 0);
	}

	nr = trans->blocks_used;
	btrfs_end_transaction(trans, root);

	btrfs_btree_balance_dirty(root, nr);

	/*
	 * put inodes while we aren't holding the tree locks
	 */
	while (!list_empty(&inode_list)) {
		struct inodevec *ivec;
		ivec = list_entry(inode_list.next, struct inodevec, list);
		list_del(&ivec->list);
		while (ivec->nr > 0) {
			ivec->nr--;
			iput(ivec->inode[ivec->nr]);
		}
		kfree(ivec);
	}

	if (replaced && rc->stage == UPDATE_DATA_PTRS)
		invalidate_extent_cache(root, &key, &next_key);

	return err;
}

/*
 * callback for the work threads.
 * this function merges the reloc tree with the corresponding fs tree,
 * and then drops the reloc tree.
 */
static void merge_func(struct btrfs_work *work)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_root *reloc_root;
	struct async_merge *async;

	async = container_of(work, struct async_merge, work);
	reloc_root = async->root;

	if (btrfs_root_refs(&reloc_root->root_item) > 0) {
		root = read_fs_root(reloc_root->fs_info,
				    reloc_root->root_key.offset);
		BUG_ON(IS_ERR(root));
		BUG_ON(root->reloc_root != reloc_root);

		merge_reloc_root(async->rc, root);

		trans = btrfs_start_transaction(root, 1);
		btrfs_update_reloc_root(trans, root);
		btrfs_end_transaction(trans, root);
	}

	btrfs_drop_snapshot(reloc_root, 0);

	if (atomic_dec_and_test(async->num_pending))
		complete(async->done);

	kfree(async);
}

static int merge_reloc_roots(struct reloc_control *rc)
{
	struct async_merge *async;
	struct btrfs_root *root;
	struct completion done;
	atomic_t num_pending;

	init_completion(&done);
	atomic_set(&num_pending, 1);

	while (!list_empty(&rc->reloc_roots)) {
		root = list_entry(rc->reloc_roots.next,
				  struct btrfs_root, root_list);
		list_del_init(&root->root_list);

		async = kmalloc(sizeof(*async), GFP_NOFS);
		BUG_ON(!async);
		async->work.func = merge_func;
		async->work.flags = 0;
		async->rc = rc;
		async->root = root;
		async->done = &done;
		async->num_pending = &num_pending;
		atomic_inc(&num_pending);
		btrfs_queue_worker(&rc->workers, &async->work);
	}

	if (!atomic_dec_and_test(&num_pending))
		wait_for_completion(&done);

	BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
	return 0;
}
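/*
 * merge_reloc_roots() fans the per-root merges out to the worker pool.
 * num_pending starts at 1 so that 'done' cannot be completed while
 * workers are still being queued; whichever side performs the final
 * atomic_dec_and_test() decides whether the waiter has to block at all.
 */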
static void free_block_list(struct rb_root *blocks)
{
	struct tree_block *block;
	struct rb_node *rb_node;
	while ((rb_node = rb_first(blocks))) {
		block = rb_entry(rb_node, struct tree_block, rb_node);
		rb_erase(rb_node, blocks);
		kfree(block);
	}
}

static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
				      struct btrfs_root *reloc_root)
{
	struct btrfs_root *root;

	if (reloc_root->last_trans == trans->transid)
		return 0;

	root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
	BUG_ON(IS_ERR(root));
	BUG_ON(root->reloc_root != reloc_root);

	return btrfs_record_root_in_trans(trans, root);
}

/*
 * select one tree from the trees that reference the block.
 * for blocks in reference counted trees, we prefer the reloc tree.
 * if no reloc tree is found and reloc_only is true, NULL is returned.
 */
static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans,
					    struct backref_node *node,
					    struct backref_edge *edges[],
					    int *nr, int reloc_only)
{
	struct backref_node *next;
	struct btrfs_root *root;
	int index;
	int loop = 0;
again:
	index = 0;
	next = node;
	while (1) {
		cond_resched();
		next = walk_up_backref(next, edges, &index);
		root = next->root;
		if (!root) {
			BUG_ON(!node->old_root);
			goto skip;
		}

		/* no other choice for a non-reference counted tree */
		if (!root->ref_cows) {
			BUG_ON(reloc_only);
			break;
		}

		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
			record_reloc_root_in_trans(trans, root);
			break;
		}

		if (loop) {
			btrfs_record_root_in_trans(trans, root);
			break;
		}

		if (reloc_only || next != node) {
			if (!root->reloc_root)
				btrfs_record_root_in_trans(trans, root);
			root = root->reloc_root;
			/*
			 * if the reloc tree was created in the current
			 * transaction, there is no node in the backref
			 * tree that corresponds to the root of the reloc
			 * tree.
			 */
			if (btrfs_root_last_snapshot(&root->root_item) ==
			    trans->transid - 1)
				break;
		}
skip:
		root = NULL;
		next = walk_down_backref(edges, &index);
		if (!next || next->level <= node->level)
			break;
	}

	if (!root && !loop && !reloc_only) {
		loop = 1;
		goto again;
	}

	if (root)
		*nr = index;
	else
		*nr = 0;

	return root;
}

static noinline_for_stack
struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
				   struct backref_node *node)
{
	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	int nr;
	return __select_one_root(trans, node, edges, &nr, 0);
}

static noinline_for_stack
struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
				     struct backref_node *node,
				     struct backref_edge *edges[], int *nr)
{
	return __select_one_root(trans, node, edges, nr, 1);
}

static void grab_path_buffers(struct btrfs_path *path,
			      struct backref_node *node,
			      struct backref_edge *edges[], int nr)
{
	int i = 0;
	while (1) {
		drop_node_buffer(node);
		node->eb = path->nodes[node->level];
		BUG_ON(!node->eb);
		if (path->locks[node->level])
			node->locked = 1;
		path->nodes[node->level] = NULL;
		path->locks[node->level] = 0;

		if (i >= nr)
			break;

		edges[i]->blockptr = node->eb->start;
		node = edges[i]->node[UPPER];
		i++;
	}
}
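/*
 * do_relocation() visits every upper edge of a node.  The edges[]
 * arrays used here and elsewhere hold BTRFS_MAX_LEVEL - 1 entries
 * because a reference path contains at most one edge per tree level
 * below the root.
 */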
/*
 * relocate a block tree, and then update pointers in upper level
 * blocks that reference the block to point to the new location.
 *
 * if called by link_to_upper, the block has already been relocated.
 * in that case this function just updates pointers.
 */
static int do_relocation(struct btrfs_trans_handle *trans,
			 struct backref_node *node,
			 struct btrfs_key *key,
			 struct btrfs_path *path, int lowest)
{
	struct backref_node *upper;
	struct backref_edge *edge;
	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	struct btrfs_root *root;
	struct extent_buffer *eb;
	u32 blocksize;
	u64 bytenr;
	u64 generation;
	int nr;
	int slot;
	int ret;
	int err = 0;

	BUG_ON(lowest && node->eb);

	path->lowest_level = node->level + 1;
	list_for_each_entry(edge, &node->upper, list[LOWER]) {
		cond_resched();
		if (node->eb && node->eb->start == edge->blockptr)
			continue;

		upper = edge->node[UPPER];
		root = select_reloc_root(trans, upper, edges, &nr);
		if (!root)
			continue;

		if (upper->eb && !upper->locked)
			drop_node_buffer(upper);

		if (!upper->eb) {
			ret = btrfs_search_slot(trans, root, key, path, 0, 1);
			if (ret < 0) {
				err = ret;
				break;
			}
			BUG_ON(ret > 0);

			slot = path->slots[upper->level];

			btrfs_unlock_up_safe(path, upper->level + 1);
			grab_path_buffers(path, upper, edges, nr);

			btrfs_release_path(NULL, path);
		} else {
			ret = btrfs_bin_search(upper->eb, key, upper->level,
					       &slot);
			BUG_ON(ret);
		}

		bytenr = btrfs_node_blockptr(upper->eb, slot);
		if (!lowest) {
			if (node->eb->start == bytenr) {
				btrfs_tree_unlock(upper->eb);
				upper->locked = 0;
				continue;
			}
		} else {
			BUG_ON(node->bytenr != bytenr);
		}

		blocksize = btrfs_level_size(root, node->level);
		generation = btrfs_node_ptr_generation(upper->eb, slot);
		eb = read_tree_block(root, bytenr, blocksize, generation);
		btrfs_tree_lock(eb);
		btrfs_set_lock_blocking(eb);

		if (!node->eb) {
			ret = btrfs_cow_block(trans, root, eb, upper->eb,
					      slot, &eb);
			if (ret < 0) {
				err = ret;
				break;
			}
			btrfs_set_lock_blocking(eb);
			node->eb = eb;
			node->locked = 1;
		} else {
			btrfs_set_node_blockptr(upper->eb, slot,
						node->eb->start);
			btrfs_set_node_ptr_generation(upper->eb, slot,
						      trans->transid);
			btrfs_mark_buffer_dirty(upper->eb);

			ret = btrfs_inc_extent_ref(trans, root,
						   node->eb->start, blocksize,
						   upper->eb->start,
						   btrfs_header_owner(upper->eb),
						   node->level, 0);
			BUG_ON(ret);

			ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
			BUG_ON(ret);
		}
		if (!lowest) {
			btrfs_tree_unlock(upper->eb);
			upper->locked = 0;
		}
	}
	path->lowest_level = 0;
	return err;
}

static int link_to_upper(struct btrfs_trans_handle *trans,
			 struct backref_node *node,
			 struct btrfs_path *path)
{
	struct btrfs_key key;
	if (!node->eb || list_empty(&node->upper))
		return 0;

	btrfs_node_key_to_cpu(node->eb, &key, 0);
	return do_relocation(trans, node, &key, path, 0);
}
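/*
 * finish_pending_nodes() drains cache->pending[] from level 0 upwards.
 * remove_backref_node() may push a node's parents onto the level + 1
 * pending list, so once the top level has been processed the whole
 * cache must be empty.
 */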
static int finish_pending_nodes(struct btrfs_trans_handle *trans,
				struct backref_cache *cache,
				struct btrfs_path *path)
{
	struct backref_node *node;
	int level;
	int ret;
	int err = 0;

	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
		while (!list_empty(&cache->pending[level])) {
			node = list_entry(cache->pending[level].next,
					  struct backref_node, lower);
			BUG_ON(node->level != level);

			ret = link_to_upper(trans, node, path);
			if (ret < 0)
				err = ret;
			/*
			 * this removes the node from the pending list and
			 * may add some other nodes to the level + 1
			 * pending list
			 */
			remove_backref_node(cache, node);
		}
	}
	BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
	return err;
}

static void mark_block_processed(struct reloc_control *rc,
				 struct backref_node *node)
{
	u32 blocksize;
	if (node->level == 0 ||
	    in_block_group(node->bytenr, rc->block_group)) {
		blocksize = btrfs_level_size(rc->extent_root, node->level);
		set_extent_bits(&rc->processed_blocks, node->bytenr,
				node->bytenr + blocksize - 1, EXTENT_DIRTY,
				GFP_NOFS);
	}
	node->processed = 1;
}

/*
 * mark a block and all blocks that directly/indirectly reference the
 * block as processed.
 */
static void update_processed_blocks(struct reloc_control *rc,
				    struct backref_node *node)
{
	struct backref_node *next = node;
	struct backref_edge *edge;
	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	int index = 0;

	while (next) {
		cond_resched();
		while (1) {
			if (next->processed)
				break;

			mark_block_processed(rc, next);

			if (list_empty(&next->upper))
				break;

			edge = list_entry(next->upper.next,
					  struct backref_edge, list[LOWER]);
			edges[index++] = edge;
			next = edge->node[UPPER];
		}
		next = walk_down_backref(edges, &index);
	}
}

static int tree_block_processed(u64 bytenr, u32 blocksize,
				struct reloc_control *rc)
{
	if (test_range_bit(&rc->processed_blocks, bytenr,
			   bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
		return 1;
	return 0;
}

/*
 * check if any file extent pointers in the leaf point to data
 * that requires processing
 */
static int check_file_extents(struct reloc_control *rc,
			      u64 bytenr, u32 blocksize, u64 ptr_gen)
{
	struct btrfs_key found_key;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *leaf;
	u32 nritems;
	int i;
	int ret = 0;

	leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen);

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		cond_resched();
		btrfs_item_key_to_cpu(leaf, &found_key, i);
		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		if (bytenr == 0)
			continue;
		if (in_block_group(bytenr, rc->block_group)) {
			ret = 1;
			break;
		}
	}
	free_extent_buffer(leaf);
	return ret;
}
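/*
 * add_child_blocks() makes two passes over the node below: the first
 * pass only issues readahead for candidate child blocks, the second
 * re-applies the same filters and queues the blocks, so the metadata
 * reads overlap with the more expensive check_file_extents() calls.
 */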
/*
 * scan child blocks of a given block to find blocks that require
 * processing
 */
static int add_child_blocks(struct btrfs_trans_handle *trans,
			    struct reloc_control *rc,
			    struct backref_node *node,
			    struct rb_root *blocks)
{
	struct tree_block *block;
	struct rb_node *rb_node;
	u64 bytenr;
	u64 ptr_gen;
	u32 blocksize;
	u32 nritems;
	int i;
	int err = 0;

	nritems = btrfs_header_nritems(node->eb);
	blocksize = btrfs_level_size(rc->extent_root, node->level - 1);
	for (i = 0; i < nritems; i++) {
		cond_resched();
		bytenr = btrfs_node_blockptr(node->eb, i);
		ptr_gen = btrfs_node_ptr_generation(node->eb, i);
		if (ptr_gen == trans->transid)
			continue;
		if (!in_block_group(bytenr, rc->block_group) &&
		    (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
			continue;
		if (tree_block_processed(bytenr, blocksize, rc))
			continue;

		readahead_tree_block(rc->extent_root,
				     bytenr, blocksize, ptr_gen);
	}

	for (i = 0; i < nritems; i++) {
		cond_resched();
		bytenr = btrfs_node_blockptr(node->eb, i);
		ptr_gen = btrfs_node_ptr_generation(node->eb, i);
		if (ptr_gen == trans->transid)
			continue;
		if (!in_block_group(bytenr, rc->block_group) &&
		    (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
			continue;
		if (tree_block_processed(bytenr, blocksize, rc))
			continue;
		if (!in_block_group(bytenr, rc->block_group) &&
		    !check_file_extents(rc, bytenr, blocksize, ptr_gen))
			continue;

		block = kmalloc(sizeof(*block), GFP_NOFS);
		if (!block) {
			err = -ENOMEM;
			break;
		}
		block->bytenr = bytenr;
		btrfs_node_key_to_cpu(node->eb, &block->key, i);
		block->level = node->level - 1;
		block->key_ready = 1;
		rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
		BUG_ON(rb_node);
	}
	if (err)
		free_block_list(blocks);
	return err;
}

/*
 * find adjacent blocks that require processing
 */
static noinline_for_stack
int add_adjacent_blocks(struct btrfs_trans_handle *trans,
			struct reloc_control *rc,
			struct backref_cache *cache,
			struct rb_root *blocks, int level,
			struct backref_node **upper)
{
	struct backref_node *node;
	int ret = 0;

	WARN_ON(!list_empty(&cache->pending[level]));

	if (list_empty(&cache->pending[level + 1]))
		return 1;

	node = list_entry(cache->pending[level + 1].next,
			  struct backref_node, lower);
	if (node->eb)
		ret = add_child_blocks(trans, rc, node, blocks);

	*upper = node;
	return ret;
}

static int get_tree_block_key(struct reloc_control *rc,
			      struct tree_block *block)
{
	struct extent_buffer *eb;

	BUG_ON(block->key_ready);
	eb = read_tree_block(rc->extent_root, block->bytenr,
			     block->key.objectid, block->key.offset);
	WARN_ON(btrfs_header_level(eb) != block->level);
	if (block->level == 0)
		btrfs_item_key_to_cpu(eb, &block->key, 0);
	else
		btrfs_node_key_to_cpu(eb, &block->key, 0);
	free_extent_buffer(eb);
	block->key_ready = 1;
	return 0;
}

static int reada_tree_block(struct reloc_control *rc,
			    struct tree_block *block)
{
	BUG_ON(block->key_ready);
	readahead_tree_block(rc->extent_root, block->bytenr,
			     block->key.objectid, block->key.offset);
	return 0;
}
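/*
 * note on the tree_block key above: until the real key has been read,
 * block->key is used as scratch space rather than as a btrfs key.
 * add_tree_block() stashes the blocksize in key.objectid and the
 * pointer generation in key.offset; reada_tree_block() and
 * get_tree_block_key() decode them in the same order, and the latter
 * then overwrites block->key with the block's first real key.
 */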
/*
 * helper function to relocate a tree block
 */
static int relocate_tree_block(struct btrfs_trans_handle *trans,
			       struct reloc_control *rc,
			       struct backref_node *node,
			       struct btrfs_key *key,
			       struct btrfs_path *path)
{
	struct btrfs_root *root;
	int ret;

	root = select_one_root(trans, node);
	if (unlikely(!root)) {
		rc->found_old_snapshot = 1;
		update_processed_blocks(rc, node);
		return 0;
	}

	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
		ret = do_relocation(trans, node, key, path, 1);
		if (ret < 0)
			goto out;
		if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) {
			ret = replace_file_extents(trans, rc, root,
						   node->eb, NULL);
			if (ret < 0)
				goto out;
		}
		drop_node_buffer(node);
	} else if (!root->ref_cows) {
		path->lowest_level = node->level;
		ret = btrfs_search_slot(trans, root, key, path, 0, 1);
		btrfs_release_path(root, path);
		if (ret < 0)
			goto out;
	} else if (root != node->root) {
		WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS);
	}

	update_processed_blocks(rc, node);
	ret = 0;
out:
	drop_node_buffer(node);
	return ret;
}
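/*
 * relocate_tree_blocks() below makes three passes over the rb tree of
 * blocks: one to issue readahead for blocks whose keys are not known
 * yet, one to read those keys, and one to build the backref trees and
 * do the actual relocation.  issuing all the readahead before the
 * first blocking read lets the reads overlap.
 */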
/*
 * relocate a list of blocks
 */
static noinline_for_stack
int relocate_tree_blocks(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc, struct rb_root *blocks)
{
	struct backref_cache *cache;
	struct backref_node *node;
	struct btrfs_path *path;
	struct tree_block *block;
	struct rb_node *rb_node;
	int level = -1;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	cache = kmalloc(sizeof(*cache), GFP_NOFS);
	if (!cache) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	backref_cache_init(cache);

	rb_node = rb_first(blocks);
	while (rb_node) {
		block = rb_entry(rb_node, struct tree_block, rb_node);
		if (level == -1)
			level = block->level;
		else
			BUG_ON(level != block->level);
		if (!block->key_ready)
			reada_tree_block(rc, block);
		rb_node = rb_next(rb_node);
	}

	rb_node = rb_first(blocks);
	while (rb_node) {
		block = rb_entry(rb_node, struct tree_block, rb_node);
		if (!block->key_ready)
			get_tree_block_key(rc, block);
		rb_node = rb_next(rb_node);
	}

	rb_node = rb_first(blocks);
	while (rb_node) {
		block = rb_entry(rb_node, struct tree_block, rb_node);

		node = build_backref_tree(rc, cache, &block->key,
					  block->level, block->bytenr);
		if (IS_ERR(node)) {
			err = PTR_ERR(node);
			goto out;
		}

		ret = relocate_tree_block(trans, rc, node, &block->key,
					  path);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		remove_backref_node(cache, node);
		rb_node = rb_next(rb_node);
	}

	if (level > 0)
		goto out;

	free_block_list(blocks);

	/*
	 * now backrefs of some upper level tree blocks have been cached,
	 * try relocating blocks referenced by these upper level blocks.
	 */
	while (1) {
		struct backref_node *upper = NULL;
		if (trans->transaction->in_commit ||
		    trans->transaction->delayed_refs.flushing)
			break;

		ret = add_adjacent_blocks(trans, rc, cache, blocks, level,
					  &upper);
		if (ret < 0)
			err = ret;
		if (ret != 0)
			break;

		rb_node = rb_first(blocks);
		while (rb_node) {
			block = rb_entry(rb_node, struct tree_block, rb_node);
			if (trans->transaction->in_commit ||
			    trans->transaction->delayed_refs.flushing)
				goto out;
			BUG_ON(!block->key_ready);
			node = build_backref_tree(rc, cache, &block->key,
						  level, block->bytenr);
			if (IS_ERR(node)) {
				err = PTR_ERR(node);
				goto out;
			}

			ret = relocate_tree_block(trans, rc, node,
						  &block->key, path);
			if (ret < 0) {
				err = ret;
				goto out;
			}
			remove_backref_node(cache, node);
			rb_node = rb_next(rb_node);
		}
		free_block_list(blocks);

		if (upper) {
			ret = link_to_upper(trans, upper, path);
			if (ret < 0) {
				err = ret;
				break;
			}
			remove_backref_node(cache, upper);
		}
	}
out:
	free_block_list(blocks);

	ret = finish_pending_nodes(trans, cache, path);
	if (ret < 0)
		err = ret;

	kfree(cache);
	btrfs_free_path(path);
	return err;
}

static noinline_for_stack
int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
			 u64 block_start)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	int ret = 0;

	em = alloc_extent_map(GFP_NOFS);
	if (!em)
		return -ENOMEM;

	em->start = start;
	em->len = end + 1 - start;
	em->block_len = em->len;
	em->block_start = block_start;
	em->bdev = root->fs_info->fs_devices->latest_bdev;
	set_bit(EXTENT_FLAG_PINNED, &em->flags);

	lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
	while (1) {
		write_lock(&em_tree->lock);
		ret = add_extent_mapping(em_tree, em);
		write_unlock(&em_tree->lock);
		if (ret != -EEXIST) {
			free_extent_map(em);
			break;
		}
		btrfs_drop_extent_cache(inode, start, end, 0);
	}
	unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
	return ret;
}
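/*
 * note on the address arithmetic below: BTRFS_I(inode)->index_cnt of
 * the reloc data inode holds the start bytenr of the block group, so
 * a data extent at disk offset X is staged at file offset
 * X - index_cnt.  the pinned extent map installed by
 * setup_extent_mapping() covers the whole cluster, and EXTENT_BOUNDARY
 * bits are set at each original extent start so the new copy is split
 * at the same boundaries when it is written out.
 */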
static int relocate_file_extent_cluster(struct inode *inode,
					struct file_extent_cluster *cluster)
{
	u64 page_start;
	u64 page_end;
	u64 offset = BTRFS_I(inode)->index_cnt;
	unsigned long index;
	unsigned long last_index;
	unsigned int dirty_page = 0;
	struct page *page;
	struct file_ra_state *ra;
	int nr = 0;
	int ret = 0;

	if (!cluster->nr)
		return 0;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return -ENOMEM;

	index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
	last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;

	mutex_lock(&inode->i_mutex);

	i_size_write(inode, cluster->end + 1 - offset);
	ret = setup_extent_mapping(inode, cluster->start - offset,
				   cluster->end - offset, cluster->start);
	if (ret)
		goto out_unlock;

	file_ra_state_init(ra, inode->i_mapping);

	WARN_ON(cluster->start != cluster->boundary[0]);
	while (index <= last_index) {
		page = find_lock_page(inode->i_mapping, index);
		if (!page) {
			page_cache_sync_readahead(inode->i_mapping,
						  ra, NULL, index,
						  last_index + 1 - index);
			page = grab_cache_page(inode->i_mapping, index);
			if (!page) {
				ret = -ENOMEM;
				goto out_unlock;
			}
		}

		if (PageReadahead(page)) {
			page_cache_async_readahead(inode->i_mapping,
						   ra, NULL, page, index,
						   last_index + 1 - index);
		}

		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				page_cache_release(page);
				ret = -EIO;
				goto out_unlock;
			}
		}

		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
		page_end = page_start + PAGE_CACHE_SIZE - 1;

		lock_extent(&BTRFS_I(inode)->io_tree,
			    page_start, page_end, GFP_NOFS);

		set_page_extent_mapped(page);

		if (nr < cluster->nr &&
		    page_start + offset == cluster->boundary[nr]) {
			set_extent_bits(&BTRFS_I(inode)->io_tree,
					page_start, page_end,
					EXTENT_BOUNDARY, GFP_NOFS);
			nr++;
		}
		btrfs_set_extent_delalloc(inode, page_start, page_end);

		set_page_dirty(page);
		dirty_page++;

		unlock_extent(&BTRFS_I(inode)->io_tree,
			      page_start, page_end, GFP_NOFS);
		unlock_page(page);
		page_cache_release(page);

		index++;
		if (nr < cluster->nr &&
		    page_end + 1 + offset == cluster->boundary[nr]) {
			balance_dirty_pages_ratelimited_nr(inode->i_mapping,
							   dirty_page);
			dirty_page = 0;
		}
	}
	if (dirty_page) {
		balance_dirty_pages_ratelimited_nr(inode->i_mapping,
						   dirty_page);
	}
	WARN_ON(nr != cluster->nr);
out_unlock:
	mutex_unlock(&inode->i_mutex);
	kfree(ra);
	return ret;
}

static noinline_for_stack
int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
			 struct file_extent_cluster *cluster)
{
	int ret;

	if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
		ret = relocate_file_extent_cluster(inode, cluster);
		if (ret)
			return ret;
		cluster->nr = 0;
	}

	if (!cluster->nr)
		cluster->start = extent_key->objectid;
	else
		BUG_ON(cluster->nr >= MAX_EXTENTS);
	cluster->end = extent_key->objectid + extent_key->offset - 1;
	cluster->boundary[cluster->nr] = extent_key->objectid;
	cluster->nr++;

	if (cluster->nr >= MAX_EXTENTS) {
		ret = relocate_file_extent_cluster(inode, cluster);
		if (ret)
			return ret;
		cluster->nr = 0;
	}
	return 0;
}
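/*
 * a worked example of the clustering above (illustrative numbers):
 * three adjacent 1M extents at objectids 10M, 11M and 12M are merged
 * into one cluster [10M, 13M) with boundaries 10M/11M/12M; a fourth
 * extent at 20M is not contiguous (20M != cluster->end + 1), so the
 * pending cluster is flushed first and a new one is started at 20M.
 */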
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int get_ref_objectid_v0(struct reloc_control *rc,
			       struct btrfs_path *path,
			       struct btrfs_key *extent_key,
			       u64 *ref_objectid, int *path_change)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref_v0 *ref0;
	int ret;
	int slot;

	leaf = path->nodes[0];
	slot = path->slots[0];
	while (1) {
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(rc->extent_root, path);
			if (ret < 0)
				return ret;
			BUG_ON(ret > 0);
			leaf = path->nodes[0];
			slot = path->slots[0];
			if (path_change)
				*path_change = 1;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid != extent_key->objectid)
			return -ENOENT;

		if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
			slot++;
			continue;
		}
		ref0 = btrfs_item_ptr(leaf, slot,
				      struct btrfs_extent_ref_v0);
		*ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
		break;
	}
	return 0;
}
#endif

/*
 * helper to add a tree block to the list.
 * the major work is getting the generation and level of the block
 */
static int add_tree_block(struct reloc_control *rc,
			  struct btrfs_key *extent_key,
			  struct btrfs_path *path,
			  struct rb_root *blocks)
{
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct btrfs_tree_block_info *bi;
	struct tree_block *block;
	struct rb_node *rb_node;
	u32 item_size;
	int level = -1;
	int generation;

	eb = path->nodes[0];
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (item_size >= sizeof(*ei) + sizeof(*bi)) {
		ei = btrfs_item_ptr(eb, path->slots[0],
				    struct btrfs_extent_item);
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		generation = btrfs_extent_generation(eb, ei);
		level = btrfs_tree_block_level(eb, bi);
	} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		u64 ref_owner;
		int ret;

		BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
		ret = get_ref_objectid_v0(rc, path, extent_key,
					  &ref_owner, NULL);
		BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
		level = (int)ref_owner;
		/* FIXME: get real generation */
		generation = 0;
#else
		BUG();
#endif
	}

	btrfs_release_path(rc->extent_root, path);

	BUG_ON(level == -1);

	block = kmalloc(sizeof(*block), GFP_NOFS);
	if (!block)
		return -ENOMEM;

	block->bytenr = extent_key->objectid;
	block->key.objectid = extent_key->offset;
	block->key.offset = generation;
	block->level = level;
	block->key_ready = 0;

	rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
	BUG_ON(rb_node);

	return 0;
}

/*
 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
 */
static int __add_tree_block(struct reloc_control *rc,
			    u64 bytenr, u32 blocksize,
			    struct rb_root *blocks)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	if (tree_block_processed(bytenr, blocksize, rc))
		return 0;

	if (tree_search(blocks, bytenr))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = blocksize;

	path->search_commit_root = 1;
	path->skip_locking = 1;
	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret);

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	ret = add_tree_block(rc, &key, path, blocks);
out:
	btrfs_free_path(path);
	return ret;
}
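/*
 * with full backrefs, a shared tree block carries one back reference
 * keyed on its own bytenr instead of per-root references, so data
 * references found inside such a block do not contribute to the
 * extent item's ref count.  find_data_references() below relies on
 * block_use_full_backref() to decide whether a leaf's references
 * should be counted against ref_count.
 */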
/*
 * helper to check if the block uses full backrefs for pointers in it
 */
static int block_use_full_backref(struct reloc_control *rc,
				  struct extent_buffer *eb)
{
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u64 flags;
	int ret;

	if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
	    btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
		return 1;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	key.objectid = eb->start;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = eb->len;

	path->search_commit_root = 1;
	path->skip_locking = 1;
	ret = btrfs_search_slot(NULL, rc->extent_root,
				&key, path, 0, 0);
	BUG_ON(ret);

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_extent_item);
	flags = btrfs_extent_flags(path->nodes[0], ei);
	BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
		ret = 1;
	else
		ret = 0;
	btrfs_free_path(path);
	return ret;
}

/*
 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
 * this function scans fs tree to find blocks that reference the data extent
 */
static int find_data_references(struct reloc_control *rc,
				struct btrfs_key *extent_key,
				struct extent_buffer *leaf,
				struct btrfs_extent_data_ref *ref,
				struct rb_root *blocks)
{
	struct btrfs_path *path;
	struct tree_block *block;
	struct btrfs_root *root;
	struct btrfs_file_extent_item *fi;
	struct rb_node *rb_node;
	struct btrfs_key key;
	u64 ref_root;
	u64 ref_objectid;
	u64 ref_offset;
	u32 ref_count;
	u32 nritems;
	int err = 0;
	int added = 0;
	int counted;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ref_root = btrfs_extent_data_ref_root(leaf, ref);
	ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
	ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
	ref_count = btrfs_extent_data_ref_count(leaf, ref);

	root = read_fs_root(rc->extent_root->fs_info, ref_root);
	if (IS_ERR(root)) {
		err = PTR_ERR(root);
		goto out;
	}

	key.objectid = ref_objectid;
	key.offset = ref_offset;
	key.type = BTRFS_EXTENT_DATA_KEY;

	path->search_commit_root = 1;
	path->skip_locking = 1;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	/*
	 * the references in tree blocks that use full backrefs
	 * are not counted in the extent item's ref count
	 */
	if (block_use_full_backref(rc, leaf))
		counted = 0;
	else
		counted = 1;
	rb_node = tree_search(blocks, leaf->start);
	if (rb_node) {
		if (counted)
			added = 1;
		else
			path->slots[0] = nritems;
	}

	while (ref_count > 0) {
		while (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				err = ret;
				goto out;
			}
			if (ret > 0) {
				WARN_ON(1);
				goto out;
			}

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			added = 0;

			if (block_use_full_backref(rc, leaf))
				counted = 0;
			else
				counted = 1;
			rb_node = tree_search(blocks, leaf->start);
			if (rb_node) {
				if (counted)
					added = 1;
				else
					path->slots[0] = nritems;
			}
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != ref_objectid ||
		    key.type != BTRFS_EXTENT_DATA_KEY) {
			WARN_ON(1);
			break;
		}

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			goto next;

		if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
		    extent_key->objectid)
			goto next;

		key.offset -= btrfs_file_extent_offset(leaf, fi);
		if (key.offset != ref_offset)
			goto next;

		if (counted)
			ref_count--;
		if (added)
			goto next;

		if (!tree_block_processed(leaf->start, leaf->len, rc)) {
			block = kmalloc(sizeof(*block), GFP_NOFS);
			if (!block) {
				err = -ENOMEM;
				break;
			}
			block->bytenr = leaf->start;
			btrfs_item_key_to_cpu(leaf, &block->key, 0);
			block->level = 0;
			block->key_ready = 1;
			rb_node = tree_insert(blocks, block->bytenr,
					      &block->rb_node);
			BUG_ON(rb_node);
		}
		if (counted)
			added = 1;
		else
			path->slots[0] = nritems;
next:
		path->slots[0]++;
	}
out:
	btrfs_free_path(path);
	return err;
}
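/*
 * add_data_references() below has to handle both reference layouts:
 * inline refs packed after the extent item itself, and keyed
 * SHARED_DATA_REF / EXTENT_DATA_REF items following the extent item
 * in the leaf.  both layouts funnel into __add_tree_block() for
 * shared refs and find_data_references() for per-root data refs.
 */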
/*
 * helper to find all tree blocks that reference a given data extent
 */
static noinline_for_stack
int add_data_references(struct reloc_control *rc,
			struct btrfs_key *extent_key,
			struct btrfs_path *path,
			struct rb_root *blocks)
{
	struct btrfs_key key;
	struct extent_buffer *eb;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_extent_inline_ref *iref;
	unsigned long ptr;
	unsigned long end;
	u32 blocksize;
	int ret;
	int err = 0;

	ret = get_new_location(rc->data_inode, NULL, extent_key->objectid,
			       extent_key->offset);
	BUG_ON(ret < 0);
	if (ret > 0) {
		/* the relocated data is fragmented */
		rc->extents_skipped++;
		btrfs_release_path(rc->extent_root, path);
		return 0;
	}

	blocksize = btrfs_level_size(rc->extent_root, 0);

	eb = path->nodes[0];
	ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
	end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
		ptr = end;
	else
#endif
		ptr += sizeof(struct btrfs_extent_item);

	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		key.type = btrfs_extent_inline_ref_type(eb, iref);
		if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
			key.offset = btrfs_extent_inline_ref_offset(eb, iref);
			ret = __add_tree_block(rc, key.offset, blocksize,
					       blocks);
		} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			ret = find_data_references(rc, extent_key,
						   eb, dref, blocks);
		} else {
			BUG();
		}
		ptr += btrfs_extent_inline_ref_size(key.type);
	}
	WARN_ON(ptr > end);

	while (1) {
		cond_resched();
		eb = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(eb)) {
			ret = btrfs_next_leaf(rc->extent_root, path);
			if (ret < 0) {
				err = ret;
				break;
			}
			if (ret > 0)
				break;
			eb = path->nodes[0];
		}

		btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
		if (key.objectid != extent_key->objectid)
			break;

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
		    key.type == BTRFS_EXTENT_REF_V0_KEY) {
#else
		BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
		if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
#endif
			ret = __add_tree_block(rc, key.offset, blocksize,
					       blocks);
		} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
			dref = btrfs_item_ptr(eb, path->slots[0],
					      struct btrfs_extent_data_ref);
			ret = find_data_references(rc, extent_key,
						   eb, dref, blocks);
		} else {
			ret = 0;
		}
		if (ret) {
			err = ret;
			break;
		}
		path->slots[0]++;
	}
	btrfs_release_path(rc->extent_root, path);
	if (err)
		free_block_list(blocks);
	return err;
}
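/*
 * the extent tree walk below runs with search_commit_root and
 * skip_locking set: relocation scans the commit root, so it sees a
 * stable view of the extent tree and needs no tree locks while the
 * current transaction keeps modifying the live root.
 */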
/*
 * helper to find next unprocessed extent
 */
static noinline_for_stack
int find_next_extent(struct btrfs_trans_handle *trans,
		     struct reloc_control *rc, struct btrfs_path *path)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u64 start, end, last;
	int ret;

	last = rc->block_group->key.objectid + rc->block_group->key.offset;
	while (1) {
		cond_resched();
		if (rc->search_start >= last) {
			ret = 1;
			break;
		}

		key.objectid = rc->search_start;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = 0;

		path->search_commit_root = 1;
		path->skip_locking = 1;
		ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
					0, 0);
		if (ret < 0)
			break;
next:
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(rc->extent_root, path);
			if (ret != 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid >= last) {
			ret = 1;
			break;
		}

		if (key.type != BTRFS_EXTENT_ITEM_KEY ||
		    key.objectid + key.offset <= rc->search_start) {
			path->slots[0]++;
			goto next;
		}

		ret = find_first_extent_bit(&rc->processed_blocks,
					    key.objectid, &start, &end,
					    EXTENT_DIRTY);

		if (ret == 0 && start <= key.objectid) {
			btrfs_release_path(rc->extent_root, path);
			rc->search_start = end + 1;
		} else {
			rc->search_start = key.objectid + key.offset;
			return 0;
		}
	}
	btrfs_release_path(rc->extent_root, path);
	return ret;
}

static void set_reloc_control(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;

	mutex_lock(&fs_info->trans_mutex);
	fs_info->reloc_ctl = rc;
	mutex_unlock(&fs_info->trans_mutex);
}

static void unset_reloc_control(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;

	mutex_lock(&fs_info->trans_mutex);
	fs_info->reloc_ctl = NULL;
	mutex_unlock(&fs_info->trans_mutex);
}

static int check_extent_flags(u64 flags)
{
	if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
	    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
		return 1;
	if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
	    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
		return 1;
	if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
	    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
		return 1;
	return 0;
}
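/*
 * relocate_block_group() drives one pass over the block group.  the
 * empty transaction committed first makes sure the commit roots match
 * the current roots before the commit root based scan starts.  tree
 * blocks found by find_next_extent() are relocated right away via
 * relocate_tree_blocks(); data extents are batched into clusters and
 * copied through the reloc data inode.
 */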
static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
{
	struct rb_root blocks = RB_ROOT;
	struct btrfs_key key;
	struct file_extent_cluster *cluster;
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	unsigned long nr;
	u64 flags;
	u32 item_size;
	int ret;
	int err = 0;

	cluster = kzalloc(sizeof(*cluster), GFP_NOFS);
	if (!cluster)
		return -ENOMEM;

	path = btrfs_alloc_path();
	if (!path) {
		kfree(cluster);
		return -ENOMEM;
	}

	rc->extents_found = 0;
	rc->extents_skipped = 0;

	rc->search_start = rc->block_group->key.objectid;
	clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
			  GFP_NOFS);

	rc->create_reloc_root = 1;
	set_reloc_control(rc);

	trans = btrfs_start_transaction(rc->extent_root, 1);
	btrfs_commit_transaction(trans, rc->extent_root);

	while (1) {
		trans = btrfs_start_transaction(rc->extent_root, 1);

		ret = find_next_extent(trans, rc, path);
		if (ret < 0)
			err = ret;
		if (ret != 0)
			break;

		rc->extents_found++;

		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_extent_item);
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		item_size = btrfs_item_size_nr(path->nodes[0],
					       path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			flags = btrfs_extent_flags(path->nodes[0], ei);
			ret = check_extent_flags(flags);
			BUG_ON(ret);
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			u64 ref_owner;
			int path_change = 0;

			BUG_ON(item_size !=
			       sizeof(struct btrfs_extent_item_v0));
			ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
						  &path_change);
			if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
				flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
			else
				flags = BTRFS_EXTENT_FLAG_DATA;

			if (path_change) {
				btrfs_release_path(rc->extent_root, path);

				path->search_commit_root = 1;
				path->skip_locking = 1;
				ret = btrfs_search_slot(NULL, rc->extent_root,
							&key, path, 0, 0);
				if (ret < 0) {
					err = ret;
					break;
				}
				BUG_ON(ret > 0);
			}
#else
			BUG();
#endif
		}

		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
			ret = add_tree_block(rc, &key, path, &blocks);
		} else if (rc->stage == UPDATE_DATA_PTRS &&
			   (flags & BTRFS_EXTENT_FLAG_DATA)) {
			ret = add_data_references(rc, &key, path, &blocks);
		} else {
			btrfs_release_path(rc->extent_root, path);
			ret = 0;
		}
		if (ret < 0) {
			err = ret;
			break;
		}

		if (!RB_EMPTY_ROOT(&blocks)) {
			ret = relocate_tree_blocks(trans, rc, &blocks);
			if (ret < 0) {
				err = ret;
				break;
			}
		}

		nr = trans->blocks_used;
		btrfs_end_transaction(trans, rc->extent_root);
		trans = NULL;
		btrfs_btree_balance_dirty(rc->extent_root, nr);

		if (rc->stage == MOVE_DATA_EXTENTS &&
		    (flags & BTRFS_EXTENT_FLAG_DATA)) {
			rc->found_file_extent = 1;
			ret = relocate_data_extent(rc->data_inode,
						   &key, cluster);
			if (ret < 0) {
				err = ret;
				break;
			}
		}
	}
	btrfs_free_path(path);

	if (trans) {
		nr = trans->blocks_used;
		btrfs_end_transaction(trans, rc->extent_root);
		btrfs_btree_balance_dirty(rc->extent_root, nr);
	}

	if (!err) {
		ret = relocate_file_extent_cluster(rc->data_inode, cluster);
		if (ret < 0)
			err = ret;
	}

	kfree(cluster);

	rc->create_reloc_root = 0;
	smp_mb();

	if (rc->extents_found > 0) {
		trans = btrfs_start_transaction(rc->extent_root, 1);
		btrfs_commit_transaction(trans, rc->extent_root);
	}

	merge_reloc_roots(rc);

	unset_reloc_control(rc);

	/* get rid of pinned extents */
	trans = btrfs_start_transaction(rc->extent_root, 1);
	btrfs_commit_transaction(trans, rc->extent_root);

	return err;
}
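/*
 * the inode created by the helpers below never gets an inode ref or
 * directory entry, so its link count stays zero; btrfs_orphan_add()
 * makes sure it is deleted when the last in-memory reference is
 * dropped.
 */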
static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root, u64 objectid)
{
	struct btrfs_path *path;
	struct btrfs_inode_item *item;
	struct extent_buffer *leaf;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
	if (ret)
		goto out;

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
	memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
	btrfs_set_inode_generation(leaf, item, 1);
	btrfs_set_inode_size(leaf, item, 0);
	btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * helper to create inode for data relocation.
 * the inode is in data relocation tree and its link count is 0
 */
static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
					struct btrfs_block_group_cache *group)
{
	struct inode *inode = NULL;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_key key;
	unsigned long nr;
	u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
	int err = 0;

	root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
	if (IS_ERR(root))
		return ERR_CAST(root);

	trans = btrfs_start_transaction(root, 1);
	BUG_ON(!trans);

	err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
	if (err)
		goto out;

	err = __insert_orphan_inode(trans, root, objectid);
	BUG_ON(err);

	key.objectid = objectid;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	inode = btrfs_iget(root->fs_info->sb, &key, root);
	BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
	BTRFS_I(inode)->index_cnt = group->key.objectid;

	err = btrfs_orphan_add(trans, inode);
out:
	nr = trans->blocks_used;
	btrfs_end_transaction(trans, root);

	btrfs_btree_balance_dirty(root, nr);
	if (err) {
		if (inode)
			iput(inode);
		inode = ERR_PTR(err);
	}
	return inode;
}
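/*
 * a minimal caller sketch for the entry point below (illustrative
 * only, compiled out): relocation is normally driven per chunk, with
 * group_start being the bytenr where the block group starts.
 */
#if 0
	ret = btrfs_relocate_block_group(extent_root, chunk_offset);
	if (ret)
		return ret;
#endif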
/*
 * function to relocate all extents in a block group.
 */
int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
{
	struct btrfs_fs_info *fs_info = extent_root->fs_info;
	struct reloc_control *rc;
	int ret;
	int err = 0;

	rc = kzalloc(sizeof(*rc), GFP_NOFS);
	if (!rc)
		return -ENOMEM;

	mapping_tree_init(&rc->reloc_root_tree);
	extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
	INIT_LIST_HEAD(&rc->reloc_roots);

	rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
	BUG_ON(!rc->block_group);

	btrfs_init_workers(&rc->workers, "relocate",
			   fs_info->thread_pool_size);

	rc->extent_root = extent_root;
	btrfs_prepare_block_group_relocation(extent_root, rc->block_group);

	rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
	if (IS_ERR(rc->data_inode)) {
		err = PTR_ERR(rc->data_inode);
		rc->data_inode = NULL;
		goto out;
	}

	printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
	       (unsigned long long)rc->block_group->key.objectid,
	       (unsigned long long)rc->block_group->flags);

	btrfs_start_delalloc_inodes(fs_info->tree_root);
	btrfs_wait_ordered_extents(fs_info->tree_root, 0);

	while (1) {
		rc->extents_found = 0;
		rc->extents_skipped = 0;

		mutex_lock(&fs_info->cleaner_mutex);

		btrfs_clean_old_snapshots(fs_info->tree_root);
		ret = relocate_block_group(rc);

		mutex_unlock(&fs_info->cleaner_mutex);
		if (ret < 0) {
			err = ret;
			break;
		}

		if (rc->extents_found == 0)
			break;

		printk(KERN_INFO "btrfs: found %llu extents\n",
		       (unsigned long long)rc->extents_found);

		if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
			btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
			invalidate_mapping_pages(rc->data_inode->i_mapping,
						 0, -1);
			rc->stage = UPDATE_DATA_PTRS;
		} else if (rc->stage == UPDATE_DATA_PTRS &&
			   rc->extents_skipped >= rc->extents_found) {
			iput(rc->data_inode);
			rc->data_inode = create_reloc_inode(fs_info,
							    rc->block_group);
			if (IS_ERR(rc->data_inode)) {
				err = PTR_ERR(rc->data_inode);
				rc->data_inode = NULL;
				break;
			}
			rc->stage = MOVE_DATA_EXTENTS;
			rc->found_file_extent = 0;
		}
	}

	filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
				     rc->block_group->key.objectid,
				     rc->block_group->key.objectid +
				     rc->block_group->key.offset - 1);

	WARN_ON(rc->block_group->pinned > 0);
	WARN_ON(rc->block_group->reserved > 0);
	WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
out:
	iput(rc->data_inode);
	btrfs_stop_workers(&rc->workers);
	btrfs_put_block_group(rc->block_group);
	kfree(rc);
	return err;
}

static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	int ret;

	trans = btrfs_start_transaction(root->fs_info->tree_root, 1);

	memset(&root->root_item.drop_progress, 0,
	       sizeof(root->root_item.drop_progress));
	root->root_item.drop_level = 0;
	btrfs_set_root_refs(&root->root_item, 0);
	ret = btrfs_update_root(trans, root->fs_info->tree_root,
				&root->root_key, &root->root_item);
	BUG_ON(ret);

	ret = btrfs_end_transaction(trans, root->fs_info->tree_root);
	BUG_ON(ret);
	return 0;
}
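/*
 * mark_garbage_root() zeroes the drop progress and sets the root's
 * ref count to 0, which hands a stale reloc root whose fs root has
 * disappeared over to the snapshot cleaner for deletion.
 */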
/*
 * recover relocation interrupted by a system crash.
 *
 * this function resumes merging reloc trees with corresponding fs trees.
 * this is important for keeping the sharing of tree blocks
 */
int btrfs_recover_relocation(struct btrfs_root *root)
{
	LIST_HEAD(reloc_roots);
	struct btrfs_key key;
	struct btrfs_root *fs_root;
	struct btrfs_root *reloc_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct reloc_control *rc = NULL;
	struct btrfs_trans_handle *trans;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key,
					path, 0, 0);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		if (ret > 0) {
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		btrfs_release_path(root->fs_info->tree_root, path);

		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
		    key.type != BTRFS_ROOT_ITEM_KEY)
			break;

		reloc_root = btrfs_read_fs_root_no_radix(root, &key);
		if (IS_ERR(reloc_root)) {
			err = PTR_ERR(reloc_root);
			goto out;
		}

		list_add(&reloc_root->root_list, &reloc_roots);

		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
			fs_root = read_fs_root(root->fs_info,
					       reloc_root->root_key.offset);
			if (IS_ERR(fs_root)) {
				ret = PTR_ERR(fs_root);
				if (ret != -ENOENT) {
					err = ret;
					goto out;
				}
				mark_garbage_root(reloc_root);
			}
		}

		if (key.offset == 0)
			break;

		key.offset--;
	}
	btrfs_release_path(root->fs_info->tree_root, path);

	if (list_empty(&reloc_roots))
		goto out;

	rc = kzalloc(sizeof(*rc), GFP_NOFS);
	if (!rc) {
		err = -ENOMEM;
		goto out;
	}

	mapping_tree_init(&rc->reloc_root_tree);
	INIT_LIST_HEAD(&rc->reloc_roots);
	btrfs_init_workers(&rc->workers, "relocate",
			   root->fs_info->thread_pool_size);
	rc->extent_root = root->fs_info->extent_root;

	set_reloc_control(rc);

	while (!list_empty(&reloc_roots)) {
		reloc_root = list_entry(reloc_roots.next,
					struct btrfs_root, root_list);
		list_del(&reloc_root->root_list);

		if (btrfs_root_refs(&reloc_root->root_item) == 0) {
			list_add_tail(&reloc_root->root_list,
				      &rc->reloc_roots);
			continue;
		}

		fs_root = read_fs_root(root->fs_info,
				       reloc_root->root_key.offset);
		BUG_ON(IS_ERR(fs_root));

		__add_reloc_root(reloc_root);
		fs_root->reloc_root = reloc_root;
	}

	trans = btrfs_start_transaction(rc->extent_root, 1);
	btrfs_commit_transaction(trans, rc->extent_root);

	merge_reloc_roots(rc);

	unset_reloc_control(rc);

	trans = btrfs_start_transaction(rc->extent_root, 1);
	btrfs_commit_transaction(trans, rc->extent_root);
out:
	if (rc) {
		btrfs_stop_workers(&rc->workers);
		kfree(rc);
	}
	while (!list_empty(&reloc_roots)) {
		reloc_root = list_entry(reloc_roots.next,
					struct btrfs_root, root_list);
		list_del(&reloc_root->root_list);
		free_extent_buffer(reloc_root->node);
		free_extent_buffer(reloc_root->commit_root);
		kfree(reloc_root);
	}
	btrfs_free_path(path);

	if (err == 0) {
		/* cleanup orphan inode in data relocation tree */
		fs_root = read_fs_root(root->fs_info,
				       BTRFS_DATA_RELOC_TREE_OBJECTID);
		if (IS_ERR(fs_root))
			err = PTR_ERR(fs_root);
	}
	return err;
}
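/*
 * a worked example for the checksum rebase below (illustrative
 * numbers): if an extent's data lived at disk_bytenr 100M and the
 * ordered extent writes it back at ordered->start 160M, each
 * sector_sum->bytenr is shifted by +60M so the cloned checksums
 * describe the new disk location without being recomputed.
 */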
/*
 * helper to add ordered checksums for data relocation.
 *
 * cloning the existing checksums properly handles nodatasum extents,
 * and it also saves the CPU time that would be spent recalculating
 * the checksums.
 */
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
{
	struct btrfs_ordered_sum *sums;
	struct btrfs_sector_sum *sector_sum;
	struct btrfs_ordered_extent *ordered;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	size_t offset;
	int ret;
	u64 disk_bytenr;
	LIST_HEAD(list);

	ordered = btrfs_lookup_ordered_extent(inode, file_pos);
	BUG_ON(ordered->file_offset != file_pos || ordered->len != len);

	disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
	ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
				       disk_bytenr + len - 1, &list);

	while (!list_empty(&list)) {
		sums = list_entry(list.next, struct btrfs_ordered_sum, list);
		list_del_init(&sums->list);

		sector_sum = sums->sums;
		sums->bytenr = ordered->start;

		offset = 0;
		while (offset < sums->len) {
			sector_sum->bytenr += ordered->start - disk_bytenr;
			sector_sum++;
			offset += root->sectorsize;
		}

		btrfs_add_ordered_sum(inode, ordered, sums);
	}
	btrfs_put_ordered_extent(ordered);
	return 0;
}