1 /* 2 * Copyright (C) 2009 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/pagemap.h> 21 #include <linux/writeback.h> 22 #include <linux/blkdev.h> 23 #include <linux/rbtree.h> 24 #include "ctree.h" 25 #include "disk-io.h" 26 #include "transaction.h" 27 #include "volumes.h" 28 #include "locking.h" 29 #include "btrfs_inode.h" 30 #include "async-thread.h" 31 32 /* 33 * backref_node, mapping_node and tree_block start with this 34 */ 35 struct tree_entry { 36 struct rb_node rb_node; 37 u64 bytenr; 38 }; 39 40 /* 41 * present a tree block in the backref cache 42 */ 43 struct backref_node { 44 struct rb_node rb_node; 45 u64 bytenr; 46 /* objectid tree block owner */ 47 u64 owner; 48 /* list of upper level blocks reference this block */ 49 struct list_head upper; 50 /* list of child blocks in the cache */ 51 struct list_head lower; 52 /* NULL if this node is not tree root */ 53 struct btrfs_root *root; 54 /* extent buffer got by COW the block */ 55 struct extent_buffer *eb; 56 /* level of tree block */ 57 unsigned int level:8; 58 /* 1 if the block is root of old snapshot */ 59 unsigned int old_root:1; 60 /* 1 if no child blocks in the cache */ 61 unsigned int lowest:1; 62 /* is the extent buffer locked */ 63 unsigned int locked:1; 64 /* has the block been processed */ 65 unsigned int processed:1; 66 /* have backrefs of this block been checked */ 67 unsigned int checked:1; 68 }; 69 70 /* 71 * present a block pointer in the backref cache 72 */ 73 struct backref_edge { 74 struct list_head list[2]; 75 struct backref_node *node[2]; 76 u64 blockptr; 77 }; 78 79 #define LOWER 0 80 #define UPPER 1 81 82 struct backref_cache { 83 /* red black tree of all backref nodes in the cache */ 84 struct rb_root rb_root; 85 /* list of backref nodes with no child block in the cache */ 86 struct list_head pending[BTRFS_MAX_LEVEL]; 87 spinlock_t lock; 88 }; 89 90 /* 91 * map address of tree root to tree 92 */ 93 struct mapping_node { 94 struct rb_node rb_node; 95 u64 bytenr; 96 void *data; 97 }; 98 99 struct mapping_tree { 100 struct rb_root rb_root; 101 spinlock_t lock; 102 }; 103 104 /* 105 * present a tree block to process 106 */ 107 struct tree_block { 108 struct rb_node rb_node; 109 u64 bytenr; 110 struct btrfs_key key; 111 unsigned int level:8; 112 unsigned int key_ready:1; 113 }; 114 115 /* inode vector */ 116 #define INODEVEC_SIZE 16 117 118 struct inodevec { 119 struct list_head list; 120 struct inode *inode[INODEVEC_SIZE]; 121 int nr; 122 }; 123 124 #define MAX_EXTENTS 128 125 126 struct file_extent_cluster { 127 u64 start; 128 u64 end; 129 u64 boundary[MAX_EXTENTS]; 130 unsigned int nr; 131 }; 132 133 struct reloc_control { 134 /* block group to relocate */ 135 struct btrfs_block_group_cache *block_group; 136 /* extent tree */ 137 struct btrfs_root *extent_root; 138 /* inode for moving data */ 139 struct inode *data_inode; 140 struct btrfs_workers workers; 141 /* tree blocks have been processed */ 142 struct extent_io_tree processed_blocks; 143 /* map start of tree root to corresponding reloc tree */ 144 struct mapping_tree reloc_root_tree; 145 /* list of reloc trees */ 146 struct list_head reloc_roots; 147 u64 search_start; 148 u64 extents_found; 149 u64 extents_skipped; 150 int stage; 151 int create_reloc_root; 152 unsigned int found_file_extent:1; 153 unsigned int found_old_snapshot:1; 154 }; 155 156 /* stages of data relocation */ 157 #define MOVE_DATA_EXTENTS 0 158 #define UPDATE_DATA_PTRS 1 159 160 /* 161 * merge reloc tree to corresponding fs tree in worker threads 162 */ 163 struct async_merge { 164 struct btrfs_work work; 165 struct reloc_control *rc; 166 struct btrfs_root *root; 167 struct completion *done; 168 atomic_t *num_pending; 169 }; 170 171 static void mapping_tree_init(struct mapping_tree *tree) 172 { 173 tree->rb_root.rb_node = NULL; 174 spin_lock_init(&tree->lock); 175 } 176 177 static void backref_cache_init(struct backref_cache *cache) 178 { 179 int i; 180 cache->rb_root.rb_node = NULL; 181 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 182 INIT_LIST_HEAD(&cache->pending[i]); 183 spin_lock_init(&cache->lock); 184 } 185 186 static void backref_node_init(struct backref_node *node) 187 { 188 memset(node, 0, sizeof(*node)); 189 INIT_LIST_HEAD(&node->upper); 190 INIT_LIST_HEAD(&node->lower); 191 RB_CLEAR_NODE(&node->rb_node); 192 } 193 194 static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, 195 struct rb_node *node) 196 { 197 struct rb_node **p = &root->rb_node; 198 struct rb_node *parent = NULL; 199 struct tree_entry *entry; 200 201 while (*p) { 202 parent = *p; 203 entry = rb_entry(parent, struct tree_entry, rb_node); 204 205 if (bytenr < entry->bytenr) 206 p = &(*p)->rb_left; 207 else if (bytenr > entry->bytenr) 208 p = &(*p)->rb_right; 209 else 210 return parent; 211 } 212 213 rb_link_node(node, parent, p); 214 rb_insert_color(node, root); 215 return NULL; 216 } 217 218 static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) 219 { 220 struct rb_node *n = root->rb_node; 221 struct tree_entry *entry; 222 223 while (n) { 224 entry = rb_entry(n, struct tree_entry, rb_node); 225 226 if (bytenr < entry->bytenr) 227 n = n->rb_left; 228 else if (bytenr > entry->bytenr) 229 n = n->rb_right; 230 else 231 return n; 232 } 233 return NULL; 234 } 235 236 /* 237 * walk up backref nodes until reach node presents tree root 238 */ 239 static struct backref_node *walk_up_backref(struct backref_node *node, 240 struct backref_edge *edges[], 241 int *index) 242 { 243 struct backref_edge *edge; 244 int idx = *index; 245 246 while (!list_empty(&node->upper)) { 247 edge = list_entry(node->upper.next, 248 struct backref_edge, list[LOWER]); 249 edges[idx++] = edge; 250 node = edge->node[UPPER]; 251 } 252 *index = idx; 253 return node; 254 } 255 256 /* 257 * walk down backref nodes to find start of next reference path 258 */ 259 static struct backref_node *walk_down_backref(struct backref_edge *edges[], 260 int *index) 261 { 262 struct backref_edge *edge; 263 struct backref_node *lower; 264 int idx = *index; 265 266 while (idx > 0) { 267 edge = edges[idx - 1]; 268 lower = edge->node[LOWER]; 269 if (list_is_last(&edge->list[LOWER], &lower->upper)) { 270 idx--; 271 continue; 272 } 273 edge = list_entry(edge->list[LOWER].next, 274 struct backref_edge, list[LOWER]); 275 edges[idx - 1] = edge; 276 *index = idx; 277 return edge->node[UPPER]; 278 } 279 *index = 0; 280 return NULL; 281 } 282 283 static void drop_node_buffer(struct backref_node *node) 284 { 285 if (node->eb) { 286 if (node->locked) { 287 btrfs_tree_unlock(node->eb); 288 node->locked = 0; 289 } 290 free_extent_buffer(node->eb); 291 node->eb = NULL; 292 } 293 } 294 295 static void drop_backref_node(struct backref_cache *tree, 296 struct backref_node *node) 297 { 298 BUG_ON(!node->lowest); 299 BUG_ON(!list_empty(&node->upper)); 300 301 drop_node_buffer(node); 302 list_del(&node->lower); 303 304 rb_erase(&node->rb_node, &tree->rb_root); 305 kfree(node); 306 } 307 308 /* 309 * remove a backref node from the backref cache 310 */ 311 static void remove_backref_node(struct backref_cache *cache, 312 struct backref_node *node) 313 { 314 struct backref_node *upper; 315 struct backref_edge *edge; 316 317 if (!node) 318 return; 319 320 BUG_ON(!node->lowest); 321 while (!list_empty(&node->upper)) { 322 edge = list_entry(node->upper.next, struct backref_edge, 323 list[LOWER]); 324 upper = edge->node[UPPER]; 325 list_del(&edge->list[LOWER]); 326 list_del(&edge->list[UPPER]); 327 kfree(edge); 328 /* 329 * add the node to pending list if no other 330 * child block cached. 331 */ 332 if (list_empty(&upper->lower)) { 333 list_add_tail(&upper->lower, 334 &cache->pending[upper->level]); 335 upper->lowest = 1; 336 } 337 } 338 drop_backref_node(cache, node); 339 } 340 341 /* 342 * find reloc tree by address of tree root 343 */ 344 static struct btrfs_root *find_reloc_root(struct reloc_control *rc, 345 u64 bytenr) 346 { 347 struct rb_node *rb_node; 348 struct mapping_node *node; 349 struct btrfs_root *root = NULL; 350 351 spin_lock(&rc->reloc_root_tree.lock); 352 rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr); 353 if (rb_node) { 354 node = rb_entry(rb_node, struct mapping_node, rb_node); 355 root = (struct btrfs_root *)node->data; 356 } 357 spin_unlock(&rc->reloc_root_tree.lock); 358 return root; 359 } 360 361 static int is_cowonly_root(u64 root_objectid) 362 { 363 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 364 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 365 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 366 root_objectid == BTRFS_DEV_TREE_OBJECTID || 367 root_objectid == BTRFS_TREE_LOG_OBJECTID || 368 root_objectid == BTRFS_CSUM_TREE_OBJECTID) 369 return 1; 370 return 0; 371 } 372 373 static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, 374 u64 root_objectid) 375 { 376 struct btrfs_key key; 377 378 key.objectid = root_objectid; 379 key.type = BTRFS_ROOT_ITEM_KEY; 380 if (is_cowonly_root(root_objectid)) 381 key.offset = 0; 382 else 383 key.offset = (u64)-1; 384 385 return btrfs_read_fs_root_no_name(fs_info, &key); 386 } 387 388 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 389 static noinline_for_stack 390 struct btrfs_root *find_tree_root(struct reloc_control *rc, 391 struct extent_buffer *leaf, 392 struct btrfs_extent_ref_v0 *ref0) 393 { 394 struct btrfs_root *root; 395 u64 root_objectid = btrfs_ref_root_v0(leaf, ref0); 396 u64 generation = btrfs_ref_generation_v0(leaf, ref0); 397 398 BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID); 399 400 root = read_fs_root(rc->extent_root->fs_info, root_objectid); 401 BUG_ON(IS_ERR(root)); 402 403 if (root->ref_cows && 404 generation != btrfs_root_generation(&root->root_item)) 405 return NULL; 406 407 return root; 408 } 409 #endif 410 411 static noinline_for_stack 412 int find_inline_backref(struct extent_buffer *leaf, int slot, 413 unsigned long *ptr, unsigned long *end) 414 { 415 struct btrfs_extent_item *ei; 416 struct btrfs_tree_block_info *bi; 417 u32 item_size; 418 419 item_size = btrfs_item_size_nr(leaf, slot); 420 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 421 if (item_size < sizeof(*ei)) { 422 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 423 return 1; 424 } 425 #endif 426 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); 427 WARN_ON(!(btrfs_extent_flags(leaf, ei) & 428 BTRFS_EXTENT_FLAG_TREE_BLOCK)); 429 430 if (item_size <= sizeof(*ei) + sizeof(*bi)) { 431 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); 432 return 1; 433 } 434 435 bi = (struct btrfs_tree_block_info *)(ei + 1); 436 *ptr = (unsigned long)(bi + 1); 437 *end = (unsigned long)ei + item_size; 438 return 0; 439 } 440 441 /* 442 * build backref tree for a given tree block. root of the backref tree 443 * corresponds the tree block, leaves of the backref tree correspond 444 * roots of b-trees that reference the tree block. 445 * 446 * the basic idea of this function is check backrefs of a given block 447 * to find upper level blocks that refernece the block, and then check 448 * bakcrefs of these upper level blocks recursively. the recursion stop 449 * when tree root is reached or backrefs for the block is cached. 450 * 451 * NOTE: if we find backrefs for a block are cached, we know backrefs 452 * for all upper level blocks that directly/indirectly reference the 453 * block are also cached. 454 */ 455 static struct backref_node *build_backref_tree(struct reloc_control *rc, 456 struct backref_cache *cache, 457 struct btrfs_key *node_key, 458 int level, u64 bytenr) 459 { 460 struct btrfs_path *path1; 461 struct btrfs_path *path2; 462 struct extent_buffer *eb; 463 struct btrfs_root *root; 464 struct backref_node *cur; 465 struct backref_node *upper; 466 struct backref_node *lower; 467 struct backref_node *node = NULL; 468 struct backref_node *exist = NULL; 469 struct backref_edge *edge; 470 struct rb_node *rb_node; 471 struct btrfs_key key; 472 unsigned long end; 473 unsigned long ptr; 474 LIST_HEAD(list); 475 int ret; 476 int err = 0; 477 478 path1 = btrfs_alloc_path(); 479 path2 = btrfs_alloc_path(); 480 if (!path1 || !path2) { 481 err = -ENOMEM; 482 goto out; 483 } 484 485 node = kmalloc(sizeof(*node), GFP_NOFS); 486 if (!node) { 487 err = -ENOMEM; 488 goto out; 489 } 490 491 backref_node_init(node); 492 node->bytenr = bytenr; 493 node->owner = 0; 494 node->level = level; 495 node->lowest = 1; 496 cur = node; 497 again: 498 end = 0; 499 ptr = 0; 500 key.objectid = cur->bytenr; 501 key.type = BTRFS_EXTENT_ITEM_KEY; 502 key.offset = (u64)-1; 503 504 path1->search_commit_root = 1; 505 path1->skip_locking = 1; 506 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1, 507 0, 0); 508 if (ret < 0) { 509 err = ret; 510 goto out; 511 } 512 BUG_ON(!ret || !path1->slots[0]); 513 514 path1->slots[0]--; 515 516 WARN_ON(cur->checked); 517 if (!list_empty(&cur->upper)) { 518 /* 519 * the backref was added previously when processsing 520 * backref of type BTRFS_TREE_BLOCK_REF_KEY 521 */ 522 BUG_ON(!list_is_singular(&cur->upper)); 523 edge = list_entry(cur->upper.next, struct backref_edge, 524 list[LOWER]); 525 BUG_ON(!list_empty(&edge->list[UPPER])); 526 exist = edge->node[UPPER]; 527 /* 528 * add the upper level block to pending list if we need 529 * check its backrefs 530 */ 531 if (!exist->checked) 532 list_add_tail(&edge->list[UPPER], &list); 533 } else { 534 exist = NULL; 535 } 536 537 while (1) { 538 cond_resched(); 539 eb = path1->nodes[0]; 540 541 if (ptr >= end) { 542 if (path1->slots[0] >= btrfs_header_nritems(eb)) { 543 ret = btrfs_next_leaf(rc->extent_root, path1); 544 if (ret < 0) { 545 err = ret; 546 goto out; 547 } 548 if (ret > 0) 549 break; 550 eb = path1->nodes[0]; 551 } 552 553 btrfs_item_key_to_cpu(eb, &key, path1->slots[0]); 554 if (key.objectid != cur->bytenr) { 555 WARN_ON(exist); 556 break; 557 } 558 559 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 560 ret = find_inline_backref(eb, path1->slots[0], 561 &ptr, &end); 562 if (ret) 563 goto next; 564 } 565 } 566 567 if (ptr < end) { 568 /* update key for inline back ref */ 569 struct btrfs_extent_inline_ref *iref; 570 iref = (struct btrfs_extent_inline_ref *)ptr; 571 key.type = btrfs_extent_inline_ref_type(eb, iref); 572 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 573 WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY && 574 key.type != BTRFS_SHARED_BLOCK_REF_KEY); 575 } 576 577 if (exist && 578 ((key.type == BTRFS_TREE_BLOCK_REF_KEY && 579 exist->owner == key.offset) || 580 (key.type == BTRFS_SHARED_BLOCK_REF_KEY && 581 exist->bytenr == key.offset))) { 582 exist = NULL; 583 goto next; 584 } 585 586 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 587 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || 588 key.type == BTRFS_EXTENT_REF_V0_KEY) { 589 if (key.objectid == key.offset && 590 key.type == BTRFS_EXTENT_REF_V0_KEY) { 591 struct btrfs_extent_ref_v0 *ref0; 592 ref0 = btrfs_item_ptr(eb, path1->slots[0], 593 struct btrfs_extent_ref_v0); 594 root = find_tree_root(rc, eb, ref0); 595 if (root) 596 cur->root = root; 597 else 598 cur->old_root = 1; 599 break; 600 } 601 #else 602 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); 603 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { 604 #endif 605 if (key.objectid == key.offset) { 606 /* 607 * only root blocks of reloc trees use 608 * backref of this type. 609 */ 610 root = find_reloc_root(rc, cur->bytenr); 611 BUG_ON(!root); 612 cur->root = root; 613 break; 614 } 615 616 edge = kzalloc(sizeof(*edge), GFP_NOFS); 617 if (!edge) { 618 err = -ENOMEM; 619 goto out; 620 } 621 rb_node = tree_search(&cache->rb_root, key.offset); 622 if (!rb_node) { 623 upper = kmalloc(sizeof(*upper), GFP_NOFS); 624 if (!upper) { 625 kfree(edge); 626 err = -ENOMEM; 627 goto out; 628 } 629 backref_node_init(upper); 630 upper->bytenr = key.offset; 631 upper->owner = 0; 632 upper->level = cur->level + 1; 633 /* 634 * backrefs for the upper level block isn't 635 * cached, add the block to pending list 636 */ 637 list_add_tail(&edge->list[UPPER], &list); 638 } else { 639 upper = rb_entry(rb_node, struct backref_node, 640 rb_node); 641 INIT_LIST_HEAD(&edge->list[UPPER]); 642 } 643 list_add(&edge->list[LOWER], &cur->upper); 644 edge->node[UPPER] = upper; 645 edge->node[LOWER] = cur; 646 647 goto next; 648 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { 649 goto next; 650 } 651 652 /* key.type == BTRFS_TREE_BLOCK_REF_KEY */ 653 root = read_fs_root(rc->extent_root->fs_info, key.offset); 654 if (IS_ERR(root)) { 655 err = PTR_ERR(root); 656 goto out; 657 } 658 659 if (btrfs_root_level(&root->root_item) == cur->level) { 660 /* tree root */ 661 BUG_ON(btrfs_root_bytenr(&root->root_item) != 662 cur->bytenr); 663 cur->root = root; 664 break; 665 } 666 667 level = cur->level + 1; 668 669 /* 670 * searching the tree to find upper level blocks 671 * reference the block. 672 */ 673 path2->search_commit_root = 1; 674 path2->skip_locking = 1; 675 path2->lowest_level = level; 676 ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0); 677 path2->lowest_level = 0; 678 if (ret < 0) { 679 err = ret; 680 goto out; 681 } 682 if (ret > 0 && path2->slots[level] > 0) 683 path2->slots[level]--; 684 685 eb = path2->nodes[level]; 686 WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) != 687 cur->bytenr); 688 689 lower = cur; 690 for (; level < BTRFS_MAX_LEVEL; level++) { 691 if (!path2->nodes[level]) { 692 BUG_ON(btrfs_root_bytenr(&root->root_item) != 693 lower->bytenr); 694 lower->root = root; 695 break; 696 } 697 698 edge = kzalloc(sizeof(*edge), GFP_NOFS); 699 if (!edge) { 700 err = -ENOMEM; 701 goto out; 702 } 703 704 eb = path2->nodes[level]; 705 rb_node = tree_search(&cache->rb_root, eb->start); 706 if (!rb_node) { 707 upper = kmalloc(sizeof(*upper), GFP_NOFS); 708 if (!upper) { 709 kfree(edge); 710 err = -ENOMEM; 711 goto out; 712 } 713 backref_node_init(upper); 714 upper->bytenr = eb->start; 715 upper->owner = btrfs_header_owner(eb); 716 upper->level = lower->level + 1; 717 718 /* 719 * if we know the block isn't shared 720 * we can void checking its backrefs. 721 */ 722 if (btrfs_block_can_be_shared(root, eb)) 723 upper->checked = 0; 724 else 725 upper->checked = 1; 726 727 /* 728 * add the block to pending list if we 729 * need check its backrefs. only block 730 * at 'cur->level + 1' is added to the 731 * tail of pending list. this guarantees 732 * we check backrefs from lower level 733 * blocks to upper level blocks. 734 */ 735 if (!upper->checked && 736 level == cur->level + 1) { 737 list_add_tail(&edge->list[UPPER], 738 &list); 739 } else 740 INIT_LIST_HEAD(&edge->list[UPPER]); 741 } else { 742 upper = rb_entry(rb_node, struct backref_node, 743 rb_node); 744 BUG_ON(!upper->checked); 745 INIT_LIST_HEAD(&edge->list[UPPER]); 746 } 747 list_add_tail(&edge->list[LOWER], &lower->upper); 748 edge->node[UPPER] = upper; 749 edge->node[LOWER] = lower; 750 751 if (rb_node) 752 break; 753 lower = upper; 754 upper = NULL; 755 } 756 btrfs_release_path(root, path2); 757 next: 758 if (ptr < end) { 759 ptr += btrfs_extent_inline_ref_size(key.type); 760 if (ptr >= end) { 761 WARN_ON(ptr > end); 762 ptr = 0; 763 end = 0; 764 } 765 } 766 if (ptr >= end) 767 path1->slots[0]++; 768 } 769 btrfs_release_path(rc->extent_root, path1); 770 771 cur->checked = 1; 772 WARN_ON(exist); 773 774 /* the pending list isn't empty, take the first block to process */ 775 if (!list_empty(&list)) { 776 edge = list_entry(list.next, struct backref_edge, list[UPPER]); 777 list_del_init(&edge->list[UPPER]); 778 cur = edge->node[UPPER]; 779 goto again; 780 } 781 782 /* 783 * everything goes well, connect backref nodes and insert backref nodes 784 * into the cache. 785 */ 786 BUG_ON(!node->checked); 787 rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); 788 BUG_ON(rb_node); 789 790 list_for_each_entry(edge, &node->upper, list[LOWER]) 791 list_add_tail(&edge->list[UPPER], &list); 792 793 while (!list_empty(&list)) { 794 edge = list_entry(list.next, struct backref_edge, list[UPPER]); 795 list_del_init(&edge->list[UPPER]); 796 upper = edge->node[UPPER]; 797 798 if (!RB_EMPTY_NODE(&upper->rb_node)) { 799 if (upper->lowest) { 800 list_del_init(&upper->lower); 801 upper->lowest = 0; 802 } 803 804 list_add_tail(&edge->list[UPPER], &upper->lower); 805 continue; 806 } 807 808 BUG_ON(!upper->checked); 809 rb_node = tree_insert(&cache->rb_root, upper->bytenr, 810 &upper->rb_node); 811 BUG_ON(rb_node); 812 813 list_add_tail(&edge->list[UPPER], &upper->lower); 814 815 list_for_each_entry(edge, &upper->upper, list[LOWER]) 816 list_add_tail(&edge->list[UPPER], &list); 817 } 818 out: 819 btrfs_free_path(path1); 820 btrfs_free_path(path2); 821 if (err) { 822 INIT_LIST_HEAD(&list); 823 upper = node; 824 while (upper) { 825 if (RB_EMPTY_NODE(&upper->rb_node)) { 826 list_splice_tail(&upper->upper, &list); 827 kfree(upper); 828 } 829 830 if (list_empty(&list)) 831 break; 832 833 edge = list_entry(list.next, struct backref_edge, 834 list[LOWER]); 835 upper = edge->node[UPPER]; 836 kfree(edge); 837 } 838 return ERR_PTR(err); 839 } 840 return node; 841 } 842 843 /* 844 * helper to add 'address of tree root -> reloc tree' mapping 845 */ 846 static int __add_reloc_root(struct btrfs_root *root) 847 { 848 struct rb_node *rb_node; 849 struct mapping_node *node; 850 struct reloc_control *rc = root->fs_info->reloc_ctl; 851 852 node = kmalloc(sizeof(*node), GFP_NOFS); 853 BUG_ON(!node); 854 855 node->bytenr = root->node->start; 856 node->data = root; 857 858 spin_lock(&rc->reloc_root_tree.lock); 859 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 860 node->bytenr, &node->rb_node); 861 spin_unlock(&rc->reloc_root_tree.lock); 862 BUG_ON(rb_node); 863 864 list_add_tail(&root->root_list, &rc->reloc_roots); 865 return 0; 866 } 867 868 /* 869 * helper to update/delete the 'address of tree root -> reloc tree' 870 * mapping 871 */ 872 static int __update_reloc_root(struct btrfs_root *root, int del) 873 { 874 struct rb_node *rb_node; 875 struct mapping_node *node = NULL; 876 struct reloc_control *rc = root->fs_info->reloc_ctl; 877 878 spin_lock(&rc->reloc_root_tree.lock); 879 rb_node = tree_search(&rc->reloc_root_tree.rb_root, 880 root->commit_root->start); 881 if (rb_node) { 882 node = rb_entry(rb_node, struct mapping_node, rb_node); 883 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); 884 } 885 spin_unlock(&rc->reloc_root_tree.lock); 886 887 BUG_ON((struct btrfs_root *)node->data != root); 888 889 if (!del) { 890 spin_lock(&rc->reloc_root_tree.lock); 891 node->bytenr = root->node->start; 892 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 893 node->bytenr, &node->rb_node); 894 spin_unlock(&rc->reloc_root_tree.lock); 895 BUG_ON(rb_node); 896 } else { 897 list_del_init(&root->root_list); 898 kfree(node); 899 } 900 return 0; 901 } 902 903 /* 904 * create reloc tree for a given fs tree. reloc tree is just a 905 * snapshot of the fs tree with special root objectid. 906 */ 907 int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, 908 struct btrfs_root *root) 909 { 910 struct btrfs_root *reloc_root; 911 struct extent_buffer *eb; 912 struct btrfs_root_item *root_item; 913 struct btrfs_key root_key; 914 int ret; 915 916 if (root->reloc_root) { 917 reloc_root = root->reloc_root; 918 reloc_root->last_trans = trans->transid; 919 return 0; 920 } 921 922 if (!root->fs_info->reloc_ctl || 923 !root->fs_info->reloc_ctl->create_reloc_root || 924 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 925 return 0; 926 927 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 928 BUG_ON(!root_item); 929 930 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; 931 root_key.type = BTRFS_ROOT_ITEM_KEY; 932 root_key.offset = root->root_key.objectid; 933 934 ret = btrfs_copy_root(trans, root, root->commit_root, &eb, 935 BTRFS_TREE_RELOC_OBJECTID); 936 BUG_ON(ret); 937 938 btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); 939 memcpy(root_item, &root->root_item, sizeof(*root_item)); 940 btrfs_set_root_refs(root_item, 1); 941 btrfs_set_root_bytenr(root_item, eb->start); 942 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 943 btrfs_set_root_generation(root_item, trans->transid); 944 memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); 945 root_item->drop_level = 0; 946 947 btrfs_tree_unlock(eb); 948 free_extent_buffer(eb); 949 950 ret = btrfs_insert_root(trans, root->fs_info->tree_root, 951 &root_key, root_item); 952 BUG_ON(ret); 953 kfree(root_item); 954 955 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 956 &root_key); 957 BUG_ON(IS_ERR(reloc_root)); 958 reloc_root->last_trans = trans->transid; 959 960 __add_reloc_root(reloc_root); 961 root->reloc_root = reloc_root; 962 return 0; 963 } 964 965 /* 966 * update root item of reloc tree 967 */ 968 int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, 969 struct btrfs_root *root) 970 { 971 struct btrfs_root *reloc_root; 972 struct btrfs_root_item *root_item; 973 int del = 0; 974 int ret; 975 976 if (!root->reloc_root) 977 return 0; 978 979 reloc_root = root->reloc_root; 980 root_item = &reloc_root->root_item; 981 982 if (btrfs_root_refs(root_item) == 0) { 983 root->reloc_root = NULL; 984 del = 1; 985 } 986 987 __update_reloc_root(reloc_root, del); 988 989 if (reloc_root->commit_root != reloc_root->node) { 990 btrfs_set_root_node(root_item, reloc_root->node); 991 free_extent_buffer(reloc_root->commit_root); 992 reloc_root->commit_root = btrfs_root_node(reloc_root); 993 } 994 995 ret = btrfs_update_root(trans, root->fs_info->tree_root, 996 &reloc_root->root_key, root_item); 997 BUG_ON(ret); 998 return 0; 999 } 1000 1001 /* 1002 * helper to find first cached inode with inode number >= objectid 1003 * in a subvolume 1004 */ 1005 static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid) 1006 { 1007 struct rb_node *node; 1008 struct rb_node *prev; 1009 struct btrfs_inode *entry; 1010 struct inode *inode; 1011 1012 spin_lock(&root->inode_lock); 1013 again: 1014 node = root->inode_tree.rb_node; 1015 prev = NULL; 1016 while (node) { 1017 prev = node; 1018 entry = rb_entry(node, struct btrfs_inode, rb_node); 1019 1020 if (objectid < entry->vfs_inode.i_ino) 1021 node = node->rb_left; 1022 else if (objectid > entry->vfs_inode.i_ino) 1023 node = node->rb_right; 1024 else 1025 break; 1026 } 1027 if (!node) { 1028 while (prev) { 1029 entry = rb_entry(prev, struct btrfs_inode, rb_node); 1030 if (objectid <= entry->vfs_inode.i_ino) { 1031 node = prev; 1032 break; 1033 } 1034 prev = rb_next(prev); 1035 } 1036 } 1037 while (node) { 1038 entry = rb_entry(node, struct btrfs_inode, rb_node); 1039 inode = igrab(&entry->vfs_inode); 1040 if (inode) { 1041 spin_unlock(&root->inode_lock); 1042 return inode; 1043 } 1044 1045 objectid = entry->vfs_inode.i_ino + 1; 1046 if (cond_resched_lock(&root->inode_lock)) 1047 goto again; 1048 1049 node = rb_next(node); 1050 } 1051 spin_unlock(&root->inode_lock); 1052 return NULL; 1053 } 1054 1055 static int in_block_group(u64 bytenr, 1056 struct btrfs_block_group_cache *block_group) 1057 { 1058 if (bytenr >= block_group->key.objectid && 1059 bytenr < block_group->key.objectid + block_group->key.offset) 1060 return 1; 1061 return 0; 1062 } 1063 1064 /* 1065 * get new location of data 1066 */ 1067 static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, 1068 u64 bytenr, u64 num_bytes) 1069 { 1070 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 1071 struct btrfs_path *path; 1072 struct btrfs_file_extent_item *fi; 1073 struct extent_buffer *leaf; 1074 int ret; 1075 1076 path = btrfs_alloc_path(); 1077 if (!path) 1078 return -ENOMEM; 1079 1080 bytenr -= BTRFS_I(reloc_inode)->index_cnt; 1081 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 1082 bytenr, 0); 1083 if (ret < 0) 1084 goto out; 1085 if (ret > 0) { 1086 ret = -ENOENT; 1087 goto out; 1088 } 1089 1090 leaf = path->nodes[0]; 1091 fi = btrfs_item_ptr(leaf, path->slots[0], 1092 struct btrfs_file_extent_item); 1093 1094 BUG_ON(btrfs_file_extent_offset(leaf, fi) || 1095 btrfs_file_extent_compression(leaf, fi) || 1096 btrfs_file_extent_encryption(leaf, fi) || 1097 btrfs_file_extent_other_encoding(leaf, fi)); 1098 1099 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { 1100 ret = 1; 1101 goto out; 1102 } 1103 1104 if (new_bytenr) 1105 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1106 ret = 0; 1107 out: 1108 btrfs_free_path(path); 1109 return ret; 1110 } 1111 1112 /* 1113 * update file extent items in the tree leaf to point to 1114 * the new locations. 1115 */ 1116 static int replace_file_extents(struct btrfs_trans_handle *trans, 1117 struct reloc_control *rc, 1118 struct btrfs_root *root, 1119 struct extent_buffer *leaf, 1120 struct list_head *inode_list) 1121 { 1122 struct btrfs_key key; 1123 struct btrfs_file_extent_item *fi; 1124 struct inode *inode = NULL; 1125 struct inodevec *ivec = NULL; 1126 u64 parent; 1127 u64 bytenr; 1128 u64 new_bytenr; 1129 u64 num_bytes; 1130 u64 end; 1131 u32 nritems; 1132 u32 i; 1133 int ret; 1134 int first = 1; 1135 int dirty = 0; 1136 1137 if (rc->stage != UPDATE_DATA_PTRS) 1138 return 0; 1139 1140 /* reloc trees always use full backref */ 1141 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 1142 parent = leaf->start; 1143 else 1144 parent = 0; 1145 1146 nritems = btrfs_header_nritems(leaf); 1147 for (i = 0; i < nritems; i++) { 1148 cond_resched(); 1149 btrfs_item_key_to_cpu(leaf, &key, i); 1150 if (key.type != BTRFS_EXTENT_DATA_KEY) 1151 continue; 1152 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 1153 if (btrfs_file_extent_type(leaf, fi) == 1154 BTRFS_FILE_EXTENT_INLINE) 1155 continue; 1156 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1157 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 1158 if (bytenr == 0) 1159 continue; 1160 if (!in_block_group(bytenr, rc->block_group)) 1161 continue; 1162 1163 /* 1164 * if we are modifying block in fs tree, wait for readpage 1165 * to complete and drop the extent cache 1166 */ 1167 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 1168 if (!ivec || ivec->nr == INODEVEC_SIZE) { 1169 ivec = kmalloc(sizeof(*ivec), GFP_NOFS); 1170 BUG_ON(!ivec); 1171 ivec->nr = 0; 1172 list_add_tail(&ivec->list, inode_list); 1173 } 1174 if (first) { 1175 inode = find_next_inode(root, key.objectid); 1176 if (inode) 1177 ivec->inode[ivec->nr++] = inode; 1178 first = 0; 1179 } else if (inode && inode->i_ino < key.objectid) { 1180 inode = find_next_inode(root, key.objectid); 1181 if (inode) 1182 ivec->inode[ivec->nr++] = inode; 1183 } 1184 if (inode && inode->i_ino == key.objectid) { 1185 end = key.offset + 1186 btrfs_file_extent_num_bytes(leaf, fi); 1187 WARN_ON(!IS_ALIGNED(key.offset, 1188 root->sectorsize)); 1189 WARN_ON(!IS_ALIGNED(end, root->sectorsize)); 1190 end--; 1191 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 1192 key.offset, end, 1193 GFP_NOFS); 1194 if (!ret) 1195 continue; 1196 1197 btrfs_drop_extent_cache(inode, key.offset, end, 1198 1); 1199 unlock_extent(&BTRFS_I(inode)->io_tree, 1200 key.offset, end, GFP_NOFS); 1201 } 1202 } 1203 1204 ret = get_new_location(rc->data_inode, &new_bytenr, 1205 bytenr, num_bytes); 1206 if (ret > 0) 1207 continue; 1208 BUG_ON(ret < 0); 1209 1210 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); 1211 dirty = 1; 1212 1213 key.offset -= btrfs_file_extent_offset(leaf, fi); 1214 ret = btrfs_inc_extent_ref(trans, root, new_bytenr, 1215 num_bytes, parent, 1216 btrfs_header_owner(leaf), 1217 key.objectid, key.offset); 1218 BUG_ON(ret); 1219 1220 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1221 parent, btrfs_header_owner(leaf), 1222 key.objectid, key.offset); 1223 BUG_ON(ret); 1224 } 1225 if (dirty) 1226 btrfs_mark_buffer_dirty(leaf); 1227 return 0; 1228 } 1229 1230 static noinline_for_stack 1231 int memcmp_node_keys(struct extent_buffer *eb, int slot, 1232 struct btrfs_path *path, int level) 1233 { 1234 struct btrfs_disk_key key1; 1235 struct btrfs_disk_key key2; 1236 btrfs_node_key(eb, &key1, slot); 1237 btrfs_node_key(path->nodes[level], &key2, path->slots[level]); 1238 return memcmp(&key1, &key2, sizeof(key1)); 1239 } 1240 1241 /* 1242 * try to replace tree blocks in fs tree with the new blocks 1243 * in reloc tree. tree blocks haven't been modified since the 1244 * reloc tree was create can be replaced. 1245 * 1246 * if a block was replaced, level of the block + 1 is returned. 1247 * if no block got replaced, 0 is returned. if there are other 1248 * errors, a negative error number is returned. 1249 */ 1250 static int replace_path(struct btrfs_trans_handle *trans, 1251 struct btrfs_root *dest, struct btrfs_root *src, 1252 struct btrfs_path *path, struct btrfs_key *next_key, 1253 struct extent_buffer **leaf, 1254 int lowest_level, int max_level) 1255 { 1256 struct extent_buffer *eb; 1257 struct extent_buffer *parent; 1258 struct btrfs_key key; 1259 u64 old_bytenr; 1260 u64 new_bytenr; 1261 u64 old_ptr_gen; 1262 u64 new_ptr_gen; 1263 u64 last_snapshot; 1264 u32 blocksize; 1265 int level; 1266 int ret; 1267 int slot; 1268 1269 BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 1270 BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); 1271 BUG_ON(lowest_level > 1 && leaf); 1272 1273 last_snapshot = btrfs_root_last_snapshot(&src->root_item); 1274 1275 slot = path->slots[lowest_level]; 1276 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); 1277 1278 eb = btrfs_lock_root_node(dest); 1279 btrfs_set_lock_blocking(eb); 1280 level = btrfs_header_level(eb); 1281 1282 if (level < lowest_level) { 1283 btrfs_tree_unlock(eb); 1284 free_extent_buffer(eb); 1285 return 0; 1286 } 1287 1288 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); 1289 BUG_ON(ret); 1290 btrfs_set_lock_blocking(eb); 1291 1292 if (next_key) { 1293 next_key->objectid = (u64)-1; 1294 next_key->type = (u8)-1; 1295 next_key->offset = (u64)-1; 1296 } 1297 1298 parent = eb; 1299 while (1) { 1300 level = btrfs_header_level(parent); 1301 BUG_ON(level < lowest_level); 1302 1303 ret = btrfs_bin_search(parent, &key, level, &slot); 1304 if (ret && slot > 0) 1305 slot--; 1306 1307 if (next_key && slot + 1 < btrfs_header_nritems(parent)) 1308 btrfs_node_key_to_cpu(parent, next_key, slot + 1); 1309 1310 old_bytenr = btrfs_node_blockptr(parent, slot); 1311 blocksize = btrfs_level_size(dest, level - 1); 1312 old_ptr_gen = btrfs_node_ptr_generation(parent, slot); 1313 1314 if (level <= max_level) { 1315 eb = path->nodes[level]; 1316 new_bytenr = btrfs_node_blockptr(eb, 1317 path->slots[level]); 1318 new_ptr_gen = btrfs_node_ptr_generation(eb, 1319 path->slots[level]); 1320 } else { 1321 new_bytenr = 0; 1322 new_ptr_gen = 0; 1323 } 1324 1325 if (new_bytenr > 0 && new_bytenr == old_bytenr) { 1326 WARN_ON(1); 1327 ret = level; 1328 break; 1329 } 1330 1331 if (new_bytenr == 0 || old_ptr_gen > last_snapshot || 1332 memcmp_node_keys(parent, slot, path, level)) { 1333 if (level <= lowest_level && !leaf) { 1334 ret = 0; 1335 break; 1336 } 1337 1338 eb = read_tree_block(dest, old_bytenr, blocksize, 1339 old_ptr_gen); 1340 btrfs_tree_lock(eb); 1341 ret = btrfs_cow_block(trans, dest, eb, parent, 1342 slot, &eb); 1343 BUG_ON(ret); 1344 btrfs_set_lock_blocking(eb); 1345 1346 if (level <= lowest_level) { 1347 *leaf = eb; 1348 ret = 0; 1349 break; 1350 } 1351 1352 btrfs_tree_unlock(parent); 1353 free_extent_buffer(parent); 1354 1355 parent = eb; 1356 continue; 1357 } 1358 1359 btrfs_node_key_to_cpu(path->nodes[level], &key, 1360 path->slots[level]); 1361 btrfs_release_path(src, path); 1362 1363 path->lowest_level = level; 1364 ret = btrfs_search_slot(trans, src, &key, path, 0, 1); 1365 path->lowest_level = 0; 1366 BUG_ON(ret); 1367 1368 /* 1369 * swap blocks in fs tree and reloc tree. 1370 */ 1371 btrfs_set_node_blockptr(parent, slot, new_bytenr); 1372 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen); 1373 btrfs_mark_buffer_dirty(parent); 1374 1375 btrfs_set_node_blockptr(path->nodes[level], 1376 path->slots[level], old_bytenr); 1377 btrfs_set_node_ptr_generation(path->nodes[level], 1378 path->slots[level], old_ptr_gen); 1379 btrfs_mark_buffer_dirty(path->nodes[level]); 1380 1381 ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize, 1382 path->nodes[level]->start, 1383 src->root_key.objectid, level - 1, 0); 1384 BUG_ON(ret); 1385 ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize, 1386 0, dest->root_key.objectid, level - 1, 1387 0); 1388 BUG_ON(ret); 1389 1390 ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, 1391 path->nodes[level]->start, 1392 src->root_key.objectid, level - 1, 0); 1393 BUG_ON(ret); 1394 1395 ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, 1396 0, dest->root_key.objectid, level - 1, 1397 0); 1398 BUG_ON(ret); 1399 1400 btrfs_unlock_up_safe(path, 0); 1401 1402 ret = level; 1403 break; 1404 } 1405 btrfs_tree_unlock(parent); 1406 free_extent_buffer(parent); 1407 return ret; 1408 } 1409 1410 /* 1411 * helper to find next relocated block in reloc tree 1412 */ 1413 static noinline_for_stack 1414 int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, 1415 int *level) 1416 { 1417 struct extent_buffer *eb; 1418 int i; 1419 u64 last_snapshot; 1420 u32 nritems; 1421 1422 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 1423 1424 for (i = 0; i < *level; i++) { 1425 free_extent_buffer(path->nodes[i]); 1426 path->nodes[i] = NULL; 1427 } 1428 1429 for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) { 1430 eb = path->nodes[i]; 1431 nritems = btrfs_header_nritems(eb); 1432 while (path->slots[i] + 1 < nritems) { 1433 path->slots[i]++; 1434 if (btrfs_node_ptr_generation(eb, path->slots[i]) <= 1435 last_snapshot) 1436 continue; 1437 1438 *level = i; 1439 return 0; 1440 } 1441 free_extent_buffer(path->nodes[i]); 1442 path->nodes[i] = NULL; 1443 } 1444 return 1; 1445 } 1446 1447 /* 1448 * walk down reloc tree to find relocated block of lowest level 1449 */ 1450 static noinline_for_stack 1451 int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, 1452 int *level) 1453 { 1454 struct extent_buffer *eb = NULL; 1455 int i; 1456 u64 bytenr; 1457 u64 ptr_gen = 0; 1458 u64 last_snapshot; 1459 u32 blocksize; 1460 u32 nritems; 1461 1462 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 1463 1464 for (i = *level; i > 0; i--) { 1465 eb = path->nodes[i]; 1466 nritems = btrfs_header_nritems(eb); 1467 while (path->slots[i] < nritems) { 1468 ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]); 1469 if (ptr_gen > last_snapshot) 1470 break; 1471 path->slots[i]++; 1472 } 1473 if (path->slots[i] >= nritems) { 1474 if (i == *level) 1475 break; 1476 *level = i + 1; 1477 return 0; 1478 } 1479 if (i == 1) { 1480 *level = i; 1481 return 0; 1482 } 1483 1484 bytenr = btrfs_node_blockptr(eb, path->slots[i]); 1485 blocksize = btrfs_level_size(root, i - 1); 1486 eb = read_tree_block(root, bytenr, blocksize, ptr_gen); 1487 BUG_ON(btrfs_header_level(eb) != i - 1); 1488 path->nodes[i - 1] = eb; 1489 path->slots[i - 1] = 0; 1490 } 1491 return 1; 1492 } 1493 1494 /* 1495 * invalidate extent cache for file extents whose key in range of 1496 * [min_key, max_key) 1497 */ 1498 static int invalidate_extent_cache(struct btrfs_root *root, 1499 struct btrfs_key *min_key, 1500 struct btrfs_key *max_key) 1501 { 1502 struct inode *inode = NULL; 1503 u64 objectid; 1504 u64 start, end; 1505 1506 objectid = min_key->objectid; 1507 while (1) { 1508 cond_resched(); 1509 iput(inode); 1510 1511 if (objectid > max_key->objectid) 1512 break; 1513 1514 inode = find_next_inode(root, objectid); 1515 if (!inode) 1516 break; 1517 1518 if (inode->i_ino > max_key->objectid) { 1519 iput(inode); 1520 break; 1521 } 1522 1523 objectid = inode->i_ino + 1; 1524 if (!S_ISREG(inode->i_mode)) 1525 continue; 1526 1527 if (unlikely(min_key->objectid == inode->i_ino)) { 1528 if (min_key->type > BTRFS_EXTENT_DATA_KEY) 1529 continue; 1530 if (min_key->type < BTRFS_EXTENT_DATA_KEY) 1531 start = 0; 1532 else { 1533 start = min_key->offset; 1534 WARN_ON(!IS_ALIGNED(start, root->sectorsize)); 1535 } 1536 } else { 1537 start = 0; 1538 } 1539 1540 if (unlikely(max_key->objectid == inode->i_ino)) { 1541 if (max_key->type < BTRFS_EXTENT_DATA_KEY) 1542 continue; 1543 if (max_key->type > BTRFS_EXTENT_DATA_KEY) { 1544 end = (u64)-1; 1545 } else { 1546 if (max_key->offset == 0) 1547 continue; 1548 end = max_key->offset; 1549 WARN_ON(!IS_ALIGNED(end, root->sectorsize)); 1550 end--; 1551 } 1552 } else { 1553 end = (u64)-1; 1554 } 1555 1556 /* the lock_extent waits for readpage to complete */ 1557 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 1558 btrfs_drop_extent_cache(inode, start, end, 1); 1559 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 1560 } 1561 return 0; 1562 } 1563 1564 static void put_inodes(struct list_head *list) 1565 { 1566 struct inodevec *ivec; 1567 while (!list_empty(list)) { 1568 ivec = list_entry(list->next, struct inodevec, list); 1569 list_del(&ivec->list); 1570 while (ivec->nr > 0) { 1571 ivec->nr--; 1572 iput(ivec->inode[ivec->nr]); 1573 } 1574 kfree(ivec); 1575 } 1576 } 1577 1578 static int find_next_key(struct btrfs_path *path, int level, 1579 struct btrfs_key *key) 1580 1581 { 1582 while (level < BTRFS_MAX_LEVEL) { 1583 if (!path->nodes[level]) 1584 break; 1585 if (path->slots[level] + 1 < 1586 btrfs_header_nritems(path->nodes[level])) { 1587 btrfs_node_key_to_cpu(path->nodes[level], key, 1588 path->slots[level] + 1); 1589 return 0; 1590 } 1591 level++; 1592 } 1593 return 1; 1594 } 1595 1596 /* 1597 * merge the relocated tree blocks in reloc tree with corresponding 1598 * fs tree. 1599 */ 1600 static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, 1601 struct btrfs_root *root) 1602 { 1603 LIST_HEAD(inode_list); 1604 struct btrfs_key key; 1605 struct btrfs_key next_key; 1606 struct btrfs_trans_handle *trans; 1607 struct btrfs_root *reloc_root; 1608 struct btrfs_root_item *root_item; 1609 struct btrfs_path *path; 1610 struct extent_buffer *leaf = NULL; 1611 unsigned long nr; 1612 int level; 1613 int max_level; 1614 int replaced = 0; 1615 int ret; 1616 int err = 0; 1617 1618 path = btrfs_alloc_path(); 1619 if (!path) 1620 return -ENOMEM; 1621 1622 reloc_root = root->reloc_root; 1623 root_item = &reloc_root->root_item; 1624 1625 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 1626 level = btrfs_root_level(root_item); 1627 extent_buffer_get(reloc_root->node); 1628 path->nodes[level] = reloc_root->node; 1629 path->slots[level] = 0; 1630 } else { 1631 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 1632 1633 level = root_item->drop_level; 1634 BUG_ON(level == 0); 1635 path->lowest_level = level; 1636 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0); 1637 path->lowest_level = 0; 1638 if (ret < 0) { 1639 btrfs_free_path(path); 1640 return ret; 1641 } 1642 1643 btrfs_node_key_to_cpu(path->nodes[level], &next_key, 1644 path->slots[level]); 1645 WARN_ON(memcmp(&key, &next_key, sizeof(key))); 1646 1647 btrfs_unlock_up_safe(path, 0); 1648 } 1649 1650 if (level == 0 && rc->stage == UPDATE_DATA_PTRS) { 1651 trans = btrfs_start_transaction(root, 1); 1652 1653 leaf = path->nodes[0]; 1654 btrfs_item_key_to_cpu(leaf, &key, 0); 1655 btrfs_release_path(reloc_root, path); 1656 1657 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 1658 if (ret < 0) { 1659 err = ret; 1660 goto out; 1661 } 1662 1663 leaf = path->nodes[0]; 1664 btrfs_unlock_up_safe(path, 1); 1665 ret = replace_file_extents(trans, rc, root, leaf, 1666 &inode_list); 1667 if (ret < 0) 1668 err = ret; 1669 goto out; 1670 } 1671 1672 memset(&next_key, 0, sizeof(next_key)); 1673 1674 while (1) { 1675 leaf = NULL; 1676 replaced = 0; 1677 trans = btrfs_start_transaction(root, 1); 1678 max_level = level; 1679 1680 ret = walk_down_reloc_tree(reloc_root, path, &level); 1681 if (ret < 0) { 1682 err = ret; 1683 goto out; 1684 } 1685 if (ret > 0) 1686 break; 1687 1688 if (!find_next_key(path, level, &key) && 1689 btrfs_comp_cpu_keys(&next_key, &key) >= 0) { 1690 ret = 0; 1691 } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) { 1692 ret = replace_path(trans, root, reloc_root, 1693 path, &next_key, &leaf, 1694 level, max_level); 1695 } else { 1696 ret = replace_path(trans, root, reloc_root, 1697 path, &next_key, NULL, 1698 level, max_level); 1699 } 1700 if (ret < 0) { 1701 err = ret; 1702 goto out; 1703 } 1704 1705 if (ret > 0) { 1706 level = ret; 1707 btrfs_node_key_to_cpu(path->nodes[level], &key, 1708 path->slots[level]); 1709 replaced = 1; 1710 } else if (leaf) { 1711 /* 1712 * no block got replaced, try replacing file extents 1713 */ 1714 btrfs_item_key_to_cpu(leaf, &key, 0); 1715 ret = replace_file_extents(trans, rc, root, leaf, 1716 &inode_list); 1717 btrfs_tree_unlock(leaf); 1718 free_extent_buffer(leaf); 1719 BUG_ON(ret < 0); 1720 } 1721 1722 ret = walk_up_reloc_tree(reloc_root, path, &level); 1723 if (ret > 0) 1724 break; 1725 1726 BUG_ON(level == 0); 1727 /* 1728 * save the merging progress in the drop_progress. 1729 * this is OK since root refs == 1 in this case. 1730 */ 1731 btrfs_node_key(path->nodes[level], &root_item->drop_progress, 1732 path->slots[level]); 1733 root_item->drop_level = level; 1734 1735 nr = trans->blocks_used; 1736 btrfs_end_transaction(trans, root); 1737 1738 btrfs_btree_balance_dirty(root, nr); 1739 1740 /* 1741 * put inodes outside transaction, otherwise we may deadlock. 1742 */ 1743 put_inodes(&inode_list); 1744 1745 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1746 invalidate_extent_cache(root, &key, &next_key); 1747 } 1748 1749 /* 1750 * handle the case only one block in the fs tree need to be 1751 * relocated and the block is tree root. 1752 */ 1753 leaf = btrfs_lock_root_node(root); 1754 ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf); 1755 btrfs_tree_unlock(leaf); 1756 free_extent_buffer(leaf); 1757 if (ret < 0) 1758 err = ret; 1759 out: 1760 btrfs_free_path(path); 1761 1762 if (err == 0) { 1763 memset(&root_item->drop_progress, 0, 1764 sizeof(root_item->drop_progress)); 1765 root_item->drop_level = 0; 1766 btrfs_set_root_refs(root_item, 0); 1767 } 1768 1769 nr = trans->blocks_used; 1770 btrfs_end_transaction(trans, root); 1771 1772 btrfs_btree_balance_dirty(root, nr); 1773 1774 put_inodes(&inode_list); 1775 1776 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1777 invalidate_extent_cache(root, &key, &next_key); 1778 1779 return err; 1780 } 1781 1782 /* 1783 * callback for the work threads. 1784 * this function merges reloc tree with corresponding fs tree, 1785 * and then drops the reloc tree. 1786 */ 1787 static void merge_func(struct btrfs_work *work) 1788 { 1789 struct btrfs_trans_handle *trans; 1790 struct btrfs_root *root; 1791 struct btrfs_root *reloc_root; 1792 struct async_merge *async; 1793 1794 async = container_of(work, struct async_merge, work); 1795 reloc_root = async->root; 1796 1797 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 1798 root = read_fs_root(reloc_root->fs_info, 1799 reloc_root->root_key.offset); 1800 BUG_ON(IS_ERR(root)); 1801 BUG_ON(root->reloc_root != reloc_root); 1802 1803 merge_reloc_root(async->rc, root); 1804 1805 trans = btrfs_start_transaction(root, 1); 1806 btrfs_update_reloc_root(trans, root); 1807 btrfs_end_transaction(trans, root); 1808 } 1809 1810 btrfs_drop_snapshot(reloc_root, 0); 1811 1812 if (atomic_dec_and_test(async->num_pending)) 1813 complete(async->done); 1814 1815 kfree(async); 1816 } 1817 1818 static int merge_reloc_roots(struct reloc_control *rc) 1819 { 1820 struct async_merge *async; 1821 struct btrfs_root *root; 1822 struct completion done; 1823 atomic_t num_pending; 1824 1825 init_completion(&done); 1826 atomic_set(&num_pending, 1); 1827 1828 while (!list_empty(&rc->reloc_roots)) { 1829 root = list_entry(rc->reloc_roots.next, 1830 struct btrfs_root, root_list); 1831 list_del_init(&root->root_list); 1832 1833 async = kmalloc(sizeof(*async), GFP_NOFS); 1834 BUG_ON(!async); 1835 async->work.func = merge_func; 1836 async->work.flags = 0; 1837 async->rc = rc; 1838 async->root = root; 1839 async->done = &done; 1840 async->num_pending = &num_pending; 1841 atomic_inc(&num_pending); 1842 btrfs_queue_worker(&rc->workers, &async->work); 1843 } 1844 1845 if (!atomic_dec_and_test(&num_pending)) 1846 wait_for_completion(&done); 1847 1848 BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); 1849 return 0; 1850 } 1851 1852 static void free_block_list(struct rb_root *blocks) 1853 { 1854 struct tree_block *block; 1855 struct rb_node *rb_node; 1856 while ((rb_node = rb_first(blocks))) { 1857 block = rb_entry(rb_node, struct tree_block, rb_node); 1858 rb_erase(rb_node, blocks); 1859 kfree(block); 1860 } 1861 } 1862 1863 static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, 1864 struct btrfs_root *reloc_root) 1865 { 1866 struct btrfs_root *root; 1867 1868 if (reloc_root->last_trans == trans->transid) 1869 return 0; 1870 1871 root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset); 1872 BUG_ON(IS_ERR(root)); 1873 BUG_ON(root->reloc_root != reloc_root); 1874 1875 return btrfs_record_root_in_trans(trans, root); 1876 } 1877 1878 /* 1879 * select one tree from trees that references the block. 1880 * for blocks in refernce counted trees, we preper reloc tree. 1881 * if no reloc tree found and reloc_only is true, NULL is returned. 1882 */ 1883 static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans, 1884 struct backref_node *node, 1885 struct backref_edge *edges[], 1886 int *nr, int reloc_only) 1887 { 1888 struct backref_node *next; 1889 struct btrfs_root *root; 1890 int index; 1891 int loop = 0; 1892 again: 1893 index = 0; 1894 next = node; 1895 while (1) { 1896 cond_resched(); 1897 next = walk_up_backref(next, edges, &index); 1898 root = next->root; 1899 if (!root) { 1900 BUG_ON(!node->old_root); 1901 goto skip; 1902 } 1903 1904 /* no other choice for non-refernce counted tree */ 1905 if (!root->ref_cows) { 1906 BUG_ON(reloc_only); 1907 break; 1908 } 1909 1910 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 1911 record_reloc_root_in_trans(trans, root); 1912 break; 1913 } 1914 1915 if (loop) { 1916 btrfs_record_root_in_trans(trans, root); 1917 break; 1918 } 1919 1920 if (reloc_only || next != node) { 1921 if (!root->reloc_root) 1922 btrfs_record_root_in_trans(trans, root); 1923 root = root->reloc_root; 1924 /* 1925 * if the reloc tree was created in current 1926 * transation, there is no node in backref tree 1927 * corresponds to the root of the reloc tree. 1928 */ 1929 if (btrfs_root_last_snapshot(&root->root_item) == 1930 trans->transid - 1) 1931 break; 1932 } 1933 skip: 1934 root = NULL; 1935 next = walk_down_backref(edges, &index); 1936 if (!next || next->level <= node->level) 1937 break; 1938 } 1939 1940 if (!root && !loop && !reloc_only) { 1941 loop = 1; 1942 goto again; 1943 } 1944 1945 if (root) 1946 *nr = index; 1947 else 1948 *nr = 0; 1949 1950 return root; 1951 } 1952 1953 static noinline_for_stack 1954 struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, 1955 struct backref_node *node) 1956 { 1957 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 1958 int nr; 1959 return __select_one_root(trans, node, edges, &nr, 0); 1960 } 1961 1962 static noinline_for_stack 1963 struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, 1964 struct backref_node *node, 1965 struct backref_edge *edges[], int *nr) 1966 { 1967 return __select_one_root(trans, node, edges, nr, 1); 1968 } 1969 1970 static void grab_path_buffers(struct btrfs_path *path, 1971 struct backref_node *node, 1972 struct backref_edge *edges[], int nr) 1973 { 1974 int i = 0; 1975 while (1) { 1976 drop_node_buffer(node); 1977 node->eb = path->nodes[node->level]; 1978 BUG_ON(!node->eb); 1979 if (path->locks[node->level]) 1980 node->locked = 1; 1981 path->nodes[node->level] = NULL; 1982 path->locks[node->level] = 0; 1983 1984 if (i >= nr) 1985 break; 1986 1987 edges[i]->blockptr = node->eb->start; 1988 node = edges[i]->node[UPPER]; 1989 i++; 1990 } 1991 } 1992 1993 /* 1994 * relocate a block tree, and then update pointers in upper level 1995 * blocks that reference the block to point to the new location. 1996 * 1997 * if called by link_to_upper, the block has already been relocated. 1998 * in that case this function just updates pointers. 1999 */ 2000 static int do_relocation(struct btrfs_trans_handle *trans, 2001 struct backref_node *node, 2002 struct btrfs_key *key, 2003 struct btrfs_path *path, int lowest) 2004 { 2005 struct backref_node *upper; 2006 struct backref_edge *edge; 2007 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2008 struct btrfs_root *root; 2009 struct extent_buffer *eb; 2010 u32 blocksize; 2011 u64 bytenr; 2012 u64 generation; 2013 int nr; 2014 int slot; 2015 int ret; 2016 int err = 0; 2017 2018 BUG_ON(lowest && node->eb); 2019 2020 path->lowest_level = node->level + 1; 2021 list_for_each_entry(edge, &node->upper, list[LOWER]) { 2022 cond_resched(); 2023 if (node->eb && node->eb->start == edge->blockptr) 2024 continue; 2025 2026 upper = edge->node[UPPER]; 2027 root = select_reloc_root(trans, upper, edges, &nr); 2028 if (!root) 2029 continue; 2030 2031 if (upper->eb && !upper->locked) 2032 drop_node_buffer(upper); 2033 2034 if (!upper->eb) { 2035 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2036 if (ret < 0) { 2037 err = ret; 2038 break; 2039 } 2040 BUG_ON(ret > 0); 2041 2042 slot = path->slots[upper->level]; 2043 2044 btrfs_unlock_up_safe(path, upper->level + 1); 2045 grab_path_buffers(path, upper, edges, nr); 2046 2047 btrfs_release_path(NULL, path); 2048 } else { 2049 ret = btrfs_bin_search(upper->eb, key, upper->level, 2050 &slot); 2051 BUG_ON(ret); 2052 } 2053 2054 bytenr = btrfs_node_blockptr(upper->eb, slot); 2055 if (!lowest) { 2056 if (node->eb->start == bytenr) { 2057 btrfs_tree_unlock(upper->eb); 2058 upper->locked = 0; 2059 continue; 2060 } 2061 } else { 2062 BUG_ON(node->bytenr != bytenr); 2063 } 2064 2065 blocksize = btrfs_level_size(root, node->level); 2066 generation = btrfs_node_ptr_generation(upper->eb, slot); 2067 eb = read_tree_block(root, bytenr, blocksize, generation); 2068 btrfs_tree_lock(eb); 2069 btrfs_set_lock_blocking(eb); 2070 2071 if (!node->eb) { 2072 ret = btrfs_cow_block(trans, root, eb, upper->eb, 2073 slot, &eb); 2074 if (ret < 0) { 2075 err = ret; 2076 break; 2077 } 2078 btrfs_set_lock_blocking(eb); 2079 node->eb = eb; 2080 node->locked = 1; 2081 } else { 2082 btrfs_set_node_blockptr(upper->eb, slot, 2083 node->eb->start); 2084 btrfs_set_node_ptr_generation(upper->eb, slot, 2085 trans->transid); 2086 btrfs_mark_buffer_dirty(upper->eb); 2087 2088 ret = btrfs_inc_extent_ref(trans, root, 2089 node->eb->start, blocksize, 2090 upper->eb->start, 2091 btrfs_header_owner(upper->eb), 2092 node->level, 0); 2093 BUG_ON(ret); 2094 2095 ret = btrfs_drop_subtree(trans, root, eb, upper->eb); 2096 BUG_ON(ret); 2097 } 2098 if (!lowest) { 2099 btrfs_tree_unlock(upper->eb); 2100 upper->locked = 0; 2101 } 2102 } 2103 path->lowest_level = 0; 2104 return err; 2105 } 2106 2107 static int link_to_upper(struct btrfs_trans_handle *trans, 2108 struct backref_node *node, 2109 struct btrfs_path *path) 2110 { 2111 struct btrfs_key key; 2112 if (!node->eb || list_empty(&node->upper)) 2113 return 0; 2114 2115 btrfs_node_key_to_cpu(node->eb, &key, 0); 2116 return do_relocation(trans, node, &key, path, 0); 2117 } 2118 2119 static int finish_pending_nodes(struct btrfs_trans_handle *trans, 2120 struct backref_cache *cache, 2121 struct btrfs_path *path) 2122 { 2123 struct backref_node *node; 2124 int level; 2125 int ret; 2126 int err = 0; 2127 2128 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2129 while (!list_empty(&cache->pending[level])) { 2130 node = list_entry(cache->pending[level].next, 2131 struct backref_node, lower); 2132 BUG_ON(node->level != level); 2133 2134 ret = link_to_upper(trans, node, path); 2135 if (ret < 0) 2136 err = ret; 2137 /* 2138 * this remove the node from the pending list and 2139 * may add some other nodes to the level + 1 2140 * pending list 2141 */ 2142 remove_backref_node(cache, node); 2143 } 2144 } 2145 BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); 2146 return err; 2147 } 2148 2149 static void mark_block_processed(struct reloc_control *rc, 2150 struct backref_node *node) 2151 { 2152 u32 blocksize; 2153 if (node->level == 0 || 2154 in_block_group(node->bytenr, rc->block_group)) { 2155 blocksize = btrfs_level_size(rc->extent_root, node->level); 2156 set_extent_bits(&rc->processed_blocks, node->bytenr, 2157 node->bytenr + blocksize - 1, EXTENT_DIRTY, 2158 GFP_NOFS); 2159 } 2160 node->processed = 1; 2161 } 2162 2163 /* 2164 * mark a block and all blocks directly/indirectly reference the block 2165 * as processed. 2166 */ 2167 static void update_processed_blocks(struct reloc_control *rc, 2168 struct backref_node *node) 2169 { 2170 struct backref_node *next = node; 2171 struct backref_edge *edge; 2172 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2173 int index = 0; 2174 2175 while (next) { 2176 cond_resched(); 2177 while (1) { 2178 if (next->processed) 2179 break; 2180 2181 mark_block_processed(rc, next); 2182 2183 if (list_empty(&next->upper)) 2184 break; 2185 2186 edge = list_entry(next->upper.next, 2187 struct backref_edge, list[LOWER]); 2188 edges[index++] = edge; 2189 next = edge->node[UPPER]; 2190 } 2191 next = walk_down_backref(edges, &index); 2192 } 2193 } 2194 2195 static int tree_block_processed(u64 bytenr, u32 blocksize, 2196 struct reloc_control *rc) 2197 { 2198 if (test_range_bit(&rc->processed_blocks, bytenr, 2199 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) 2200 return 1; 2201 return 0; 2202 } 2203 2204 /* 2205 * check if there are any file extent pointers in the leaf point to 2206 * data require processing 2207 */ 2208 static int check_file_extents(struct reloc_control *rc, 2209 u64 bytenr, u32 blocksize, u64 ptr_gen) 2210 { 2211 struct btrfs_key found_key; 2212 struct btrfs_file_extent_item *fi; 2213 struct extent_buffer *leaf; 2214 u32 nritems; 2215 int i; 2216 int ret = 0; 2217 2218 leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen); 2219 2220 nritems = btrfs_header_nritems(leaf); 2221 for (i = 0; i < nritems; i++) { 2222 cond_resched(); 2223 btrfs_item_key_to_cpu(leaf, &found_key, i); 2224 if (found_key.type != BTRFS_EXTENT_DATA_KEY) 2225 continue; 2226 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 2227 if (btrfs_file_extent_type(leaf, fi) == 2228 BTRFS_FILE_EXTENT_INLINE) 2229 continue; 2230 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 2231 if (bytenr == 0) 2232 continue; 2233 if (in_block_group(bytenr, rc->block_group)) { 2234 ret = 1; 2235 break; 2236 } 2237 } 2238 free_extent_buffer(leaf); 2239 return ret; 2240 } 2241 2242 /* 2243 * scan child blocks of a given block to find blocks require processing 2244 */ 2245 static int add_child_blocks(struct btrfs_trans_handle *trans, 2246 struct reloc_control *rc, 2247 struct backref_node *node, 2248 struct rb_root *blocks) 2249 { 2250 struct tree_block *block; 2251 struct rb_node *rb_node; 2252 u64 bytenr; 2253 u64 ptr_gen; 2254 u32 blocksize; 2255 u32 nritems; 2256 int i; 2257 int err = 0; 2258 2259 nritems = btrfs_header_nritems(node->eb); 2260 blocksize = btrfs_level_size(rc->extent_root, node->level - 1); 2261 for (i = 0; i < nritems; i++) { 2262 cond_resched(); 2263 bytenr = btrfs_node_blockptr(node->eb, i); 2264 ptr_gen = btrfs_node_ptr_generation(node->eb, i); 2265 if (ptr_gen == trans->transid) 2266 continue; 2267 if (!in_block_group(bytenr, rc->block_group) && 2268 (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) 2269 continue; 2270 if (tree_block_processed(bytenr, blocksize, rc)) 2271 continue; 2272 2273 readahead_tree_block(rc->extent_root, 2274 bytenr, blocksize, ptr_gen); 2275 } 2276 2277 for (i = 0; i < nritems; i++) { 2278 cond_resched(); 2279 bytenr = btrfs_node_blockptr(node->eb, i); 2280 ptr_gen = btrfs_node_ptr_generation(node->eb, i); 2281 if (ptr_gen == trans->transid) 2282 continue; 2283 if (!in_block_group(bytenr, rc->block_group) && 2284 (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) 2285 continue; 2286 if (tree_block_processed(bytenr, blocksize, rc)) 2287 continue; 2288 if (!in_block_group(bytenr, rc->block_group) && 2289 !check_file_extents(rc, bytenr, blocksize, ptr_gen)) 2290 continue; 2291 2292 block = kmalloc(sizeof(*block), GFP_NOFS); 2293 if (!block) { 2294 err = -ENOMEM; 2295 break; 2296 } 2297 block->bytenr = bytenr; 2298 btrfs_node_key_to_cpu(node->eb, &block->key, i); 2299 block->level = node->level - 1; 2300 block->key_ready = 1; 2301 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); 2302 BUG_ON(rb_node); 2303 } 2304 if (err) 2305 free_block_list(blocks); 2306 return err; 2307 } 2308 2309 /* 2310 * find adjacent blocks require processing 2311 */ 2312 static noinline_for_stack 2313 int add_adjacent_blocks(struct btrfs_trans_handle *trans, 2314 struct reloc_control *rc, 2315 struct backref_cache *cache, 2316 struct rb_root *blocks, int level, 2317 struct backref_node **upper) 2318 { 2319 struct backref_node *node; 2320 int ret = 0; 2321 2322 WARN_ON(!list_empty(&cache->pending[level])); 2323 2324 if (list_empty(&cache->pending[level + 1])) 2325 return 1; 2326 2327 node = list_entry(cache->pending[level + 1].next, 2328 struct backref_node, lower); 2329 if (node->eb) 2330 ret = add_child_blocks(trans, rc, node, blocks); 2331 2332 *upper = node; 2333 return ret; 2334 } 2335 2336 static int get_tree_block_key(struct reloc_control *rc, 2337 struct tree_block *block) 2338 { 2339 struct extent_buffer *eb; 2340 2341 BUG_ON(block->key_ready); 2342 eb = read_tree_block(rc->extent_root, block->bytenr, 2343 block->key.objectid, block->key.offset); 2344 WARN_ON(btrfs_header_level(eb) != block->level); 2345 if (block->level == 0) 2346 btrfs_item_key_to_cpu(eb, &block->key, 0); 2347 else 2348 btrfs_node_key_to_cpu(eb, &block->key, 0); 2349 free_extent_buffer(eb); 2350 block->key_ready = 1; 2351 return 0; 2352 } 2353 2354 static int reada_tree_block(struct reloc_control *rc, 2355 struct tree_block *block) 2356 { 2357 BUG_ON(block->key_ready); 2358 readahead_tree_block(rc->extent_root, block->bytenr, 2359 block->key.objectid, block->key.offset); 2360 return 0; 2361 } 2362 2363 /* 2364 * helper function to relocate a tree block 2365 */ 2366 static int relocate_tree_block(struct btrfs_trans_handle *trans, 2367 struct reloc_control *rc, 2368 struct backref_node *node, 2369 struct btrfs_key *key, 2370 struct btrfs_path *path) 2371 { 2372 struct btrfs_root *root; 2373 int ret; 2374 2375 root = select_one_root(trans, node); 2376 if (unlikely(!root)) { 2377 rc->found_old_snapshot = 1; 2378 update_processed_blocks(rc, node); 2379 return 0; 2380 } 2381 2382 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 2383 ret = do_relocation(trans, node, key, path, 1); 2384 if (ret < 0) 2385 goto out; 2386 if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) { 2387 ret = replace_file_extents(trans, rc, root, 2388 node->eb, NULL); 2389 if (ret < 0) 2390 goto out; 2391 } 2392 drop_node_buffer(node); 2393 } else if (!root->ref_cows) { 2394 path->lowest_level = node->level; 2395 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2396 btrfs_release_path(root, path); 2397 if (ret < 0) 2398 goto out; 2399 } else if (root != node->root) { 2400 WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS); 2401 } 2402 2403 update_processed_blocks(rc, node); 2404 ret = 0; 2405 out: 2406 drop_node_buffer(node); 2407 return ret; 2408 } 2409 2410 /* 2411 * relocate a list of blocks 2412 */ 2413 static noinline_for_stack 2414 int relocate_tree_blocks(struct btrfs_trans_handle *trans, 2415 struct reloc_control *rc, struct rb_root *blocks) 2416 { 2417 struct backref_cache *cache; 2418 struct backref_node *node; 2419 struct btrfs_path *path; 2420 struct tree_block *block; 2421 struct rb_node *rb_node; 2422 int level = -1; 2423 int ret; 2424 int err = 0; 2425 2426 path = btrfs_alloc_path(); 2427 if (!path) 2428 return -ENOMEM; 2429 2430 cache = kmalloc(sizeof(*cache), GFP_NOFS); 2431 if (!cache) { 2432 btrfs_free_path(path); 2433 return -ENOMEM; 2434 } 2435 2436 backref_cache_init(cache); 2437 2438 rb_node = rb_first(blocks); 2439 while (rb_node) { 2440 block = rb_entry(rb_node, struct tree_block, rb_node); 2441 if (level == -1) 2442 level = block->level; 2443 else 2444 BUG_ON(level != block->level); 2445 if (!block->key_ready) 2446 reada_tree_block(rc, block); 2447 rb_node = rb_next(rb_node); 2448 } 2449 2450 rb_node = rb_first(blocks); 2451 while (rb_node) { 2452 block = rb_entry(rb_node, struct tree_block, rb_node); 2453 if (!block->key_ready) 2454 get_tree_block_key(rc, block); 2455 rb_node = rb_next(rb_node); 2456 } 2457 2458 rb_node = rb_first(blocks); 2459 while (rb_node) { 2460 block = rb_entry(rb_node, struct tree_block, rb_node); 2461 2462 node = build_backref_tree(rc, cache, &block->key, 2463 block->level, block->bytenr); 2464 if (IS_ERR(node)) { 2465 err = PTR_ERR(node); 2466 goto out; 2467 } 2468 2469 ret = relocate_tree_block(trans, rc, node, &block->key, 2470 path); 2471 if (ret < 0) { 2472 err = ret; 2473 goto out; 2474 } 2475 remove_backref_node(cache, node); 2476 rb_node = rb_next(rb_node); 2477 } 2478 2479 if (level > 0) 2480 goto out; 2481 2482 free_block_list(blocks); 2483 2484 /* 2485 * now backrefs of some upper level tree blocks have been cached, 2486 * try relocating blocks referenced by these upper level blocks. 2487 */ 2488 while (1) { 2489 struct backref_node *upper = NULL; 2490 if (trans->transaction->in_commit || 2491 trans->transaction->delayed_refs.flushing) 2492 break; 2493 2494 ret = add_adjacent_blocks(trans, rc, cache, blocks, level, 2495 &upper); 2496 if (ret < 0) 2497 err = ret; 2498 if (ret != 0) 2499 break; 2500 2501 rb_node = rb_first(blocks); 2502 while (rb_node) { 2503 block = rb_entry(rb_node, struct tree_block, rb_node); 2504 if (trans->transaction->in_commit || 2505 trans->transaction->delayed_refs.flushing) 2506 goto out; 2507 BUG_ON(!block->key_ready); 2508 node = build_backref_tree(rc, cache, &block->key, 2509 level, block->bytenr); 2510 if (IS_ERR(node)) { 2511 err = PTR_ERR(node); 2512 goto out; 2513 } 2514 2515 ret = relocate_tree_block(trans, rc, node, 2516 &block->key, path); 2517 if (ret < 0) { 2518 err = ret; 2519 goto out; 2520 } 2521 remove_backref_node(cache, node); 2522 rb_node = rb_next(rb_node); 2523 } 2524 free_block_list(blocks); 2525 2526 if (upper) { 2527 ret = link_to_upper(trans, upper, path); 2528 if (ret < 0) { 2529 err = ret; 2530 break; 2531 } 2532 remove_backref_node(cache, upper); 2533 } 2534 } 2535 out: 2536 free_block_list(blocks); 2537 2538 ret = finish_pending_nodes(trans, cache, path); 2539 if (ret < 0) 2540 err = ret; 2541 2542 kfree(cache); 2543 btrfs_free_path(path); 2544 return err; 2545 } 2546 2547 static noinline_for_stack 2548 int setup_extent_mapping(struct inode *inode, u64 start, u64 end, 2549 u64 block_start) 2550 { 2551 struct btrfs_root *root = BTRFS_I(inode)->root; 2552 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 2553 struct extent_map *em; 2554 int ret = 0; 2555 2556 em = alloc_extent_map(GFP_NOFS); 2557 if (!em) 2558 return -ENOMEM; 2559 2560 em->start = start; 2561 em->len = end + 1 - start; 2562 em->block_len = em->len; 2563 em->block_start = block_start; 2564 em->bdev = root->fs_info->fs_devices->latest_bdev; 2565 set_bit(EXTENT_FLAG_PINNED, &em->flags); 2566 2567 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2568 while (1) { 2569 write_lock(&em_tree->lock); 2570 ret = add_extent_mapping(em_tree, em); 2571 write_unlock(&em_tree->lock); 2572 if (ret != -EEXIST) { 2573 free_extent_map(em); 2574 break; 2575 } 2576 btrfs_drop_extent_cache(inode, start, end, 0); 2577 } 2578 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2579 return ret; 2580 } 2581 2582 static int relocate_file_extent_cluster(struct inode *inode, 2583 struct file_extent_cluster *cluster) 2584 { 2585 u64 page_start; 2586 u64 page_end; 2587 u64 offset = BTRFS_I(inode)->index_cnt; 2588 unsigned long index; 2589 unsigned long last_index; 2590 unsigned int dirty_page = 0; 2591 struct page *page; 2592 struct file_ra_state *ra; 2593 int nr = 0; 2594 int ret = 0; 2595 2596 if (!cluster->nr) 2597 return 0; 2598 2599 ra = kzalloc(sizeof(*ra), GFP_NOFS); 2600 if (!ra) 2601 return -ENOMEM; 2602 2603 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; 2604 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; 2605 2606 mutex_lock(&inode->i_mutex); 2607 2608 i_size_write(inode, cluster->end + 1 - offset); 2609 ret = setup_extent_mapping(inode, cluster->start - offset, 2610 cluster->end - offset, cluster->start); 2611 if (ret) 2612 goto out_unlock; 2613 2614 file_ra_state_init(ra, inode->i_mapping); 2615 2616 WARN_ON(cluster->start != cluster->boundary[0]); 2617 while (index <= last_index) { 2618 page = find_lock_page(inode->i_mapping, index); 2619 if (!page) { 2620 page_cache_sync_readahead(inode->i_mapping, 2621 ra, NULL, index, 2622 last_index + 1 - index); 2623 page = grab_cache_page(inode->i_mapping, index); 2624 if (!page) { 2625 ret = -ENOMEM; 2626 goto out_unlock; 2627 } 2628 } 2629 2630 if (PageReadahead(page)) { 2631 page_cache_async_readahead(inode->i_mapping, 2632 ra, NULL, page, index, 2633 last_index + 1 - index); 2634 } 2635 2636 if (!PageUptodate(page)) { 2637 btrfs_readpage(NULL, page); 2638 lock_page(page); 2639 if (!PageUptodate(page)) { 2640 unlock_page(page); 2641 page_cache_release(page); 2642 ret = -EIO; 2643 goto out_unlock; 2644 } 2645 } 2646 2647 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2648 page_end = page_start + PAGE_CACHE_SIZE - 1; 2649 2650 lock_extent(&BTRFS_I(inode)->io_tree, 2651 page_start, page_end, GFP_NOFS); 2652 2653 set_page_extent_mapped(page); 2654 2655 if (nr < cluster->nr && 2656 page_start + offset == cluster->boundary[nr]) { 2657 set_extent_bits(&BTRFS_I(inode)->io_tree, 2658 page_start, page_end, 2659 EXTENT_BOUNDARY, GFP_NOFS); 2660 nr++; 2661 } 2662 btrfs_set_extent_delalloc(inode, page_start, page_end); 2663 2664 set_page_dirty(page); 2665 dirty_page++; 2666 2667 unlock_extent(&BTRFS_I(inode)->io_tree, 2668 page_start, page_end, GFP_NOFS); 2669 unlock_page(page); 2670 page_cache_release(page); 2671 2672 index++; 2673 if (nr < cluster->nr && 2674 page_end + 1 + offset == cluster->boundary[nr]) { 2675 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 2676 dirty_page); 2677 dirty_page = 0; 2678 } 2679 } 2680 if (dirty_page) { 2681 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 2682 dirty_page); 2683 } 2684 WARN_ON(nr != cluster->nr); 2685 out_unlock: 2686 mutex_unlock(&inode->i_mutex); 2687 kfree(ra); 2688 return ret; 2689 } 2690 2691 static noinline_for_stack 2692 int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, 2693 struct file_extent_cluster *cluster) 2694 { 2695 int ret; 2696 2697 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) { 2698 ret = relocate_file_extent_cluster(inode, cluster); 2699 if (ret) 2700 return ret; 2701 cluster->nr = 0; 2702 } 2703 2704 if (!cluster->nr) 2705 cluster->start = extent_key->objectid; 2706 else 2707 BUG_ON(cluster->nr >= MAX_EXTENTS); 2708 cluster->end = extent_key->objectid + extent_key->offset - 1; 2709 cluster->boundary[cluster->nr] = extent_key->objectid; 2710 cluster->nr++; 2711 2712 if (cluster->nr >= MAX_EXTENTS) { 2713 ret = relocate_file_extent_cluster(inode, cluster); 2714 if (ret) 2715 return ret; 2716 cluster->nr = 0; 2717 } 2718 return 0; 2719 } 2720 2721 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2722 static int get_ref_objectid_v0(struct reloc_control *rc, 2723 struct btrfs_path *path, 2724 struct btrfs_key *extent_key, 2725 u64 *ref_objectid, int *path_change) 2726 { 2727 struct btrfs_key key; 2728 struct extent_buffer *leaf; 2729 struct btrfs_extent_ref_v0 *ref0; 2730 int ret; 2731 int slot; 2732 2733 leaf = path->nodes[0]; 2734 slot = path->slots[0]; 2735 while (1) { 2736 if (slot >= btrfs_header_nritems(leaf)) { 2737 ret = btrfs_next_leaf(rc->extent_root, path); 2738 if (ret < 0) 2739 return ret; 2740 BUG_ON(ret > 0); 2741 leaf = path->nodes[0]; 2742 slot = path->slots[0]; 2743 if (path_change) 2744 *path_change = 1; 2745 } 2746 btrfs_item_key_to_cpu(leaf, &key, slot); 2747 if (key.objectid != extent_key->objectid) 2748 return -ENOENT; 2749 2750 if (key.type != BTRFS_EXTENT_REF_V0_KEY) { 2751 slot++; 2752 continue; 2753 } 2754 ref0 = btrfs_item_ptr(leaf, slot, 2755 struct btrfs_extent_ref_v0); 2756 *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0); 2757 break; 2758 } 2759 return 0; 2760 } 2761 #endif 2762 2763 /* 2764 * helper to add a tree block to the list. 2765 * the major work is getting the generation and level of the block 2766 */ 2767 static int add_tree_block(struct reloc_control *rc, 2768 struct btrfs_key *extent_key, 2769 struct btrfs_path *path, 2770 struct rb_root *blocks) 2771 { 2772 struct extent_buffer *eb; 2773 struct btrfs_extent_item *ei; 2774 struct btrfs_tree_block_info *bi; 2775 struct tree_block *block; 2776 struct rb_node *rb_node; 2777 u32 item_size; 2778 int level = -1; 2779 int generation; 2780 2781 eb = path->nodes[0]; 2782 item_size = btrfs_item_size_nr(eb, path->slots[0]); 2783 2784 if (item_size >= sizeof(*ei) + sizeof(*bi)) { 2785 ei = btrfs_item_ptr(eb, path->slots[0], 2786 struct btrfs_extent_item); 2787 bi = (struct btrfs_tree_block_info *)(ei + 1); 2788 generation = btrfs_extent_generation(eb, ei); 2789 level = btrfs_tree_block_level(eb, bi); 2790 } else { 2791 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2792 u64 ref_owner; 2793 int ret; 2794 2795 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 2796 ret = get_ref_objectid_v0(rc, path, extent_key, 2797 &ref_owner, NULL); 2798 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); 2799 level = (int)ref_owner; 2800 /* FIXME: get real generation */ 2801 generation = 0; 2802 #else 2803 BUG(); 2804 #endif 2805 } 2806 2807 btrfs_release_path(rc->extent_root, path); 2808 2809 BUG_ON(level == -1); 2810 2811 block = kmalloc(sizeof(*block), GFP_NOFS); 2812 if (!block) 2813 return -ENOMEM; 2814 2815 block->bytenr = extent_key->objectid; 2816 block->key.objectid = extent_key->offset; 2817 block->key.offset = generation; 2818 block->level = level; 2819 block->key_ready = 0; 2820 2821 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); 2822 BUG_ON(rb_node); 2823 2824 return 0; 2825 } 2826 2827 /* 2828 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY 2829 */ 2830 static int __add_tree_block(struct reloc_control *rc, 2831 u64 bytenr, u32 blocksize, 2832 struct rb_root *blocks) 2833 { 2834 struct btrfs_path *path; 2835 struct btrfs_key key; 2836 int ret; 2837 2838 if (tree_block_processed(bytenr, blocksize, rc)) 2839 return 0; 2840 2841 if (tree_search(blocks, bytenr)) 2842 return 0; 2843 2844 path = btrfs_alloc_path(); 2845 if (!path) 2846 return -ENOMEM; 2847 2848 key.objectid = bytenr; 2849 key.type = BTRFS_EXTENT_ITEM_KEY; 2850 key.offset = blocksize; 2851 2852 path->search_commit_root = 1; 2853 path->skip_locking = 1; 2854 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); 2855 if (ret < 0) 2856 goto out; 2857 BUG_ON(ret); 2858 2859 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 2860 ret = add_tree_block(rc, &key, path, blocks); 2861 out: 2862 btrfs_free_path(path); 2863 return ret; 2864 } 2865 2866 /* 2867 * helper to check if the block use full backrefs for pointers in it 2868 */ 2869 static int block_use_full_backref(struct reloc_control *rc, 2870 struct extent_buffer *eb) 2871 { 2872 struct btrfs_path *path; 2873 struct btrfs_extent_item *ei; 2874 struct btrfs_key key; 2875 u64 flags; 2876 int ret; 2877 2878 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) || 2879 btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) 2880 return 1; 2881 2882 path = btrfs_alloc_path(); 2883 BUG_ON(!path); 2884 2885 key.objectid = eb->start; 2886 key.type = BTRFS_EXTENT_ITEM_KEY; 2887 key.offset = eb->len; 2888 2889 path->search_commit_root = 1; 2890 path->skip_locking = 1; 2891 ret = btrfs_search_slot(NULL, rc->extent_root, 2892 &key, path, 0, 0); 2893 BUG_ON(ret); 2894 2895 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 2896 struct btrfs_extent_item); 2897 flags = btrfs_extent_flags(path->nodes[0], ei); 2898 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); 2899 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) 2900 ret = 1; 2901 else 2902 ret = 0; 2903 btrfs_free_path(path); 2904 return ret; 2905 } 2906 2907 /* 2908 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY 2909 * this function scans fs tree to find blocks reference the data extent 2910 */ 2911 static int find_data_references(struct reloc_control *rc, 2912 struct btrfs_key *extent_key, 2913 struct extent_buffer *leaf, 2914 struct btrfs_extent_data_ref *ref, 2915 struct rb_root *blocks) 2916 { 2917 struct btrfs_path *path; 2918 struct tree_block *block; 2919 struct btrfs_root *root; 2920 struct btrfs_file_extent_item *fi; 2921 struct rb_node *rb_node; 2922 struct btrfs_key key; 2923 u64 ref_root; 2924 u64 ref_objectid; 2925 u64 ref_offset; 2926 u32 ref_count; 2927 u32 nritems; 2928 int err = 0; 2929 int added = 0; 2930 int counted; 2931 int ret; 2932 2933 path = btrfs_alloc_path(); 2934 if (!path) 2935 return -ENOMEM; 2936 2937 ref_root = btrfs_extent_data_ref_root(leaf, ref); 2938 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); 2939 ref_offset = btrfs_extent_data_ref_offset(leaf, ref); 2940 ref_count = btrfs_extent_data_ref_count(leaf, ref); 2941 2942 root = read_fs_root(rc->extent_root->fs_info, ref_root); 2943 if (IS_ERR(root)) { 2944 err = PTR_ERR(root); 2945 goto out; 2946 } 2947 2948 key.objectid = ref_objectid; 2949 key.offset = ref_offset; 2950 key.type = BTRFS_EXTENT_DATA_KEY; 2951 2952 path->search_commit_root = 1; 2953 path->skip_locking = 1; 2954 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2955 if (ret < 0) { 2956 err = ret; 2957 goto out; 2958 } 2959 2960 leaf = path->nodes[0]; 2961 nritems = btrfs_header_nritems(leaf); 2962 /* 2963 * the references in tree blocks that use full backrefs 2964 * are not counted in 2965 */ 2966 if (block_use_full_backref(rc, leaf)) 2967 counted = 0; 2968 else 2969 counted = 1; 2970 rb_node = tree_search(blocks, leaf->start); 2971 if (rb_node) { 2972 if (counted) 2973 added = 1; 2974 else 2975 path->slots[0] = nritems; 2976 } 2977 2978 while (ref_count > 0) { 2979 while (path->slots[0] >= nritems) { 2980 ret = btrfs_next_leaf(root, path); 2981 if (ret < 0) { 2982 err = ret; 2983 goto out; 2984 } 2985 if (ret > 0) { 2986 WARN_ON(1); 2987 goto out; 2988 } 2989 2990 leaf = path->nodes[0]; 2991 nritems = btrfs_header_nritems(leaf); 2992 added = 0; 2993 2994 if (block_use_full_backref(rc, leaf)) 2995 counted = 0; 2996 else 2997 counted = 1; 2998 rb_node = tree_search(blocks, leaf->start); 2999 if (rb_node) { 3000 if (counted) 3001 added = 1; 3002 else 3003 path->slots[0] = nritems; 3004 } 3005 } 3006 3007 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3008 if (key.objectid != ref_objectid || 3009 key.type != BTRFS_EXTENT_DATA_KEY) { 3010 WARN_ON(1); 3011 break; 3012 } 3013 3014 fi = btrfs_item_ptr(leaf, path->slots[0], 3015 struct btrfs_file_extent_item); 3016 3017 if (btrfs_file_extent_type(leaf, fi) == 3018 BTRFS_FILE_EXTENT_INLINE) 3019 goto next; 3020 3021 if (btrfs_file_extent_disk_bytenr(leaf, fi) != 3022 extent_key->objectid) 3023 goto next; 3024 3025 key.offset -= btrfs_file_extent_offset(leaf, fi); 3026 if (key.offset != ref_offset) 3027 goto next; 3028 3029 if (counted) 3030 ref_count--; 3031 if (added) 3032 goto next; 3033 3034 if (!tree_block_processed(leaf->start, leaf->len, rc)) { 3035 block = kmalloc(sizeof(*block), GFP_NOFS); 3036 if (!block) { 3037 err = -ENOMEM; 3038 break; 3039 } 3040 block->bytenr = leaf->start; 3041 btrfs_item_key_to_cpu(leaf, &block->key, 0); 3042 block->level = 0; 3043 block->key_ready = 1; 3044 rb_node = tree_insert(blocks, block->bytenr, 3045 &block->rb_node); 3046 BUG_ON(rb_node); 3047 } 3048 if (counted) 3049 added = 1; 3050 else 3051 path->slots[0] = nritems; 3052 next: 3053 path->slots[0]++; 3054 3055 } 3056 out: 3057 btrfs_free_path(path); 3058 return err; 3059 } 3060 3061 /* 3062 * hepler to find all tree blocks that reference a given data extent 3063 */ 3064 static noinline_for_stack 3065 int add_data_references(struct reloc_control *rc, 3066 struct btrfs_key *extent_key, 3067 struct btrfs_path *path, 3068 struct rb_root *blocks) 3069 { 3070 struct btrfs_key key; 3071 struct extent_buffer *eb; 3072 struct btrfs_extent_data_ref *dref; 3073 struct btrfs_extent_inline_ref *iref; 3074 unsigned long ptr; 3075 unsigned long end; 3076 u32 blocksize; 3077 int ret; 3078 int err = 0; 3079 3080 ret = get_new_location(rc->data_inode, NULL, extent_key->objectid, 3081 extent_key->offset); 3082 BUG_ON(ret < 0); 3083 if (ret > 0) { 3084 /* the relocated data is fragmented */ 3085 rc->extents_skipped++; 3086 btrfs_release_path(rc->extent_root, path); 3087 return 0; 3088 } 3089 3090 blocksize = btrfs_level_size(rc->extent_root, 0); 3091 3092 eb = path->nodes[0]; 3093 ptr = btrfs_item_ptr_offset(eb, path->slots[0]); 3094 end = ptr + btrfs_item_size_nr(eb, path->slots[0]); 3095 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3096 if (ptr + sizeof(struct btrfs_extent_item_v0) == end) 3097 ptr = end; 3098 else 3099 #endif 3100 ptr += sizeof(struct btrfs_extent_item); 3101 3102 while (ptr < end) { 3103 iref = (struct btrfs_extent_inline_ref *)ptr; 3104 key.type = btrfs_extent_inline_ref_type(eb, iref); 3105 if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 3106 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 3107 ret = __add_tree_block(rc, key.offset, blocksize, 3108 blocks); 3109 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 3110 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 3111 ret = find_data_references(rc, extent_key, 3112 eb, dref, blocks); 3113 } else { 3114 BUG(); 3115 } 3116 ptr += btrfs_extent_inline_ref_size(key.type); 3117 } 3118 WARN_ON(ptr > end); 3119 3120 while (1) { 3121 cond_resched(); 3122 eb = path->nodes[0]; 3123 if (path->slots[0] >= btrfs_header_nritems(eb)) { 3124 ret = btrfs_next_leaf(rc->extent_root, path); 3125 if (ret < 0) { 3126 err = ret; 3127 break; 3128 } 3129 if (ret > 0) 3130 break; 3131 eb = path->nodes[0]; 3132 } 3133 3134 btrfs_item_key_to_cpu(eb, &key, path->slots[0]); 3135 if (key.objectid != extent_key->objectid) 3136 break; 3137 3138 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3139 if (key.type == BTRFS_SHARED_DATA_REF_KEY || 3140 key.type == BTRFS_EXTENT_REF_V0_KEY) { 3141 #else 3142 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); 3143 if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 3144 #endif 3145 ret = __add_tree_block(rc, key.offset, blocksize, 3146 blocks); 3147 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 3148 dref = btrfs_item_ptr(eb, path->slots[0], 3149 struct btrfs_extent_data_ref); 3150 ret = find_data_references(rc, extent_key, 3151 eb, dref, blocks); 3152 } else { 3153 ret = 0; 3154 } 3155 if (ret) { 3156 err = ret; 3157 break; 3158 } 3159 path->slots[0]++; 3160 } 3161 btrfs_release_path(rc->extent_root, path); 3162 if (err) 3163 free_block_list(blocks); 3164 return err; 3165 } 3166 3167 /* 3168 * hepler to find next unprocessed extent 3169 */ 3170 static noinline_for_stack 3171 int find_next_extent(struct btrfs_trans_handle *trans, 3172 struct reloc_control *rc, struct btrfs_path *path) 3173 { 3174 struct btrfs_key key; 3175 struct extent_buffer *leaf; 3176 u64 start, end, last; 3177 int ret; 3178 3179 last = rc->block_group->key.objectid + rc->block_group->key.offset; 3180 while (1) { 3181 cond_resched(); 3182 if (rc->search_start >= last) { 3183 ret = 1; 3184 break; 3185 } 3186 3187 key.objectid = rc->search_start; 3188 key.type = BTRFS_EXTENT_ITEM_KEY; 3189 key.offset = 0; 3190 3191 path->search_commit_root = 1; 3192 path->skip_locking = 1; 3193 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 3194 0, 0); 3195 if (ret < 0) 3196 break; 3197 next: 3198 leaf = path->nodes[0]; 3199 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 3200 ret = btrfs_next_leaf(rc->extent_root, path); 3201 if (ret != 0) 3202 break; 3203 leaf = path->nodes[0]; 3204 } 3205 3206 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3207 if (key.objectid >= last) { 3208 ret = 1; 3209 break; 3210 } 3211 3212 if (key.type != BTRFS_EXTENT_ITEM_KEY || 3213 key.objectid + key.offset <= rc->search_start) { 3214 path->slots[0]++; 3215 goto next; 3216 } 3217 3218 ret = find_first_extent_bit(&rc->processed_blocks, 3219 key.objectid, &start, &end, 3220 EXTENT_DIRTY); 3221 3222 if (ret == 0 && start <= key.objectid) { 3223 btrfs_release_path(rc->extent_root, path); 3224 rc->search_start = end + 1; 3225 } else { 3226 rc->search_start = key.objectid + key.offset; 3227 return 0; 3228 } 3229 } 3230 btrfs_release_path(rc->extent_root, path); 3231 return ret; 3232 } 3233 3234 static void set_reloc_control(struct reloc_control *rc) 3235 { 3236 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3237 mutex_lock(&fs_info->trans_mutex); 3238 fs_info->reloc_ctl = rc; 3239 mutex_unlock(&fs_info->trans_mutex); 3240 } 3241 3242 static void unset_reloc_control(struct reloc_control *rc) 3243 { 3244 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3245 mutex_lock(&fs_info->trans_mutex); 3246 fs_info->reloc_ctl = NULL; 3247 mutex_unlock(&fs_info->trans_mutex); 3248 } 3249 3250 static int check_extent_flags(u64 flags) 3251 { 3252 if ((flags & BTRFS_EXTENT_FLAG_DATA) && 3253 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) 3254 return 1; 3255 if (!(flags & BTRFS_EXTENT_FLAG_DATA) && 3256 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) 3257 return 1; 3258 if ((flags & BTRFS_EXTENT_FLAG_DATA) && 3259 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 3260 return 1; 3261 return 0; 3262 } 3263 3264 3265 static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 3266 { 3267 struct rb_root blocks = RB_ROOT; 3268 struct btrfs_key key; 3269 struct file_extent_cluster *cluster; 3270 struct btrfs_trans_handle *trans = NULL; 3271 struct btrfs_path *path; 3272 struct btrfs_extent_item *ei; 3273 unsigned long nr; 3274 u64 flags; 3275 u32 item_size; 3276 int ret; 3277 int err = 0; 3278 3279 cluster = kzalloc(sizeof(*cluster), GFP_NOFS); 3280 if (!cluster) 3281 return -ENOMEM; 3282 3283 path = btrfs_alloc_path(); 3284 if (!path) 3285 return -ENOMEM; 3286 3287 rc->extents_found = 0; 3288 rc->extents_skipped = 0; 3289 3290 rc->search_start = rc->block_group->key.objectid; 3291 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3292 GFP_NOFS); 3293 3294 rc->create_reloc_root = 1; 3295 set_reloc_control(rc); 3296 3297 trans = btrfs_start_transaction(rc->extent_root, 1); 3298 btrfs_commit_transaction(trans, rc->extent_root); 3299 3300 while (1) { 3301 trans = btrfs_start_transaction(rc->extent_root, 1); 3302 3303 ret = find_next_extent(trans, rc, path); 3304 if (ret < 0) 3305 err = ret; 3306 if (ret != 0) 3307 break; 3308 3309 rc->extents_found++; 3310 3311 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 3312 struct btrfs_extent_item); 3313 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 3314 item_size = btrfs_item_size_nr(path->nodes[0], 3315 path->slots[0]); 3316 if (item_size >= sizeof(*ei)) { 3317 flags = btrfs_extent_flags(path->nodes[0], ei); 3318 ret = check_extent_flags(flags); 3319 BUG_ON(ret); 3320 3321 } else { 3322 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3323 u64 ref_owner; 3324 int path_change = 0; 3325 3326 BUG_ON(item_size != 3327 sizeof(struct btrfs_extent_item_v0)); 3328 ret = get_ref_objectid_v0(rc, path, &key, &ref_owner, 3329 &path_change); 3330 if (ref_owner < BTRFS_FIRST_FREE_OBJECTID) 3331 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK; 3332 else 3333 flags = BTRFS_EXTENT_FLAG_DATA; 3334 3335 if (path_change) { 3336 btrfs_release_path(rc->extent_root, path); 3337 3338 path->search_commit_root = 1; 3339 path->skip_locking = 1; 3340 ret = btrfs_search_slot(NULL, rc->extent_root, 3341 &key, path, 0, 0); 3342 if (ret < 0) { 3343 err = ret; 3344 break; 3345 } 3346 BUG_ON(ret > 0); 3347 } 3348 #else 3349 BUG(); 3350 #endif 3351 } 3352 3353 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 3354 ret = add_tree_block(rc, &key, path, &blocks); 3355 } else if (rc->stage == UPDATE_DATA_PTRS && 3356 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3357 ret = add_data_references(rc, &key, path, &blocks); 3358 } else { 3359 btrfs_release_path(rc->extent_root, path); 3360 ret = 0; 3361 } 3362 if (ret < 0) { 3363 err = 0; 3364 break; 3365 } 3366 3367 if (!RB_EMPTY_ROOT(&blocks)) { 3368 ret = relocate_tree_blocks(trans, rc, &blocks); 3369 if (ret < 0) { 3370 err = ret; 3371 break; 3372 } 3373 } 3374 3375 nr = trans->blocks_used; 3376 btrfs_end_transaction(trans, rc->extent_root); 3377 trans = NULL; 3378 btrfs_btree_balance_dirty(rc->extent_root, nr); 3379 3380 if (rc->stage == MOVE_DATA_EXTENTS && 3381 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3382 rc->found_file_extent = 1; 3383 ret = relocate_data_extent(rc->data_inode, 3384 &key, cluster); 3385 if (ret < 0) { 3386 err = ret; 3387 break; 3388 } 3389 } 3390 } 3391 btrfs_free_path(path); 3392 3393 if (trans) { 3394 nr = trans->blocks_used; 3395 btrfs_end_transaction(trans, rc->extent_root); 3396 btrfs_btree_balance_dirty(rc->extent_root, nr); 3397 } 3398 3399 if (!err) { 3400 ret = relocate_file_extent_cluster(rc->data_inode, cluster); 3401 if (ret < 0) 3402 err = ret; 3403 } 3404 3405 kfree(cluster); 3406 3407 rc->create_reloc_root = 0; 3408 smp_mb(); 3409 3410 if (rc->extents_found > 0) { 3411 trans = btrfs_start_transaction(rc->extent_root, 1); 3412 btrfs_commit_transaction(trans, rc->extent_root); 3413 } 3414 3415 merge_reloc_roots(rc); 3416 3417 unset_reloc_control(rc); 3418 3419 /* get rid of pinned extents */ 3420 trans = btrfs_start_transaction(rc->extent_root, 1); 3421 btrfs_commit_transaction(trans, rc->extent_root); 3422 3423 return err; 3424 } 3425 3426 static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 3427 struct btrfs_root *root, u64 objectid) 3428 { 3429 struct btrfs_path *path; 3430 struct btrfs_inode_item *item; 3431 struct extent_buffer *leaf; 3432 int ret; 3433 3434 path = btrfs_alloc_path(); 3435 if (!path) 3436 return -ENOMEM; 3437 3438 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 3439 if (ret) 3440 goto out; 3441 3442 leaf = path->nodes[0]; 3443 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 3444 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 3445 btrfs_set_inode_generation(leaf, item, 1); 3446 btrfs_set_inode_size(leaf, item, 0); 3447 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 3448 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); 3449 btrfs_mark_buffer_dirty(leaf); 3450 btrfs_release_path(root, path); 3451 out: 3452 btrfs_free_path(path); 3453 return ret; 3454 } 3455 3456 /* 3457 * helper to create inode for data relocation. 3458 * the inode is in data relocation tree and its link count is 0 3459 */ 3460 static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, 3461 struct btrfs_block_group_cache *group) 3462 { 3463 struct inode *inode = NULL; 3464 struct btrfs_trans_handle *trans; 3465 struct btrfs_root *root; 3466 struct btrfs_key key; 3467 unsigned long nr; 3468 u64 objectid = BTRFS_FIRST_FREE_OBJECTID; 3469 int err = 0; 3470 3471 root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID); 3472 if (IS_ERR(root)) 3473 return ERR_CAST(root); 3474 3475 trans = btrfs_start_transaction(root, 1); 3476 BUG_ON(!trans); 3477 3478 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 3479 if (err) 3480 goto out; 3481 3482 err = __insert_orphan_inode(trans, root, objectid); 3483 BUG_ON(err); 3484 3485 key.objectid = objectid; 3486 key.type = BTRFS_INODE_ITEM_KEY; 3487 key.offset = 0; 3488 inode = btrfs_iget(root->fs_info->sb, &key, root); 3489 BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); 3490 BTRFS_I(inode)->index_cnt = group->key.objectid; 3491 3492 err = btrfs_orphan_add(trans, inode); 3493 out: 3494 nr = trans->blocks_used; 3495 btrfs_end_transaction(trans, root); 3496 3497 btrfs_btree_balance_dirty(root, nr); 3498 if (err) { 3499 if (inode) 3500 iput(inode); 3501 inode = ERR_PTR(err); 3502 } 3503 return inode; 3504 } 3505 3506 /* 3507 * function to relocate all extents in a block group. 3508 */ 3509 int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) 3510 { 3511 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3512 struct reloc_control *rc; 3513 int ret; 3514 int err = 0; 3515 3516 rc = kzalloc(sizeof(*rc), GFP_NOFS); 3517 if (!rc) 3518 return -ENOMEM; 3519 3520 mapping_tree_init(&rc->reloc_root_tree); 3521 extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); 3522 INIT_LIST_HEAD(&rc->reloc_roots); 3523 3524 rc->block_group = btrfs_lookup_block_group(fs_info, group_start); 3525 BUG_ON(!rc->block_group); 3526 3527 btrfs_init_workers(&rc->workers, "relocate", 3528 fs_info->thread_pool_size, NULL); 3529 3530 rc->extent_root = extent_root; 3531 btrfs_prepare_block_group_relocation(extent_root, rc->block_group); 3532 3533 rc->data_inode = create_reloc_inode(fs_info, rc->block_group); 3534 if (IS_ERR(rc->data_inode)) { 3535 err = PTR_ERR(rc->data_inode); 3536 rc->data_inode = NULL; 3537 goto out; 3538 } 3539 3540 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", 3541 (unsigned long long)rc->block_group->key.objectid, 3542 (unsigned long long)rc->block_group->flags); 3543 3544 btrfs_start_delalloc_inodes(fs_info->tree_root, 0); 3545 btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); 3546 3547 while (1) { 3548 rc->extents_found = 0; 3549 rc->extents_skipped = 0; 3550 3551 mutex_lock(&fs_info->cleaner_mutex); 3552 3553 btrfs_clean_old_snapshots(fs_info->tree_root); 3554 ret = relocate_block_group(rc); 3555 3556 mutex_unlock(&fs_info->cleaner_mutex); 3557 if (ret < 0) { 3558 err = ret; 3559 break; 3560 } 3561 3562 if (rc->extents_found == 0) 3563 break; 3564 3565 printk(KERN_INFO "btrfs: found %llu extents\n", 3566 (unsigned long long)rc->extents_found); 3567 3568 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { 3569 btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); 3570 invalidate_mapping_pages(rc->data_inode->i_mapping, 3571 0, -1); 3572 rc->stage = UPDATE_DATA_PTRS; 3573 } else if (rc->stage == UPDATE_DATA_PTRS && 3574 rc->extents_skipped >= rc->extents_found) { 3575 iput(rc->data_inode); 3576 rc->data_inode = create_reloc_inode(fs_info, 3577 rc->block_group); 3578 if (IS_ERR(rc->data_inode)) { 3579 err = PTR_ERR(rc->data_inode); 3580 rc->data_inode = NULL; 3581 break; 3582 } 3583 rc->stage = MOVE_DATA_EXTENTS; 3584 rc->found_file_extent = 0; 3585 } 3586 } 3587 3588 filemap_write_and_wait_range(fs_info->btree_inode->i_mapping, 3589 rc->block_group->key.objectid, 3590 rc->block_group->key.objectid + 3591 rc->block_group->key.offset - 1); 3592 3593 WARN_ON(rc->block_group->pinned > 0); 3594 WARN_ON(rc->block_group->reserved > 0); 3595 WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); 3596 out: 3597 iput(rc->data_inode); 3598 btrfs_stop_workers(&rc->workers); 3599 btrfs_put_block_group(rc->block_group); 3600 kfree(rc); 3601 return err; 3602 } 3603 3604 static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) 3605 { 3606 struct btrfs_trans_handle *trans; 3607 int ret; 3608 3609 trans = btrfs_start_transaction(root->fs_info->tree_root, 1); 3610 3611 memset(&root->root_item.drop_progress, 0, 3612 sizeof(root->root_item.drop_progress)); 3613 root->root_item.drop_level = 0; 3614 btrfs_set_root_refs(&root->root_item, 0); 3615 ret = btrfs_update_root(trans, root->fs_info->tree_root, 3616 &root->root_key, &root->root_item); 3617 BUG_ON(ret); 3618 3619 ret = btrfs_end_transaction(trans, root->fs_info->tree_root); 3620 BUG_ON(ret); 3621 return 0; 3622 } 3623 3624 /* 3625 * recover relocation interrupted by system crash. 3626 * 3627 * this function resumes merging reloc trees with corresponding fs trees. 3628 * this is important for keeping the sharing of tree blocks 3629 */ 3630 int btrfs_recover_relocation(struct btrfs_root *root) 3631 { 3632 LIST_HEAD(reloc_roots); 3633 struct btrfs_key key; 3634 struct btrfs_root *fs_root; 3635 struct btrfs_root *reloc_root; 3636 struct btrfs_path *path; 3637 struct extent_buffer *leaf; 3638 struct reloc_control *rc = NULL; 3639 struct btrfs_trans_handle *trans; 3640 int ret; 3641 int err = 0; 3642 3643 path = btrfs_alloc_path(); 3644 if (!path) 3645 return -ENOMEM; 3646 3647 key.objectid = BTRFS_TREE_RELOC_OBJECTID; 3648 key.type = BTRFS_ROOT_ITEM_KEY; 3649 key.offset = (u64)-1; 3650 3651 while (1) { 3652 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, 3653 path, 0, 0); 3654 if (ret < 0) { 3655 err = ret; 3656 goto out; 3657 } 3658 if (ret > 0) { 3659 if (path->slots[0] == 0) 3660 break; 3661 path->slots[0]--; 3662 } 3663 leaf = path->nodes[0]; 3664 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3665 btrfs_release_path(root->fs_info->tree_root, path); 3666 3667 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || 3668 key.type != BTRFS_ROOT_ITEM_KEY) 3669 break; 3670 3671 reloc_root = btrfs_read_fs_root_no_radix(root, &key); 3672 if (IS_ERR(reloc_root)) { 3673 err = PTR_ERR(reloc_root); 3674 goto out; 3675 } 3676 3677 list_add(&reloc_root->root_list, &reloc_roots); 3678 3679 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 3680 fs_root = read_fs_root(root->fs_info, 3681 reloc_root->root_key.offset); 3682 if (IS_ERR(fs_root)) { 3683 ret = PTR_ERR(fs_root); 3684 if (ret != -ENOENT) { 3685 err = ret; 3686 goto out; 3687 } 3688 mark_garbage_root(reloc_root); 3689 } 3690 } 3691 3692 if (key.offset == 0) 3693 break; 3694 3695 key.offset--; 3696 } 3697 btrfs_release_path(root->fs_info->tree_root, path); 3698 3699 if (list_empty(&reloc_roots)) 3700 goto out; 3701 3702 rc = kzalloc(sizeof(*rc), GFP_NOFS); 3703 if (!rc) { 3704 err = -ENOMEM; 3705 goto out; 3706 } 3707 3708 mapping_tree_init(&rc->reloc_root_tree); 3709 INIT_LIST_HEAD(&rc->reloc_roots); 3710 btrfs_init_workers(&rc->workers, "relocate", 3711 root->fs_info->thread_pool_size, NULL); 3712 rc->extent_root = root->fs_info->extent_root; 3713 3714 set_reloc_control(rc); 3715 3716 while (!list_empty(&reloc_roots)) { 3717 reloc_root = list_entry(reloc_roots.next, 3718 struct btrfs_root, root_list); 3719 list_del(&reloc_root->root_list); 3720 3721 if (btrfs_root_refs(&reloc_root->root_item) == 0) { 3722 list_add_tail(&reloc_root->root_list, 3723 &rc->reloc_roots); 3724 continue; 3725 } 3726 3727 fs_root = read_fs_root(root->fs_info, 3728 reloc_root->root_key.offset); 3729 BUG_ON(IS_ERR(fs_root)); 3730 3731 __add_reloc_root(reloc_root); 3732 fs_root->reloc_root = reloc_root; 3733 } 3734 3735 trans = btrfs_start_transaction(rc->extent_root, 1); 3736 btrfs_commit_transaction(trans, rc->extent_root); 3737 3738 merge_reloc_roots(rc); 3739 3740 unset_reloc_control(rc); 3741 3742 trans = btrfs_start_transaction(rc->extent_root, 1); 3743 btrfs_commit_transaction(trans, rc->extent_root); 3744 out: 3745 if (rc) { 3746 btrfs_stop_workers(&rc->workers); 3747 kfree(rc); 3748 } 3749 while (!list_empty(&reloc_roots)) { 3750 reloc_root = list_entry(reloc_roots.next, 3751 struct btrfs_root, root_list); 3752 list_del(&reloc_root->root_list); 3753 free_extent_buffer(reloc_root->node); 3754 free_extent_buffer(reloc_root->commit_root); 3755 kfree(reloc_root); 3756 } 3757 btrfs_free_path(path); 3758 3759 if (err == 0) { 3760 /* cleanup orphan inode in data relocation tree */ 3761 fs_root = read_fs_root(root->fs_info, 3762 BTRFS_DATA_RELOC_TREE_OBJECTID); 3763 if (IS_ERR(fs_root)) 3764 err = PTR_ERR(fs_root); 3765 btrfs_orphan_cleanup(fs_root); 3766 } 3767 return err; 3768 } 3769 3770 /* 3771 * helper to add ordered checksum for data relocation. 3772 * 3773 * cloning checksum properly handles the nodatasum extents. 3774 * it also saves CPU time to re-calculate the checksum. 3775 */ 3776 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) 3777 { 3778 struct btrfs_ordered_sum *sums; 3779 struct btrfs_sector_sum *sector_sum; 3780 struct btrfs_ordered_extent *ordered; 3781 struct btrfs_root *root = BTRFS_I(inode)->root; 3782 size_t offset; 3783 int ret; 3784 u64 disk_bytenr; 3785 LIST_HEAD(list); 3786 3787 ordered = btrfs_lookup_ordered_extent(inode, file_pos); 3788 BUG_ON(ordered->file_offset != file_pos || ordered->len != len); 3789 3790 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 3791 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 3792 disk_bytenr + len - 1, &list); 3793 3794 while (!list_empty(&list)) { 3795 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 3796 list_del_init(&sums->list); 3797 3798 sector_sum = sums->sums; 3799 sums->bytenr = ordered->start; 3800 3801 offset = 0; 3802 while (offset < sums->len) { 3803 sector_sum->bytenr += ordered->start - disk_bytenr; 3804 sector_sum++; 3805 offset += root->sectorsize; 3806 } 3807 3808 btrfs_add_ordered_sum(inode, ordered, sums); 3809 } 3810 btrfs_put_ordered_extent(ordered); 3811 return 0; 3812 } 3813