/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include "compat.h"
#include "hash.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "free-space-cache.h"

static int update_reserved_extents(struct btrfs_root *root,
				   u64 bytenr, u64 num, int reserve);
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins);

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags, int force);

static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED;
}

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
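	/* recolor/rebalance the rb-tree now that the new node is linked in */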
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);
	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret)
		atomic_inc(&ret->count);
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

/*
 * We always set EXTENT_LOCKED for the super mirror extents so we don't
 * overwrite them, so those bits need to be unset.  Also, if we are unmounting
 * with pinned extents still sitting there because a block group was still
 * caching, we need to clear those now, since we are done.
 */
void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
{
	u64 start, end, last = 0;
	int ret;

	while (1) {
		ret = find_first_extent_bit(&info->pinned_extents, last,
					    &start, &end,
					    EXTENT_LOCKED|EXTENT_DIRTY);
		if (ret)
			break;

		clear_extent_bits(&info->pinned_extents, start, end,
				  EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
		last = end + 1;
	}
}

static int remove_sb_from_cache(struct btrfs_root *root,
				struct btrfs_block_group_cache *cache)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr,
				       0, &logical, &nr, &stripe_len);
		BUG_ON(ret);
		while (nr--) {
			try_lock_extent(&fs_info->pinned_extents,
					logical[nr],
					logical[nr] + stripe_len - 1,
					GFP_NOFS);
		}
		kfree(logical);
	}

	return 0;
}

/*
 * this is only called by cache_block_group, since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet
 * since their free space will be released as soon as the transaction commits.
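 * The caller hands us the gap [start, end) between allocated extents;
 * anything in that gap that is not pinned gets added to the block group's
 * free space cache.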
 */
static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(&info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY|EXTENT_LOCKED);
		if (ret)
			break;

		if (extent_start == start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return total_added;
}

static int caching_kthread(void *data)
{
	struct btrfs_block_group_cache *block_group = data;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	u64 last = 0;
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 total_found = 0;

	BUG_ON(!fs_info);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	atomic_inc(&block_group->space_info->caching_threads);
	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space.  So we skip locking and search the commit
	 * root, since it's read-only
	 */
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = 2;

	key.objectid = last;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
again:
	/* need to make sure the commit_root doesn't disappear */
	down_read(&fs_info->extent_commit_sem);

	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	while (1) {
		smp_mb();
		if (block_group->fs_info->closing > 1) {
			last = (u64)-1;
			break;
		}

		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(fs_info->extent_root, path);
			if (ret < 0)
				goto err;
			else if (ret)
				break;

			if (need_resched() ||
			    btrfs_transaction_in_commit(fs_info)) {
				leaf = path->nodes[0];

				/* this shouldn't happen, but if the
				 * leaf is empty just move on.
				 */
				if (btrfs_header_nritems(leaf) == 0)
					break;
				/*
				 * we need to copy the key out so that
				 * we are sure the next search advances
				 * us forward in the btree.
				 */
				btrfs_item_key_to_cpu(leaf, &key, 0);
				btrfs_release_path(fs_info->extent_root, path);
				up_read(&fs_info->extent_commit_sem);
				schedule_timeout(1);
				goto again;
			}

			continue;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid)
			goto next;

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			total_found += add_new_free_space(block_group,
							  fs_info, last,
							  key.objectid);
			last = key.objectid + key.offset;
		}

		if (total_found > (1024 * 1024 * 2)) {
			total_found = 0;
			wake_up(&block_group->caching_q);
		}
next:
		path->slots[0]++;
	}
	ret = 0;

	total_found += add_new_free_space(block_group, fs_info, last,
					  block_group->key.objectid +
					  block_group->key.offset);

	spin_lock(&block_group->lock);
	block_group->cached = BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

err:
	btrfs_free_path(path);
	up_read(&fs_info->extent_commit_sem);
	atomic_dec(&block_group->space_info->caching_threads);
	wake_up(&block_group->caching_q);

	return 0;
}

static int cache_block_group(struct btrfs_block_group_cache *cache)
{
	struct task_struct *tsk;
	int ret = 0;

	spin_lock(&cache->lock);
	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		return ret;
	}
	cache->cached = BTRFS_CACHE_STARTED;
	spin_unlock(&cache->lock);

	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
			  cache->key.objectid);
	if (IS_ERR(tsk)) {
		ret = PTR_ERR(tsk);
		printk(KERN_ERR "error running thread %d\n", ret);
		BUG();
	}

	return ret;
}

/*
 * return the block group that starts at or after bytenr
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 0);

	return cache;
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 1);

	return cache;
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count))
		kfree(cache);
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags == flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

/*
 * after adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
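 * (the full flag is what tells the allocator a space info has no room left
 * for new chunks, so it has to be reset once more space exists)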
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

static u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	do_div(num, 10);
	return num;
}

u64 btrfs_find_block_group(struct btrfs_root *root,
			   u64 search_start, u64 search_hint, int owner)
{
	struct btrfs_block_group_cache *cache;
	u64 used;
	u64 last = max(search_hint, search_start);
	u64 group_start = 0;
	int full_search = 0;
	int factor = 9;
	int wrapped = 0;
again:
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		if (!cache)
			break;

		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if ((full_search || !cache->ro) &&
		    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
			if (used + cache->pinned + cache->reserved <
			    div_factor(cache->key.offset, factor)) {
				group_start = cache->key.objectid;
				spin_unlock(&cache->lock);
				btrfs_put_block_group(cache);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		btrfs_put_block_group(cache);
		cond_resched();
	}
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return group_start;
}

/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	BUG_ON(!path);
	key.objectid = start;
	key.offset = len;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	btrfs_free_path(path);
	return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs. The implicit back refs is optimized
 * for pointers in non-shared tree blocks. For a given pointer in a block,
 * back refs of this kind provide information about the block's owner tree
 * and the pointer's key. This information allows us to find the block by
 * b-tree searching. The full back refs is for pointers in tree blocks not
 * referenced by their owner trees. The location of tree block is recorded
 * in the back refs. Actually the full back refs is generic, and can be
 * used in all cases the implicit back refs is used. The major shortcoming
 * of the full back refs is its overhead. Every time a tree block gets
 * COWed, we have to update back refs entry for all pointers in it.
 *
 * For a newly allocated tree block, we use implicit back refs for
 * pointers in it. This means most tree related operations only involve
 * implicit back refs. For a tree block created in old transaction, the
 * only way to drop a reference to it is COW it. So we can detect the
 * event that tree block loses its owner tree's reference and do the
 * back refs conversion.
 *
 * When a tree block is COW'd through a tree, there are four cases:
 *
 * The reference count of the block is one and the tree is the block's
 * owner tree. Nothing to do in this case.
 *
 * The reference count of the block is one and the tree is not the
 * block's owner tree. In this case, full back refs is used for pointers
 * in the block. Remove these full back refs, add implicit back refs for
 * every pointer in the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * the block's owner tree. In this case, implicit back refs is used for
 * pointers in the block. Add full back refs for every pointer in the
 * block, increase lower level extents' reference counts. The original
 * implicit back refs are inherited by the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * not the block's owner tree. Add implicit back refs for every pointer in
 * the new block, increase lower level extents' reference count.
 *
 * Back Reference Key composing:
 *
 * The key objectid corresponds to the first byte in the extent,
 * The key type is used to differentiate between types of back refs.
 * There are different meanings of the key offset for different types
 * of back refs.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure for the implicit back refs has fields for:
 *
 * - Objectid of the subvolume root
 * - objectid of the file holding the reference
 * - original offset in the file
 * - how many bookend extents
 *
 * The key offset for the implicit back refs is hash of the first
 * three fields.
 *
 * The extent ref structure for the full back refs has a field for:
 *
 * - number of pointers in the tree leaf
 *
 * The key offset for the full back refs is the first byte of
 * the tree leaf
 *
 * When a file extent is allocated, the implicit back refs is used.
 * The fields are filled in:
 *
 *     (root_key.objectid, inode objectid, offset in file, 1)
 *
 * When a file extent is removed during file truncation, we find the
 * corresponding implicit back refs and check the following fields:
 *
 *     (btrfs_header_owner(leaf), inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 *
 * Both the implicit back refs and the full back refs for tree blocks
 * only consist of key. The key offset for the implicit back refs is
 * objectid of block's owner tree. The key offset for the full back refs
 * is the first byte of parent block.
 *
 * When implicit back refs is used, information about the lowest key and
 * level of the tree block are required. This information is stored in
 * the tree block info structure.
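 *
 * As a concrete example of the implicit data back ref described above:
 * a file extent referenced by inode 257 at file offset 0 from the
 * subvolume tree whose root objectid is 5 gets a back ref with the
 * fields (5, 257, 0, 1), stored under the key
 * (extent bytenr, BTRFS_EXTENT_DATA_REF_KEY, hash(5, 257, 0)).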
 */

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0);
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(root, path);

	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);

	ret = btrfs_extend_item(trans, root, path, new_size);
	BUG_ON(ret);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif

static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(root, path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(root, path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
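			/*
			 * hash collision: some other (root, objectid, offset)
			 * triple landed on this key offset, so bump the offset
			 * and retry until we find our ref or an empty slot
			 */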
			btrfs_release_path(root, path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(root, path);
	return ret;
}

static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_root *root,
					  struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ret == -ENOENT && parent) {
		btrfs_release_path(root, path);
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
	}
#endif
	return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
	btrfs_release_path(root, path);
	return ret;
}

static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}

static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)

{
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}

/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 * items in the tree are ordered.
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	BUG_ON(ret);

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	} else {
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
	}

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}

/*
 * helper to add new inline back ref
 */
static noinline_for_stack
int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_extent_inline_ref *iref,
				u64 parent, u64 root_objectid,
				u64 owner, u64 offset, int refs_to_add,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;
	int ret;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	ret = btrfs_extend_item(trans, root, path, size);
	BUG_ON(ret);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(root, path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
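		/* a low owner value means the extent is a tree block,
		 * not file data, so look up the keyed tree block ref */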
		ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
					     root_objectid, owner, offset);
	}
	return ret;
}

/*
 * helper to update/remove inline back ref
 */
static noinline_for_stack
int update_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_mod,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	int ret;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		ret = btrfs_truncate_item(trans, root, path, item_size, 1);
		BUG_ON(ret);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}

static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, &iref,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0) {
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		ret = update_inline_extent_backref(trans, root, path, iref,
						   refs_to_add, extent_op);
	} else if (ret == -ENOENT) {
		ret = setup_inline_extent_backref(trans, root, path, iref,
						  parent, root_objectid,
						  owner, offset, refs_to_add,
						  extent_op);
	}
	return ret;
}

static int insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add)
{
	int ret;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, root, path, bytenr,
					    parent, root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, root, path, bytenr,
					     parent, root_objectid,
					     owner, offset, refs_to_add);
	}
	return ret;
}

static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data)
{
	int ret;

	BUG_ON(!is_data && refs_to_drop != 1);
	if (iref) {
		ret = update_inline_extent_backref(trans, root, path, iref,
						   -refs_to_drop, NULL);
	} else if (is_data) {
		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
	} else {
		ret = btrfs_del_item(trans, root, path);
	}
	return ret;
}

#ifdef BIO_RW_DISCARD
static void btrfs_issue_discard(struct block_device *bdev,
				u64 start, u64 len)
{
	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
			     DISCARD_FL_BARRIER);
}
#endif

static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
				u64 num_bytes)
{
#ifdef BIO_RW_DISCARD
	int ret;
	u64 map_length = num_bytes;
	struct btrfs_multi_bio *multi = NULL;

	/* Tell the block device(s) that the sectors can be discarded */
	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
			      bytenr, &map_length, &multi, 0);
	if (!ret) {
		struct btrfs_bio_stripe *stripe = multi->stripes;
		int i;

		if (map_length > num_bytes)
			map_length = num_bytes;

		for (i = 0; i < multi->num_stripes; i++, stripe++) {
			btrfs_issue_discard(stripe->dev->bdev,
					    stripe->physical,
					    map_length);
		}
		kfree(multi);
	}

	return ret;
#else
	return 0;
#endif
}

int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;
	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
	       root_objectid == BTRFS_TREE_LOG_OBJECTID);

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
					parent, root_objectid, (int)owner,
					BTRFS_ADD_DELAYED_REF, NULL);
	} else {
		ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
					parent, root_objectid, owner, offset,
					BTRFS_ADD_DELAYED_REF, NULL);
	}
	return ret;
}

static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  u64 bytenr, u64 num_bytes,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	u64 refs;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;
	path->leave_spinning = 1;
	/* this will setup the path even if it fails to insert the back ref */
	ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
					   path, bytenr, num_bytes, parent,
					   root_objectid, owner, offset,
					   refs_to_add, extent_op);
	if (ret == 0)
		goto out;

	if (ret != -EAGAIN) {
		err = ret;
		goto out;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root->fs_info->extent_root, path);

	path->reada = 1;
	path->leave_spinning = 1;

	/* now insert the actual backref */
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent, root_objectid,
				    owner, offset, refs_to_add);
	BUG_ON(ret);
out:
	btrfs_free_path(path);
	return err;
}

static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	u64 flags = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_data_ref(node);
	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
		parent = ref->parent;
	else
		ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		if (extent_op) {
			BUG_ON(extent_op->update_key);
			flags |= extent_op->flags_to_set;
		}
		ret = alloc_reserved_file_extent(trans, root,
						 parent, ref_root, flags,
						 ref->objectid, ref->offset,
						 &ins, node->ref_mod);
		update_reserved_extents(root, ins.objectid, ins.offset, 0);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
					     node->num_bytes, parent,
					     ref_root, ref->objectid,
					     ref->offset, node->ref_mod,
					     extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node->bytenr,
					  node->num_bytes, parent,
					  ref_root, ref->objectid,
					  ref->offset, node->ref_mod,
					  extent_op);
	} else {
		BUG();
	}
	return ret;
}

static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei)
{
	u64 flags = btrfs_extent_flags(leaf, ei);
	if (extent_op->update_flags) {
		flags |= extent_op->flags_to_set;
		btrfs_set_extent_flags(leaf, ei, flags);
	}

	if (extent_op->update_key) {
		struct btrfs_tree_block_info *bi;
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
	}
}

static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_delayed_ref_node *node,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = node->bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = node->num_bytes;

	path->reada = 1;
	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
				path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	if (ret > 0) {
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
					     path, (u64)-1, 0);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	__run_delayed_extent_op(extent_op, leaf, ei);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return err;
}

static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_tree_ref(node);
	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		parent = ref->parent;
	else
		ref_root = ref->root;

	BUG_ON(node->ref_mod != 1);
	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		BUG_ON(!extent_op || !extent_op->update_flags ||
		       !extent_op->update_key);
		ret = alloc_reserved_tree_block(trans, root,
						parent, ref_root,
						extent_op->flags_to_set,
						&extent_op->key,
						ref->level, &ins);
		update_reserved_extents(root, ins.objectid, ins.offset, 0);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
					     node->num_bytes, parent, ref_root,
					     ref->level, 0, 1, extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node->bytenr,
					  node->num_bytes, parent, ref_root,
					  ref->level, 0, 1, extent_op);
	} else {
		BUG();
	}
	return ret;
}


/* helper function to actually process a single delayed ref entry */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_delayed_ref_node *node,
			       struct btrfs_delayed_extent_op *extent_op,
			       int insert_reserved)
{
	int ret;
	if (btrfs_delayed_ref_is_head(node)) {
		struct btrfs_delayed_ref_head *head;
		/*
		 * we've hit the end of the chain and we were supposed
		 * to insert this extent into the tree.  But, it got
		 * deleted before we ever needed to insert it, so all
		 * we have to do is clean up the accounting
		 */
		BUG_ON(extent_op);
		head = btrfs_delayed_node_to_head(node);
		if (insert_reserved) {
			if (head->is_data) {
				ret = btrfs_del_csums(trans, root,
						      node->bytenr,
						      node->num_bytes);
				BUG_ON(ret);
			}
			btrfs_update_pinned_extents(root, node->bytenr,
						    node->num_bytes, 1);
			update_reserved_extents(root, node->bytenr,
						node->num_bytes, 0);
		}
		mutex_unlock(&head->mutex);
		return 0;
	}

	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = run_delayed_tree_ref(trans, root, node, extent_op,
					   insert_reserved);
	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		 node->type == BTRFS_SHARED_DATA_REF_KEY)
		ret = run_delayed_data_ref(trans, root, node, extent_op,
					   insert_reserved);
	else
		BUG();
	return ret;
}

static noinline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
	struct rb_node *node;
	struct btrfs_delayed_ref_node *ref;
	int action = BTRFS_ADD_DELAYED_REF;
again:
	/*
	 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
	 * this prevents ref count from going down to zero when
	 * there still are pending delayed refs.
	 */
	node = rb_prev(&head->node.rb_node);
	while (1) {
		if (!node)
			break;
		ref = rb_entry(node, struct btrfs_delayed_ref_node,
			       rb_node);
		if (ref->bytenr != head->node.bytenr)
			break;
		if (ref->action == action)
			return ref;
		node = rb_prev(node);
	}
	if (action == BTRFS_ADD_DELAYED_REF) {
		action = BTRFS_DROP_DELAYED_REF;
		goto again;
	}
	return NULL;
}

static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct list_head *cluster)
{
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_head *locked_ref = NULL;
	struct btrfs_delayed_extent_op *extent_op;
	int ret;
	int count = 0;
	int must_insert_reserved = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	while (1) {
		if (!locked_ref) {
			/* pick a new head ref from the cluster list */
			if (list_empty(cluster))
				break;

			locked_ref = list_entry(cluster->next,
				     struct btrfs_delayed_ref_head, cluster);

			/* grab the lock that says we are going to process
			 * all the refs for this head */
			ret = btrfs_delayed_ref_lock(trans, locked_ref);

			/*
			 * we may have dropped the spin lock to get the head
			 * mutex lock, and that might have given someone else
			 * time to free the head.  If that's true, it has been
			 * removed from our list and we can move on.
			 */
			if (ret == -EAGAIN) {
				locked_ref = NULL;
				count++;
				continue;
			}
		}

		/*
		 * record the must insert reserved flag before we
		 * drop the spin lock.
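		 * must_insert_reserved means the extent was reserved at
		 * allocation time and its extent item still has to be
		 * inserted (or the reservation cleaned up if the extent
		 * was freed before we got here).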
1922 */ 1923 must_insert_reserved = locked_ref->must_insert_reserved; 1924 locked_ref->must_insert_reserved = 0; 1925 1926 extent_op = locked_ref->extent_op; 1927 locked_ref->extent_op = NULL; 1928 1929 /* 1930 * locked_ref is the head node, so we have to go one 1931 * node back for any delayed ref updates 1932 */ 1933 ref = select_delayed_ref(locked_ref); 1934 if (!ref) { 1935 /* All delayed refs have been processed, Go ahead 1936 * and send the head node to run_one_delayed_ref, 1937 * so that any accounting fixes can happen 1938 */ 1939 ref = &locked_ref->node; 1940 1941 if (extent_op && must_insert_reserved) { 1942 kfree(extent_op); 1943 extent_op = NULL; 1944 } 1945 1946 if (extent_op) { 1947 spin_unlock(&delayed_refs->lock); 1948 1949 ret = run_delayed_extent_op(trans, root, 1950 ref, extent_op); 1951 BUG_ON(ret); 1952 kfree(extent_op); 1953 1954 cond_resched(); 1955 spin_lock(&delayed_refs->lock); 1956 continue; 1957 } 1958 1959 list_del_init(&locked_ref->cluster); 1960 locked_ref = NULL; 1961 } 1962 1963 ref->in_tree = 0; 1964 rb_erase(&ref->rb_node, &delayed_refs->root); 1965 delayed_refs->num_entries--; 1966 1967 spin_unlock(&delayed_refs->lock); 1968 1969 ret = run_one_delayed_ref(trans, root, ref, extent_op, 1970 must_insert_reserved); 1971 BUG_ON(ret); 1972 1973 btrfs_put_delayed_ref(ref); 1974 kfree(extent_op); 1975 count++; 1976 1977 cond_resched(); 1978 spin_lock(&delayed_refs->lock); 1979 } 1980 return count; 1981 } 1982 1983 /* 1984 * this starts processing the delayed reference count updates and 1985 * extent insertions we have queued up so far. count can be 1986 * 0, which means to process everything in the tree at the start 1987 * of the run (but not newly added entries), or it can be some target 1988 * number you'd like to process. 1989 */ 1990 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 1991 struct btrfs_root *root, unsigned long count) 1992 { 1993 struct rb_node *node; 1994 struct btrfs_delayed_ref_root *delayed_refs; 1995 struct btrfs_delayed_ref_node *ref; 1996 struct list_head cluster; 1997 int ret; 1998 int run_all = count == (unsigned long)-1; 1999 int run_most = 0; 2000 2001 if (root == root->fs_info->extent_root) 2002 root = root->fs_info->tree_root; 2003 2004 delayed_refs = &trans->transaction->delayed_refs; 2005 INIT_LIST_HEAD(&cluster); 2006 again: 2007 spin_lock(&delayed_refs->lock); 2008 if (count == 0) { 2009 count = delayed_refs->num_entries * 2; 2010 run_most = 1; 2011 } 2012 while (1) { 2013 if (!(run_all || run_most) && 2014 delayed_refs->num_heads_ready < 64) 2015 break; 2016 2017 /* 2018 * go find something we can process in the rbtree. 
We start at 2019 * the beginning of the tree, and then build a cluster 2020 * of refs to process starting at the first one we are able to 2021 * lock 2022 */ 2023 ret = btrfs_find_ref_cluster(trans, &cluster, 2024 delayed_refs->run_delayed_start); 2025 if (ret) 2026 break; 2027 2028 ret = run_clustered_refs(trans, root, &cluster); 2029 BUG_ON(ret < 0); 2030 2031 count -= min_t(unsigned long, ret, count); 2032 2033 if (count == 0) 2034 break; 2035 } 2036 2037 if (run_all) { 2038 node = rb_first(&delayed_refs->root); 2039 if (!node) 2040 goto out; 2041 count = (unsigned long)-1; 2042 2043 while (node) { 2044 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2045 rb_node); 2046 if (btrfs_delayed_ref_is_head(ref)) { 2047 struct btrfs_delayed_ref_head *head; 2048 2049 head = btrfs_delayed_node_to_head(ref); 2050 atomic_inc(&ref->refs); 2051 2052 spin_unlock(&delayed_refs->lock); 2053 mutex_lock(&head->mutex); 2054 mutex_unlock(&head->mutex); 2055 2056 btrfs_put_delayed_ref(ref); 2057 cond_resched(); 2058 goto again; 2059 } 2060 node = rb_next(node); 2061 } 2062 spin_unlock(&delayed_refs->lock); 2063 schedule_timeout(1); 2064 goto again; 2065 } 2066 out: 2067 spin_unlock(&delayed_refs->lock); 2068 return 0; 2069 } 2070 2071 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2072 struct btrfs_root *root, 2073 u64 bytenr, u64 num_bytes, u64 flags, 2074 int is_data) 2075 { 2076 struct btrfs_delayed_extent_op *extent_op; 2077 int ret; 2078 2079 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2080 if (!extent_op) 2081 return -ENOMEM; 2082 2083 extent_op->flags_to_set = flags; 2084 extent_op->update_flags = 1; 2085 extent_op->update_key = 0; 2086 extent_op->is_data = is_data ? 1 : 0; 2087 2088 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); 2089 if (ret) 2090 kfree(extent_op); 2091 return ret; 2092 } 2093 2094 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, 2095 struct btrfs_root *root, 2096 struct btrfs_path *path, 2097 u64 objectid, u64 offset, u64 bytenr) 2098 { 2099 struct btrfs_delayed_ref_head *head; 2100 struct btrfs_delayed_ref_node *ref; 2101 struct btrfs_delayed_data_ref *data_ref; 2102 struct btrfs_delayed_ref_root *delayed_refs; 2103 struct rb_node *node; 2104 int ret = 0; 2105 2106 ret = -ENOENT; 2107 delayed_refs = &trans->transaction->delayed_refs; 2108 spin_lock(&delayed_refs->lock); 2109 head = btrfs_find_delayed_ref_head(trans, bytenr); 2110 if (!head) 2111 goto out; 2112 2113 if (!mutex_trylock(&head->mutex)) { 2114 atomic_inc(&head->node.refs); 2115 spin_unlock(&delayed_refs->lock); 2116 2117 btrfs_release_path(root->fs_info->extent_root, path); 2118 2119 mutex_lock(&head->mutex); 2120 mutex_unlock(&head->mutex); 2121 btrfs_put_delayed_ref(&head->node); 2122 return -EAGAIN; 2123 } 2124 2125 node = rb_prev(&head->node.rb_node); 2126 if (!node) 2127 goto out_unlock; 2128 2129 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2130 2131 if (ref->bytenr != bytenr) 2132 goto out_unlock; 2133 2134 ret = 1; 2135 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) 2136 goto out_unlock; 2137 2138 data_ref = btrfs_delayed_node_to_data_ref(ref); 2139 2140 node = rb_prev(node); 2141 if (node) { 2142 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2143 if (ref->bytenr == bytenr) 2144 goto out_unlock; 2145 } 2146 2147 if (data_ref->root != root->root_key.objectid || 2148 data_ref->objectid != objectid || data_ref->offset != offset) 2149 goto out_unlock; 2150 2151 ret = 0; 2152 out_unlock: 2153 
mutex_unlock(&head->mutex); 2154 out: 2155 spin_unlock(&delayed_refs->lock); 2156 return ret; 2157 } 2158 2159 static noinline int check_committed_ref(struct btrfs_trans_handle *trans, 2160 struct btrfs_root *root, 2161 struct btrfs_path *path, 2162 u64 objectid, u64 offset, u64 bytenr) 2163 { 2164 struct btrfs_root *extent_root = root->fs_info->extent_root; 2165 struct extent_buffer *leaf; 2166 struct btrfs_extent_data_ref *ref; 2167 struct btrfs_extent_inline_ref *iref; 2168 struct btrfs_extent_item *ei; 2169 struct btrfs_key key; 2170 u32 item_size; 2171 int ret; 2172 2173 key.objectid = bytenr; 2174 key.offset = (u64)-1; 2175 key.type = BTRFS_EXTENT_ITEM_KEY; 2176 2177 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2178 if (ret < 0) 2179 goto out; 2180 BUG_ON(ret == 0); 2181 2182 ret = -ENOENT; 2183 if (path->slots[0] == 0) 2184 goto out; 2185 2186 path->slots[0]--; 2187 leaf = path->nodes[0]; 2188 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2189 2190 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY) 2191 goto out; 2192 2193 ret = 1; 2194 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2195 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2196 if (item_size < sizeof(*ei)) { 2197 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 2198 goto out; 2199 } 2200 #endif 2201 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2202 2203 if (item_size != sizeof(*ei) + 2204 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) 2205 goto out; 2206 2207 if (btrfs_extent_generation(leaf, ei) <= 2208 btrfs_root_last_snapshot(&root->root_item)) 2209 goto out; 2210 2211 iref = (struct btrfs_extent_inline_ref *)(ei + 1); 2212 if (btrfs_extent_inline_ref_type(leaf, iref) != 2213 BTRFS_EXTENT_DATA_REF_KEY) 2214 goto out; 2215 2216 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 2217 if (btrfs_extent_refs(leaf, ei) != 2218 btrfs_extent_data_ref_count(leaf, ref) || 2219 btrfs_extent_data_ref_root(leaf, ref) != 2220 root->root_key.objectid || 2221 btrfs_extent_data_ref_objectid(leaf, ref) != objectid || 2222 btrfs_extent_data_ref_offset(leaf, ref) != offset) 2223 goto out; 2224 2225 ret = 0; 2226 out: 2227 return ret; 2228 } 2229 2230 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 2231 struct btrfs_root *root, 2232 u64 objectid, u64 offset, u64 bytenr) 2233 { 2234 struct btrfs_path *path; 2235 int ret; 2236 int ret2; 2237 2238 path = btrfs_alloc_path(); 2239 if (!path) 2240 return -ENOENT; 2241 2242 do { 2243 ret = check_committed_ref(trans, root, path, objectid, 2244 offset, bytenr); 2245 if (ret && ret != -ENOENT) 2246 goto out; 2247 2248 ret2 = check_delayed_ref(trans, root, path, objectid, 2249 offset, bytenr); 2250 } while (ret2 == -EAGAIN); 2251 2252 if (ret2 && ret2 != -ENOENT) { 2253 ret = ret2; 2254 goto out; 2255 } 2256 2257 if (ret != -ENOENT || ret2 != -ENOENT) 2258 ret = 0; 2259 out: 2260 btrfs_free_path(path); 2261 return ret; 2262 } 2263 2264 #if 0 2265 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2266 struct extent_buffer *buf, u32 nr_extents) 2267 { 2268 struct btrfs_key key; 2269 struct btrfs_file_extent_item *fi; 2270 u64 root_gen; 2271 u32 nritems; 2272 int i; 2273 int level; 2274 int ret = 0; 2275 int shared = 0; 2276 2277 if (!root->ref_cows) 2278 return 0; 2279 2280 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 2281 shared = 0; 2282 root_gen = root->root_key.offset; 2283 } else { 2284 shared = 1; 2285 root_gen = trans->transid - 1; 2286 } 2287 2288 level = 
btrfs_header_level(buf); 2289 nritems = btrfs_header_nritems(buf); 2290 2291 if (level == 0) { 2292 struct btrfs_leaf_ref *ref; 2293 struct btrfs_extent_info *info; 2294 2295 ref = btrfs_alloc_leaf_ref(root, nr_extents); 2296 if (!ref) { 2297 ret = -ENOMEM; 2298 goto out; 2299 } 2300 2301 ref->root_gen = root_gen; 2302 ref->bytenr = buf->start; 2303 ref->owner = btrfs_header_owner(buf); 2304 ref->generation = btrfs_header_generation(buf); 2305 ref->nritems = nr_extents; 2306 info = ref->extents; 2307 2308 for (i = 0; nr_extents > 0 && i < nritems; i++) { 2309 u64 disk_bytenr; 2310 btrfs_item_key_to_cpu(buf, &key, i); 2311 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2312 continue; 2313 fi = btrfs_item_ptr(buf, i, 2314 struct btrfs_file_extent_item); 2315 if (btrfs_file_extent_type(buf, fi) == 2316 BTRFS_FILE_EXTENT_INLINE) 2317 continue; 2318 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2319 if (disk_bytenr == 0) 2320 continue; 2321 2322 info->bytenr = disk_bytenr; 2323 info->num_bytes = 2324 btrfs_file_extent_disk_num_bytes(buf, fi); 2325 info->objectid = key.objectid; 2326 info->offset = key.offset; 2327 info++; 2328 } 2329 2330 ret = btrfs_add_leaf_ref(root, ref, shared); 2331 if (ret == -EEXIST && shared) { 2332 struct btrfs_leaf_ref *old; 2333 old = btrfs_lookup_leaf_ref(root, ref->bytenr); 2334 BUG_ON(!old); 2335 btrfs_remove_leaf_ref(root, old); 2336 btrfs_free_leaf_ref(root, old); 2337 ret = btrfs_add_leaf_ref(root, ref, shared); 2338 } 2339 WARN_ON(ret); 2340 btrfs_free_leaf_ref(root, ref); 2341 } 2342 out: 2343 return ret; 2344 } 2345 2346 /* when a block goes through cow, we update the reference counts of 2347 * everything that block points to. The internal pointers of the block 2348 * can be in just about any order, and it is likely to have clusters of 2349 * things that are close together and clusters of things that are not. 2350 * 2351 * To help reduce the seeks that come with updating all of these reference 2352 * counts, sort them by byte number before actual updates are done. 2353 * 2354 * struct refsort is used to match byte number to slot in the btree block. 2355 * we sort based on the byte number and then use the slot to actually 2356 * find the item. 2357 * 2358 * struct refsort is smaller than strcut btrfs_item and smaller than 2359 * struct btrfs_key_ptr. Since we're currently limited to the page size 2360 * for a btree block, there's no way for a kmalloc of refsorts for a 2361 * single node to be bigger than a page. 
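 * A sketch of the intended use (illustrative only): fill an array of
 * struct refsort, sort it with
 *   sort(refs, nritems, sizeof(struct refsort), refsort_cmp, NULL);
 * from linux/sort.h, then walk the array using refs[i].slot to locate
 * each item, so the reference updates proceed in increasing bytenr
 * order.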
2362 */ 2363 struct refsort { 2364 u64 bytenr; 2365 u32 slot; 2366 }; 2367 2368 /* 2369 * for passing into sort() 2370 */ 2371 static int refsort_cmp(const void *a_void, const void *b_void) 2372 { 2373 const struct refsort *a = a_void; 2374 const struct refsort *b = b_void; 2375 2376 if (a->bytenr < b->bytenr) 2377 return -1; 2378 if (a->bytenr > b->bytenr) 2379 return 1; 2380 return 0; 2381 } 2382 #endif 2383 2384 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2385 struct btrfs_root *root, 2386 struct extent_buffer *buf, 2387 int full_backref, int inc) 2388 { 2389 u64 bytenr; 2390 u64 num_bytes; 2391 u64 parent; 2392 u64 ref_root; 2393 u32 nritems; 2394 struct btrfs_key key; 2395 struct btrfs_file_extent_item *fi; 2396 int i; 2397 int level; 2398 int ret = 0; 2399 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2400 u64, u64, u64, u64, u64, u64); 2401 2402 ref_root = btrfs_header_owner(buf); 2403 nritems = btrfs_header_nritems(buf); 2404 level = btrfs_header_level(buf); 2405 2406 if (!root->ref_cows && level == 0) 2407 return 0; 2408 2409 if (inc) 2410 process_func = btrfs_inc_extent_ref; 2411 else 2412 process_func = btrfs_free_extent; 2413 2414 if (full_backref) 2415 parent = buf->start; 2416 else 2417 parent = 0; 2418 2419 for (i = 0; i < nritems; i++) { 2420 if (level == 0) { 2421 btrfs_item_key_to_cpu(buf, &key, i); 2422 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2423 continue; 2424 fi = btrfs_item_ptr(buf, i, 2425 struct btrfs_file_extent_item); 2426 if (btrfs_file_extent_type(buf, fi) == 2427 BTRFS_FILE_EXTENT_INLINE) 2428 continue; 2429 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2430 if (bytenr == 0) 2431 continue; 2432 2433 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); 2434 key.offset -= btrfs_file_extent_offset(buf, fi); 2435 ret = process_func(trans, root, bytenr, num_bytes, 2436 parent, ref_root, key.objectid, 2437 key.offset); 2438 if (ret) 2439 goto fail; 2440 } else { 2441 bytenr = btrfs_node_blockptr(buf, i); 2442 num_bytes = btrfs_level_size(root, level - 1); 2443 ret = process_func(trans, root, bytenr, num_bytes, 2444 parent, ref_root, level - 1, 0); 2445 if (ret) 2446 goto fail; 2447 } 2448 } 2449 return 0; 2450 fail: 2451 BUG(); 2452 return ret; 2453 } 2454 2455 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2456 struct extent_buffer *buf, int full_backref) 2457 { 2458 return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 2459 } 2460 2461 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2462 struct extent_buffer *buf, int full_backref) 2463 { 2464 return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 2465 } 2466 2467 static int write_one_cache_group(struct btrfs_trans_handle *trans, 2468 struct btrfs_root *root, 2469 struct btrfs_path *path, 2470 struct btrfs_block_group_cache *cache) 2471 { 2472 int ret; 2473 struct btrfs_root *extent_root = root->fs_info->extent_root; 2474 unsigned long bi; 2475 struct extent_buffer *leaf; 2476 2477 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 2478 if (ret < 0) 2479 goto fail; 2480 BUG_ON(ret); 2481 2482 leaf = path->nodes[0]; 2483 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2484 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2485 btrfs_mark_buffer_dirty(leaf); 2486 btrfs_release_path(extent_root, path); 2487 fail: 2488 if (ret) 2489 return ret; 2490 return 0; 2491 2492 } 2493 2494 static struct btrfs_block_group_cache * 2495 next_block_group(struct btrfs_root 
*root, 2496 struct btrfs_block_group_cache *cache) 2497 { 2498 struct rb_node *node; 2499 spin_lock(&root->fs_info->block_group_cache_lock); 2500 node = rb_next(&cache->cache_node); 2501 btrfs_put_block_group(cache); 2502 if (node) { 2503 cache = rb_entry(node, struct btrfs_block_group_cache, 2504 cache_node); 2505 atomic_inc(&cache->count); 2506 } else 2507 cache = NULL; 2508 spin_unlock(&root->fs_info->block_group_cache_lock); 2509 return cache; 2510 } 2511 2512 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2513 struct btrfs_root *root) 2514 { 2515 struct btrfs_block_group_cache *cache; 2516 int err = 0; 2517 struct btrfs_path *path; 2518 u64 last = 0; 2519 2520 path = btrfs_alloc_path(); 2521 if (!path) 2522 return -ENOMEM; 2523 2524 while (1) { 2525 if (last == 0) { 2526 err = btrfs_run_delayed_refs(trans, root, 2527 (unsigned long)-1); 2528 BUG_ON(err); 2529 } 2530 2531 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2532 while (cache) { 2533 if (cache->dirty) 2534 break; 2535 cache = next_block_group(root, cache); 2536 } 2537 if (!cache) { 2538 if (last == 0) 2539 break; 2540 last = 0; 2541 continue; 2542 } 2543 2544 cache->dirty = 0; 2545 last = cache->key.objectid + cache->key.offset; 2546 2547 err = write_one_cache_group(trans, root, path, cache); 2548 BUG_ON(err); 2549 btrfs_put_block_group(cache); 2550 } 2551 2552 btrfs_free_path(path); 2553 return 0; 2554 } 2555 2556 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 2557 { 2558 struct btrfs_block_group_cache *block_group; 2559 int readonly = 0; 2560 2561 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 2562 if (!block_group || block_group->ro) 2563 readonly = 1; 2564 if (block_group) 2565 btrfs_put_block_group(block_group); 2566 return readonly; 2567 } 2568 2569 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 2570 u64 total_bytes, u64 bytes_used, 2571 struct btrfs_space_info **space_info) 2572 { 2573 struct btrfs_space_info *found; 2574 2575 found = __find_space_info(info, flags); 2576 if (found) { 2577 spin_lock(&found->lock); 2578 found->total_bytes += total_bytes; 2579 found->bytes_used += bytes_used; 2580 found->full = 0; 2581 spin_unlock(&found->lock); 2582 *space_info = found; 2583 return 0; 2584 } 2585 found = kzalloc(sizeof(*found), GFP_NOFS); 2586 if (!found) 2587 return -ENOMEM; 2588 2589 INIT_LIST_HEAD(&found->block_groups); 2590 init_rwsem(&found->groups_sem); 2591 spin_lock_init(&found->lock); 2592 found->flags = flags; 2593 found->total_bytes = total_bytes; 2594 found->bytes_used = bytes_used; 2595 found->bytes_pinned = 0; 2596 found->bytes_reserved = 0; 2597 found->bytes_readonly = 0; 2598 found->bytes_delalloc = 0; 2599 found->full = 0; 2600 found->force_alloc = 0; 2601 *space_info = found; 2602 list_add_rcu(&found->list, &info->space_info); 2603 atomic_set(&found->caching_threads, 0); 2604 return 0; 2605 } 2606 2607 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 2608 { 2609 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 2610 BTRFS_BLOCK_GROUP_RAID1 | 2611 BTRFS_BLOCK_GROUP_RAID10 | 2612 BTRFS_BLOCK_GROUP_DUP); 2613 if (extra_flags) { 2614 if (flags & BTRFS_BLOCK_GROUP_DATA) 2615 fs_info->avail_data_alloc_bits |= extra_flags; 2616 if (flags & BTRFS_BLOCK_GROUP_METADATA) 2617 fs_info->avail_metadata_alloc_bits |= extra_flags; 2618 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2619 fs_info->avail_system_alloc_bits |= extra_flags; 2620 } 2621 } 2622 2623 static void set_block_group_readonly(struct 
btrfs_block_group_cache *cache) 2624 { 2625 spin_lock(&cache->space_info->lock); 2626 spin_lock(&cache->lock); 2627 if (!cache->ro) { 2628 cache->space_info->bytes_readonly += cache->key.offset - 2629 btrfs_block_group_used(&cache->item); 2630 cache->ro = 1; 2631 } 2632 spin_unlock(&cache->lock); 2633 spin_unlock(&cache->space_info->lock); 2634 } 2635 2636 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 2637 { 2638 u64 num_devices = root->fs_info->fs_devices->rw_devices; 2639 2640 if (num_devices == 1) 2641 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 2642 if (num_devices < 4) 2643 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 2644 2645 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 2646 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 2647 BTRFS_BLOCK_GROUP_RAID10))) { 2648 flags &= ~BTRFS_BLOCK_GROUP_DUP; 2649 } 2650 2651 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 2652 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 2653 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 2654 } 2655 2656 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 2657 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 2658 (flags & BTRFS_BLOCK_GROUP_RAID10) | 2659 (flags & BTRFS_BLOCK_GROUP_DUP))) 2660 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 2661 return flags; 2662 } 2663 2664 static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) 2665 { 2666 struct btrfs_fs_info *info = root->fs_info; 2667 u64 alloc_profile; 2668 2669 if (data) { 2670 alloc_profile = info->avail_data_alloc_bits & 2671 info->data_alloc_profile; 2672 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; 2673 } else if (root == root->fs_info->chunk_root) { 2674 alloc_profile = info->avail_system_alloc_bits & 2675 info->system_alloc_profile; 2676 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; 2677 } else { 2678 alloc_profile = info->avail_metadata_alloc_bits & 2679 info->metadata_alloc_profile; 2680 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; 2681 } 2682 2683 return btrfs_reduce_alloc_profile(root, data); 2684 } 2685 2686 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) 2687 { 2688 u64 alloc_target; 2689 2690 alloc_target = btrfs_get_alloc_profile(root, 1); 2691 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, 2692 alloc_target); 2693 } 2694 2695 /* 2696 * for now this just makes sure we have at least 5% of our metadata space free 2697 * for use. 
2698 */ 2699 int btrfs_check_metadata_free_space(struct btrfs_root *root) 2700 { 2701 struct btrfs_fs_info *info = root->fs_info; 2702 struct btrfs_space_info *meta_sinfo; 2703 u64 alloc_target, thresh; 2704 int committed = 0, ret; 2705 2706 /* get the space info for where the metadata will live */ 2707 alloc_target = btrfs_get_alloc_profile(root, 0); 2708 meta_sinfo = __find_space_info(info, alloc_target); 2709 2710 again: 2711 spin_lock(&meta_sinfo->lock); 2712 if (!meta_sinfo->full) 2713 thresh = meta_sinfo->total_bytes * 80; 2714 else 2715 thresh = meta_sinfo->total_bytes * 95; 2716 2717 do_div(thresh, 100); 2718 2719 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2720 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { 2721 struct btrfs_trans_handle *trans; 2722 if (!meta_sinfo->full) { 2723 meta_sinfo->force_alloc = 1; 2724 spin_unlock(&meta_sinfo->lock); 2725 2726 trans = btrfs_start_transaction(root, 1); 2727 if (!trans) 2728 return -ENOMEM; 2729 2730 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2731 2 * 1024 * 1024, alloc_target, 0); 2732 btrfs_end_transaction(trans, root); 2733 goto again; 2734 } 2735 spin_unlock(&meta_sinfo->lock); 2736 2737 if (!committed) { 2738 committed = 1; 2739 trans = btrfs_join_transaction(root, 1); 2740 if (!trans) 2741 return -ENOMEM; 2742 ret = btrfs_commit_transaction(trans, root); 2743 if (ret) 2744 return ret; 2745 goto again; 2746 } 2747 return -ENOSPC; 2748 } 2749 spin_unlock(&meta_sinfo->lock); 2750 2751 return 0; 2752 } 2753 2754 /* 2755 * This will check the space that the inode allocates from to make sure we have 2756 * enough space for bytes. 2757 */ 2758 int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, 2759 u64 bytes) 2760 { 2761 struct btrfs_space_info *data_sinfo; 2762 int ret = 0, committed = 0; 2763 2764 /* make sure bytes are sectorsize aligned */ 2765 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 2766 2767 data_sinfo = BTRFS_I(inode)->space_info; 2768 again: 2769 /* make sure we have enough space to handle the data first */ 2770 spin_lock(&data_sinfo->lock); 2771 if (data_sinfo->total_bytes - data_sinfo->bytes_used - 2772 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - 2773 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - 2774 data_sinfo->bytes_may_use < bytes) { 2775 struct btrfs_trans_handle *trans; 2776 2777 /* 2778 * if we don't have enough free bytes in this space then we need 2779 * to alloc a new chunk. 
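 * force_alloc is set first so that do_chunk_alloc() will not skip the
 * allocation just because the space looks under-utilized.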
2780 */ 2781 if (!data_sinfo->full) { 2782 u64 alloc_target; 2783 2784 data_sinfo->force_alloc = 1; 2785 spin_unlock(&data_sinfo->lock); 2786 2787 alloc_target = btrfs_get_alloc_profile(root, 1); 2788 trans = btrfs_start_transaction(root, 1); 2789 if (!trans) 2790 return -ENOMEM; 2791 2792 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2793 bytes + 2 * 1024 * 1024, 2794 alloc_target, 0); 2795 btrfs_end_transaction(trans, root); 2796 if (ret) 2797 return ret; 2798 goto again; 2799 } 2800 spin_unlock(&data_sinfo->lock); 2801 2802 /* commit the current transaction and try again */ 2803 if (!committed) { 2804 committed = 1; 2805 trans = btrfs_join_transaction(root, 1); 2806 if (!trans) 2807 return -ENOMEM; 2808 ret = btrfs_commit_transaction(trans, root); 2809 if (ret) 2810 return ret; 2811 goto again; 2812 } 2813 2814 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 2815 ", %llu bytes_used, %llu bytes_reserved, " 2816 "%llu bytes_pinned, %llu bytes_readonly, %llu may use " 2817 "%llu total\n", (unsigned long long)bytes, 2818 (unsigned long long)data_sinfo->bytes_delalloc, 2819 (unsigned long long)data_sinfo->bytes_used, 2820 (unsigned long long)data_sinfo->bytes_reserved, 2821 (unsigned long long)data_sinfo->bytes_pinned, 2822 (unsigned long long)data_sinfo->bytes_readonly, 2823 (unsigned long long)data_sinfo->bytes_may_use, 2824 (unsigned long long)data_sinfo->total_bytes); 2825 return -ENOSPC; 2826 } 2827 data_sinfo->bytes_may_use += bytes; 2828 BTRFS_I(inode)->reserved_bytes += bytes; 2829 spin_unlock(&data_sinfo->lock); 2830 2831 return btrfs_check_metadata_free_space(root); 2832 } 2833 2834 /* 2835 * if there was an error for whatever reason after calling 2836 * btrfs_check_data_free_space, call this so we can cleanup the counters. 2837 */ 2838 void btrfs_free_reserved_data_space(struct btrfs_root *root, 2839 struct inode *inode, u64 bytes) 2840 { 2841 struct btrfs_space_info *data_sinfo; 2842 2843 /* make sure bytes are sectorsize aligned */ 2844 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 2845 2846 data_sinfo = BTRFS_I(inode)->space_info; 2847 spin_lock(&data_sinfo->lock); 2848 data_sinfo->bytes_may_use -= bytes; 2849 BTRFS_I(inode)->reserved_bytes -= bytes; 2850 spin_unlock(&data_sinfo->lock); 2851 } 2852 2853 /* called when we are adding a delalloc extent to the inode's io_tree */ 2854 void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, 2855 u64 bytes) 2856 { 2857 struct btrfs_space_info *data_sinfo; 2858 2859 /* get the space info for where this inode will be storing its data */ 2860 data_sinfo = BTRFS_I(inode)->space_info; 2861 2862 /* make sure we have enough space to handle the data first */ 2863 spin_lock(&data_sinfo->lock); 2864 data_sinfo->bytes_delalloc += bytes; 2865 2866 /* 2867 * we are adding a delalloc extent without calling 2868 * btrfs_check_data_free_space first. 
This happens on a weird 2869 * writepage condition, but shouldn't hurt our accounting 2870 */ 2871 if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { 2872 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; 2873 BTRFS_I(inode)->reserved_bytes = 0; 2874 } else { 2875 data_sinfo->bytes_may_use -= bytes; 2876 BTRFS_I(inode)->reserved_bytes -= bytes; 2877 } 2878 2879 spin_unlock(&data_sinfo->lock); 2880 } 2881 2882 /* called when we are clearing an delalloc extent from the inode's io_tree */ 2883 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 2884 u64 bytes) 2885 { 2886 struct btrfs_space_info *info; 2887 2888 info = BTRFS_I(inode)->space_info; 2889 2890 spin_lock(&info->lock); 2891 info->bytes_delalloc -= bytes; 2892 spin_unlock(&info->lock); 2893 } 2894 2895 static void force_metadata_allocation(struct btrfs_fs_info *info) 2896 { 2897 struct list_head *head = &info->space_info; 2898 struct btrfs_space_info *found; 2899 2900 rcu_read_lock(); 2901 list_for_each_entry_rcu(found, head, list) { 2902 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 2903 found->force_alloc = 1; 2904 } 2905 rcu_read_unlock(); 2906 } 2907 2908 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 2909 struct btrfs_root *extent_root, u64 alloc_bytes, 2910 u64 flags, int force) 2911 { 2912 struct btrfs_space_info *space_info; 2913 struct btrfs_fs_info *fs_info = extent_root->fs_info; 2914 u64 thresh; 2915 int ret = 0; 2916 2917 mutex_lock(&fs_info->chunk_mutex); 2918 2919 flags = btrfs_reduce_alloc_profile(extent_root, flags); 2920 2921 space_info = __find_space_info(extent_root->fs_info, flags); 2922 if (!space_info) { 2923 ret = update_space_info(extent_root->fs_info, flags, 2924 0, 0, &space_info); 2925 BUG_ON(ret); 2926 } 2927 BUG_ON(!space_info); 2928 2929 spin_lock(&space_info->lock); 2930 if (space_info->force_alloc) { 2931 force = 1; 2932 space_info->force_alloc = 0; 2933 } 2934 if (space_info->full) { 2935 spin_unlock(&space_info->lock); 2936 goto out; 2937 } 2938 2939 thresh = space_info->total_bytes - space_info->bytes_readonly; 2940 thresh = div_factor(thresh, 6); 2941 if (!force && 2942 (space_info->bytes_used + space_info->bytes_pinned + 2943 space_info->bytes_reserved + alloc_bytes) < thresh) { 2944 spin_unlock(&space_info->lock); 2945 goto out; 2946 } 2947 spin_unlock(&space_info->lock); 2948 2949 /* 2950 * if we're doing a data chunk, go ahead and make sure that 2951 * we keep a reasonable number of metadata chunks allocated in the 2952 * FS as well. 
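 * concretely, every fs_info->metadata_ratio-th data chunk allocation
 * calls force_metadata_allocation(), which sets force_alloc on every
 * metadata space_info.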
2953 */ 2954 if (flags & BTRFS_BLOCK_GROUP_DATA) { 2955 fs_info->data_chunk_allocations++; 2956 if (!(fs_info->data_chunk_allocations % 2957 fs_info->metadata_ratio)) 2958 force_metadata_allocation(fs_info); 2959 } 2960 2961 ret = btrfs_alloc_chunk(trans, extent_root, flags); 2962 if (ret) 2963 space_info->full = 1; 2964 out: 2965 mutex_unlock(&extent_root->fs_info->chunk_mutex); 2966 return ret; 2967 } 2968 2969 static int update_block_group(struct btrfs_trans_handle *trans, 2970 struct btrfs_root *root, 2971 u64 bytenr, u64 num_bytes, int alloc, 2972 int mark_free) 2973 { 2974 struct btrfs_block_group_cache *cache; 2975 struct btrfs_fs_info *info = root->fs_info; 2976 u64 total = num_bytes; 2977 u64 old_val; 2978 u64 byte_in_group; 2979 2980 /* block accounting for super block */ 2981 spin_lock(&info->delalloc_lock); 2982 old_val = btrfs_super_bytes_used(&info->super_copy); 2983 if (alloc) 2984 old_val += num_bytes; 2985 else 2986 old_val -= num_bytes; 2987 btrfs_set_super_bytes_used(&info->super_copy, old_val); 2988 2989 /* block accounting for root item */ 2990 old_val = btrfs_root_used(&root->root_item); 2991 if (alloc) 2992 old_val += num_bytes; 2993 else 2994 old_val -= num_bytes; 2995 btrfs_set_root_used(&root->root_item, old_val); 2996 spin_unlock(&info->delalloc_lock); 2997 2998 while (total) { 2999 cache = btrfs_lookup_block_group(info, bytenr); 3000 if (!cache) 3001 return -1; 3002 byte_in_group = bytenr - cache->key.objectid; 3003 WARN_ON(byte_in_group > cache->key.offset); 3004 3005 spin_lock(&cache->space_info->lock); 3006 spin_lock(&cache->lock); 3007 cache->dirty = 1; 3008 old_val = btrfs_block_group_used(&cache->item); 3009 num_bytes = min(total, cache->key.offset - byte_in_group); 3010 if (alloc) { 3011 old_val += num_bytes; 3012 cache->space_info->bytes_used += num_bytes; 3013 if (cache->ro) 3014 cache->space_info->bytes_readonly -= num_bytes; 3015 btrfs_set_block_group_used(&cache->item, old_val); 3016 spin_unlock(&cache->lock); 3017 spin_unlock(&cache->space_info->lock); 3018 } else { 3019 old_val -= num_bytes; 3020 cache->space_info->bytes_used -= num_bytes; 3021 if (cache->ro) 3022 cache->space_info->bytes_readonly += num_bytes; 3023 btrfs_set_block_group_used(&cache->item, old_val); 3024 spin_unlock(&cache->lock); 3025 spin_unlock(&cache->space_info->lock); 3026 if (mark_free) { 3027 int ret; 3028 3029 ret = btrfs_discard_extent(root, bytenr, 3030 num_bytes); 3031 WARN_ON(ret); 3032 3033 ret = btrfs_add_free_space(cache, bytenr, 3034 num_bytes); 3035 WARN_ON(ret); 3036 } 3037 } 3038 btrfs_put_block_group(cache); 3039 total -= num_bytes; 3040 bytenr += num_bytes; 3041 } 3042 return 0; 3043 } 3044 3045 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 3046 { 3047 struct btrfs_block_group_cache *cache; 3048 u64 bytenr; 3049 3050 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 3051 if (!cache) 3052 return 0; 3053 3054 bytenr = cache->key.objectid; 3055 btrfs_put_block_group(cache); 3056 3057 return bytenr; 3058 } 3059 3060 int btrfs_update_pinned_extents(struct btrfs_root *root, 3061 u64 bytenr, u64 num, int pin) 3062 { 3063 u64 len; 3064 struct btrfs_block_group_cache *cache; 3065 struct btrfs_fs_info *fs_info = root->fs_info; 3066 3067 if (pin) 3068 set_extent_dirty(&fs_info->pinned_extents, 3069 bytenr, bytenr + num - 1, GFP_NOFS); 3070 3071 while (num > 0) { 3072 cache = btrfs_lookup_block_group(fs_info, bytenr); 3073 BUG_ON(!cache); 3074 len = min(num, cache->key.offset - 3075 (bytenr - cache->key.objectid)); 3076 if 
(pin) { 3077 spin_lock(&cache->space_info->lock); 3078 spin_lock(&cache->lock); 3079 cache->pinned += len; 3080 cache->space_info->bytes_pinned += len; 3081 spin_unlock(&cache->lock); 3082 spin_unlock(&cache->space_info->lock); 3083 fs_info->total_pinned += len; 3084 } else { 3085 int unpin = 0; 3086 3087 /* 3088 * in order to not race with the block group caching, we 3089 * only want to unpin the extent if we are cached. If 3090 * we aren't cached, we want to start async caching this 3091 * block group so we can free the extent the next time 3092 * around. 3093 */ 3094 spin_lock(&cache->space_info->lock); 3095 spin_lock(&cache->lock); 3096 unpin = (cache->cached == BTRFS_CACHE_FINISHED); 3097 if (likely(unpin)) { 3098 cache->pinned -= len; 3099 cache->space_info->bytes_pinned -= len; 3100 fs_info->total_pinned -= len; 3101 } 3102 spin_unlock(&cache->lock); 3103 spin_unlock(&cache->space_info->lock); 3104 3105 if (likely(unpin)) 3106 clear_extent_dirty(&fs_info->pinned_extents, 3107 bytenr, bytenr + len -1, 3108 GFP_NOFS); 3109 else 3110 cache_block_group(cache); 3111 3112 if (unpin) 3113 btrfs_add_free_space(cache, bytenr, len); 3114 } 3115 btrfs_put_block_group(cache); 3116 bytenr += len; 3117 num -= len; 3118 } 3119 return 0; 3120 } 3121 3122 static int update_reserved_extents(struct btrfs_root *root, 3123 u64 bytenr, u64 num, int reserve) 3124 { 3125 u64 len; 3126 struct btrfs_block_group_cache *cache; 3127 struct btrfs_fs_info *fs_info = root->fs_info; 3128 3129 while (num > 0) { 3130 cache = btrfs_lookup_block_group(fs_info, bytenr); 3131 BUG_ON(!cache); 3132 len = min(num, cache->key.offset - 3133 (bytenr - cache->key.objectid)); 3134 3135 spin_lock(&cache->space_info->lock); 3136 spin_lock(&cache->lock); 3137 if (reserve) { 3138 cache->reserved += len; 3139 cache->space_info->bytes_reserved += len; 3140 } else { 3141 cache->reserved -= len; 3142 cache->space_info->bytes_reserved -= len; 3143 } 3144 spin_unlock(&cache->lock); 3145 spin_unlock(&cache->space_info->lock); 3146 btrfs_put_block_group(cache); 3147 bytenr += len; 3148 num -= len; 3149 } 3150 return 0; 3151 } 3152 3153 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 3154 { 3155 u64 last = 0; 3156 u64 start; 3157 u64 end; 3158 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; 3159 int ret; 3160 3161 while (1) { 3162 ret = find_first_extent_bit(pinned_extents, last, 3163 &start, &end, EXTENT_DIRTY); 3164 if (ret) 3165 break; 3166 3167 set_extent_dirty(copy, start, end, GFP_NOFS); 3168 last = end + 1; 3169 } 3170 return 0; 3171 } 3172 3173 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 3174 struct btrfs_root *root, 3175 struct extent_io_tree *unpin) 3176 { 3177 u64 start; 3178 u64 end; 3179 int ret; 3180 3181 while (1) { 3182 ret = find_first_extent_bit(unpin, 0, &start, &end, 3183 EXTENT_DIRTY); 3184 if (ret) 3185 break; 3186 3187 ret = btrfs_discard_extent(root, start, end + 1 - start); 3188 3189 /* unlocks the pinned mutex */ 3190 btrfs_update_pinned_extents(root, start, end + 1 - start, 0); 3191 clear_extent_dirty(unpin, start, end, GFP_NOFS); 3192 3193 cond_resched(); 3194 } 3195 3196 return ret; 3197 } 3198 3199 static int pin_down_bytes(struct btrfs_trans_handle *trans, 3200 struct btrfs_root *root, 3201 struct btrfs_path *path, 3202 u64 bytenr, u64 num_bytes, int is_data, 3203 struct extent_buffer **must_clean) 3204 { 3205 int err = 0; 3206 struct extent_buffer *buf; 3207 3208 if (is_data) 3209 goto pinit; 3210 3211 buf = btrfs_find_tree_block(root, 
bytenr, num_bytes); 3212 if (!buf) 3213 goto pinit; 3214 3215 /* we can reuse a block if it hasn't been written 3216 * and it is from this transaction. We can't 3217 * reuse anything from the tree log root because 3218 * it has tiny sub-transactions. 3219 */ 3220 if (btrfs_buffer_uptodate(buf, 0) && 3221 btrfs_try_tree_lock(buf)) { 3222 u64 header_owner = btrfs_header_owner(buf); 3223 u64 header_transid = btrfs_header_generation(buf); 3224 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 3225 header_transid == trans->transid && 3226 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 3227 *must_clean = buf; 3228 return 1; 3229 } 3230 btrfs_tree_unlock(buf); 3231 } 3232 free_extent_buffer(buf); 3233 pinit: 3234 btrfs_set_path_blocking(path); 3235 /* unlocks the pinned mutex */ 3236 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3237 3238 BUG_ON(err < 0); 3239 return 0; 3240 } 3241 3242 3243 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 3244 struct btrfs_root *root, 3245 u64 bytenr, u64 num_bytes, u64 parent, 3246 u64 root_objectid, u64 owner_objectid, 3247 u64 owner_offset, int refs_to_drop, 3248 struct btrfs_delayed_extent_op *extent_op) 3249 { 3250 struct btrfs_key key; 3251 struct btrfs_path *path; 3252 struct btrfs_fs_info *info = root->fs_info; 3253 struct btrfs_root *extent_root = info->extent_root; 3254 struct extent_buffer *leaf; 3255 struct btrfs_extent_item *ei; 3256 struct btrfs_extent_inline_ref *iref; 3257 int ret; 3258 int is_data; 3259 int extent_slot = 0; 3260 int found_extent = 0; 3261 int num_to_del = 1; 3262 u32 item_size; 3263 u64 refs; 3264 3265 path = btrfs_alloc_path(); 3266 if (!path) 3267 return -ENOMEM; 3268 3269 path->reada = 1; 3270 path->leave_spinning = 1; 3271 3272 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 3273 BUG_ON(!is_data && refs_to_drop != 1); 3274 3275 ret = lookup_extent_backref(trans, extent_root, path, &iref, 3276 bytenr, num_bytes, parent, 3277 root_objectid, owner_objectid, 3278 owner_offset); 3279 if (ret == 0) { 3280 extent_slot = path->slots[0]; 3281 while (extent_slot >= 0) { 3282 btrfs_item_key_to_cpu(path->nodes[0], &key, 3283 extent_slot); 3284 if (key.objectid != bytenr) 3285 break; 3286 if (key.type == BTRFS_EXTENT_ITEM_KEY && 3287 key.offset == num_bytes) { 3288 found_extent = 1; 3289 break; 3290 } 3291 if (path->slots[0] - extent_slot > 5) 3292 break; 3293 extent_slot--; 3294 } 3295 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3296 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); 3297 if (found_extent && item_size < sizeof(*ei)) 3298 found_extent = 0; 3299 #endif 3300 if (!found_extent) { 3301 BUG_ON(iref); 3302 ret = remove_extent_backref(trans, extent_root, path, 3303 NULL, refs_to_drop, 3304 is_data); 3305 BUG_ON(ret); 3306 btrfs_release_path(extent_root, path); 3307 path->leave_spinning = 1; 3308 3309 key.objectid = bytenr; 3310 key.type = BTRFS_EXTENT_ITEM_KEY; 3311 key.offset = num_bytes; 3312 3313 ret = btrfs_search_slot(trans, extent_root, 3314 &key, path, -1, 1); 3315 if (ret) { 3316 printk(KERN_ERR "umm, got %d back from search" 3317 ", was looking for %llu\n", ret, 3318 (unsigned long long)bytenr); 3319 btrfs_print_leaf(extent_root, path->nodes[0]); 3320 } 3321 BUG_ON(ret); 3322 extent_slot = path->slots[0]; 3323 } 3324 } else { 3325 btrfs_print_leaf(extent_root, path->nodes[0]); 3326 WARN_ON(1); 3327 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 3328 "parent %llu root %llu owner %llu offset %llu\n", 3329 (unsigned long long)bytenr, 3330 (unsigned long long)parent, 3331 
(unsigned long long)root_objectid, 3332 (unsigned long long)owner_objectid, 3333 (unsigned long long)owner_offset); 3334 } 3335 3336 leaf = path->nodes[0]; 3337 item_size = btrfs_item_size_nr(leaf, extent_slot); 3338 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3339 if (item_size < sizeof(*ei)) { 3340 BUG_ON(found_extent || extent_slot != path->slots[0]); 3341 ret = convert_extent_item_v0(trans, extent_root, path, 3342 owner_objectid, 0); 3343 BUG_ON(ret < 0); 3344 3345 btrfs_release_path(extent_root, path); 3346 path->leave_spinning = 1; 3347 3348 key.objectid = bytenr; 3349 key.type = BTRFS_EXTENT_ITEM_KEY; 3350 key.offset = num_bytes; 3351 3352 ret = btrfs_search_slot(trans, extent_root, &key, path, 3353 -1, 1); 3354 if (ret) { 3355 printk(KERN_ERR "umm, got %d back from search" 3356 ", was looking for %llu\n", ret, 3357 (unsigned long long)bytenr); 3358 btrfs_print_leaf(extent_root, path->nodes[0]); 3359 } 3360 BUG_ON(ret); 3361 extent_slot = path->slots[0]; 3362 leaf = path->nodes[0]; 3363 item_size = btrfs_item_size_nr(leaf, extent_slot); 3364 } 3365 #endif 3366 BUG_ON(item_size < sizeof(*ei)); 3367 ei = btrfs_item_ptr(leaf, extent_slot, 3368 struct btrfs_extent_item); 3369 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 3370 struct btrfs_tree_block_info *bi; 3371 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); 3372 bi = (struct btrfs_tree_block_info *)(ei + 1); 3373 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); 3374 } 3375 3376 refs = btrfs_extent_refs(leaf, ei); 3377 BUG_ON(refs < refs_to_drop); 3378 refs -= refs_to_drop; 3379 3380 if (refs > 0) { 3381 if (extent_op) 3382 __run_delayed_extent_op(extent_op, leaf, ei); 3383 /* 3384 * In the case of inline back ref, reference count will 3385 * be updated by remove_extent_backref 3386 */ 3387 if (iref) { 3388 BUG_ON(!found_extent); 3389 } else { 3390 btrfs_set_extent_refs(leaf, ei, refs); 3391 btrfs_mark_buffer_dirty(leaf); 3392 } 3393 if (found_extent) { 3394 ret = remove_extent_backref(trans, extent_root, path, 3395 iref, refs_to_drop, 3396 is_data); 3397 BUG_ON(ret); 3398 } 3399 } else { 3400 int mark_free = 0; 3401 struct extent_buffer *must_clean = NULL; 3402 3403 if (found_extent) { 3404 BUG_ON(is_data && refs_to_drop != 3405 extent_data_ref_count(root, path, iref)); 3406 if (iref) { 3407 BUG_ON(path->slots[0] != extent_slot); 3408 } else { 3409 BUG_ON(path->slots[0] != extent_slot + 1); 3410 path->slots[0] = extent_slot; 3411 num_to_del = 2; 3412 } 3413 } 3414 3415 ret = pin_down_bytes(trans, root, path, bytenr, 3416 num_bytes, is_data, &must_clean); 3417 if (ret > 0) 3418 mark_free = 1; 3419 BUG_ON(ret < 0); 3420 /* 3421 * it is going to be very rare for someone to be waiting 3422 * on the block we're freeing. 
del_items might need to 3423 * schedule, so rather than get fancy, just force it 3424 * to blocking here 3425 */ 3426 if (must_clean) 3427 btrfs_set_lock_blocking(must_clean); 3428 3429 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 3430 num_to_del); 3431 BUG_ON(ret); 3432 btrfs_release_path(extent_root, path); 3433 3434 if (must_clean) { 3435 clean_tree_block(NULL, root, must_clean); 3436 btrfs_tree_unlock(must_clean); 3437 free_extent_buffer(must_clean); 3438 } 3439 3440 if (is_data) { 3441 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 3442 BUG_ON(ret); 3443 } else { 3444 invalidate_mapping_pages(info->btree_inode->i_mapping, 3445 bytenr >> PAGE_CACHE_SHIFT, 3446 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); 3447 } 3448 3449 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 3450 mark_free); 3451 BUG_ON(ret); 3452 } 3453 btrfs_free_path(path); 3454 return ret; 3455 } 3456 3457 /* 3458 * when we free an extent, it is possible (and likely) that we free the last 3459 * delayed ref for that extent as well. This searches the delayed ref tree for 3460 * a given extent, and if there are no other delayed refs to be processed, it 3461 * removes it from the tree. 3462 */ 3463 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, 3464 struct btrfs_root *root, u64 bytenr) 3465 { 3466 struct btrfs_delayed_ref_head *head; 3467 struct btrfs_delayed_ref_root *delayed_refs; 3468 struct btrfs_delayed_ref_node *ref; 3469 struct rb_node *node; 3470 int ret; 3471 3472 delayed_refs = &trans->transaction->delayed_refs; 3473 spin_lock(&delayed_refs->lock); 3474 head = btrfs_find_delayed_ref_head(trans, bytenr); 3475 if (!head) 3476 goto out; 3477 3478 node = rb_prev(&head->node.rb_node); 3479 if (!node) 3480 goto out; 3481 3482 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3483 3484 /* there are still entries for this ref, we can't drop it */ 3485 if (ref->bytenr == bytenr) 3486 goto out; 3487 3488 if (head->extent_op) { 3489 if (!head->must_insert_reserved) 3490 goto out; 3491 kfree(head->extent_op); 3492 head->extent_op = NULL; 3493 } 3494 3495 /* 3496 * waiting for the lock here would deadlock. If someone else has it 3497 * locked they are already in the process of dropping it anyway 3498 */ 3499 if (!mutex_trylock(&head->mutex)) 3500 goto out; 3501 3502 /* 3503 * at this point we have a head with no other entries. Go 3504 * ahead and process it. 3505 */ 3506 head->node.in_tree = 0; 3507 rb_erase(&head->node.rb_node, &delayed_refs->root); 3508 3509 delayed_refs->num_entries--; 3510 3511 /* 3512 * we don't take a ref on the node because we're removing it from the 3513 * tree, so we just steal the ref the tree was holding. 3514 */ 3515 delayed_refs->num_heads--; 3516 if (list_empty(&head->cluster)) 3517 delayed_refs->num_heads_ready--; 3518 3519 list_del_init(&head->cluster); 3520 spin_unlock(&delayed_refs->lock); 3521 3522 ret = run_one_delayed_ref(trans, root->fs_info->tree_root, 3523 &head->node, head->extent_op, 3524 head->must_insert_reserved); 3525 BUG_ON(ret); 3526 btrfs_put_delayed_ref(&head->node); 3527 return 0; 3528 out: 3529 spin_unlock(&delayed_refs->lock); 3530 return 0; 3531 } 3532 3533 int btrfs_free_extent(struct btrfs_trans_handle *trans, 3534 struct btrfs_root *root, 3535 u64 bytenr, u64 num_bytes, u64 parent, 3536 u64 root_objectid, u64 owner, u64 offset) 3537 { 3538 int ret; 3539 3540 /* 3541 * tree log blocks never actually go into the extent allocation 3542 * tree, just update pinning info and exit early. 
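 * instead of queueing a delayed ref, the block is pinned and its
 * reservation is released right here.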
3543 */ 3544 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 3545 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 3546 /* unlocks the pinned mutex */ 3547 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3548 update_reserved_extents(root, bytenr, num_bytes, 0); 3549 ret = 0; 3550 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 3551 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 3552 parent, root_objectid, (int)owner, 3553 BTRFS_DROP_DELAYED_REF, NULL); 3554 BUG_ON(ret); 3555 ret = check_ref_cleanup(trans, root, bytenr); 3556 BUG_ON(ret); 3557 } else { 3558 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 3559 parent, root_objectid, owner, 3560 offset, BTRFS_DROP_DELAYED_REF, NULL); 3561 BUG_ON(ret); 3562 } 3563 return ret; 3564 } 3565 3566 static u64 stripe_align(struct btrfs_root *root, u64 val) 3567 { 3568 u64 mask = ((u64)root->stripesize - 1); 3569 u64 ret = (val + mask) & ~mask; 3570 return ret; 3571 } 3572 3573 /* 3574 * when we wait for progress in the block group caching, its because 3575 * our allocation attempt failed at least once. So, we must sleep 3576 * and let some progress happen before we try again. 3577 * 3578 * This function will sleep at least once waiting for new free space to 3579 * show up, and then it will check the block group free space numbers 3580 * for our min num_bytes. Another option is to have it go ahead 3581 * and look in the rbtree for a free extent of a given size, but this 3582 * is a good start. 3583 */ 3584 static noinline int 3585 wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 3586 u64 num_bytes) 3587 { 3588 DEFINE_WAIT(wait); 3589 3590 prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); 3591 3592 if (block_group_cache_done(cache)) { 3593 finish_wait(&cache->caching_q, &wait); 3594 return 0; 3595 } 3596 schedule(); 3597 finish_wait(&cache->caching_q, &wait); 3598 3599 wait_event(cache->caching_q, block_group_cache_done(cache) || 3600 (cache->free_space >= num_bytes)); 3601 return 0; 3602 } 3603 3604 enum btrfs_loop_type { 3605 LOOP_CACHED_ONLY = 0, 3606 LOOP_CACHING_NOWAIT = 1, 3607 LOOP_CACHING_WAIT = 2, 3608 LOOP_ALLOC_CHUNK = 3, 3609 LOOP_NO_EMPTY_SIZE = 4, 3610 }; 3611 3612 /* 3613 * walks the btree of allocated extents and find a hole of a given size. 3614 * The key ins is changed to record the hole: 3615 * ins->objectid == block start 3616 * ins->flags = BTRFS_EXTENT_ITEM_KEY 3617 * ins->offset == number of blocks 3618 * Any available blocks before search_start are skipped. 
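 * The search runs in passes controlled by enum btrfs_loop_type: fully
 * cached block groups only, then partially cached groups without
 * waiting, then waiting on caching progress, then forcing a chunk
 * allocation, and finally retrying with empty_size and empty_cluster
 * set to zero.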
3619 */ 3620 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 3621 struct btrfs_root *orig_root, 3622 u64 num_bytes, u64 empty_size, 3623 u64 search_start, u64 search_end, 3624 u64 hint_byte, struct btrfs_key *ins, 3625 u64 exclude_start, u64 exclude_nr, 3626 int data) 3627 { 3628 int ret = 0; 3629 struct btrfs_root *root = orig_root->fs_info->extent_root; 3630 struct btrfs_free_cluster *last_ptr = NULL; 3631 struct btrfs_block_group_cache *block_group = NULL; 3632 int empty_cluster = 2 * 1024 * 1024; 3633 int allowed_chunk_alloc = 0; 3634 struct btrfs_space_info *space_info; 3635 int last_ptr_loop = 0; 3636 int loop = 0; 3637 bool found_uncached_bg = false; 3638 3639 WARN_ON(num_bytes < root->sectorsize); 3640 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 3641 ins->objectid = 0; 3642 ins->offset = 0; 3643 3644 space_info = __find_space_info(root->fs_info, data); 3645 3646 if (orig_root->ref_cows || empty_size) 3647 allowed_chunk_alloc = 1; 3648 3649 if (data & BTRFS_BLOCK_GROUP_METADATA) { 3650 last_ptr = &root->fs_info->meta_alloc_cluster; 3651 if (!btrfs_test_opt(root, SSD)) 3652 empty_cluster = 64 * 1024; 3653 } 3654 3655 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { 3656 last_ptr = &root->fs_info->data_alloc_cluster; 3657 } 3658 3659 if (last_ptr) { 3660 spin_lock(&last_ptr->lock); 3661 if (last_ptr->block_group) 3662 hint_byte = last_ptr->window_start; 3663 spin_unlock(&last_ptr->lock); 3664 } 3665 3666 search_start = max(search_start, first_logical_byte(root, 0)); 3667 search_start = max(search_start, hint_byte); 3668 3669 if (!last_ptr) 3670 empty_cluster = 0; 3671 3672 if (search_start == hint_byte) { 3673 block_group = btrfs_lookup_block_group(root->fs_info, 3674 search_start); 3675 /* 3676 * we don't want to use the block group if it doesn't match our 3677 * allocation bits, or if its not cached. 3678 */ 3679 if (block_group && block_group_bits(block_group, data) && 3680 block_group_cache_done(block_group)) { 3681 down_read(&space_info->groups_sem); 3682 if (list_empty(&block_group->list) || 3683 block_group->ro) { 3684 /* 3685 * someone is removing this block group, 3686 * we can't jump into the have_block_group 3687 * target because our list pointers are not 3688 * valid 3689 */ 3690 btrfs_put_block_group(block_group); 3691 up_read(&space_info->groups_sem); 3692 } else 3693 goto have_block_group; 3694 } else if (block_group) { 3695 btrfs_put_block_group(block_group); 3696 } 3697 } 3698 3699 search: 3700 down_read(&space_info->groups_sem); 3701 list_for_each_entry(block_group, &space_info->block_groups, list) { 3702 u64 offset; 3703 int cached; 3704 3705 atomic_inc(&block_group->count); 3706 search_start = block_group->key.objectid; 3707 3708 have_block_group: 3709 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 3710 /* 3711 * we want to start caching kthreads, but not too many 3712 * right off the bat so we don't overwhelm the system, 3713 * so only start them if there are less than 2 and we're 3714 * in the initial allocation phase. 
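 * (space_info->caching_threads is what the check below reads; it
 * counts the async caching kthreads already started for this
 * space_info.)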
3715 */ 3716 if (loop > LOOP_CACHING_NOWAIT || 3717 atomic_read(&space_info->caching_threads) < 2) { 3718 ret = cache_block_group(block_group); 3719 BUG_ON(ret); 3720 } 3721 } 3722 3723 cached = block_group_cache_done(block_group); 3724 if (unlikely(!cached)) { 3725 found_uncached_bg = true; 3726 3727 /* if we only want cached bgs, loop */ 3728 if (loop == LOOP_CACHED_ONLY) 3729 goto loop; 3730 } 3731 3732 if (unlikely(block_group->ro)) 3733 goto loop; 3734 3735 if (last_ptr) { 3736 /* 3737 * the refill lock keeps out other 3738 * people trying to start a new cluster 3739 */ 3740 spin_lock(&last_ptr->refill_lock); 3741 if (last_ptr->block_group && 3742 (last_ptr->block_group->ro || 3743 !block_group_bits(last_ptr->block_group, data))) { 3744 offset = 0; 3745 goto refill_cluster; 3746 } 3747 3748 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 3749 num_bytes, search_start); 3750 if (offset) { 3751 /* we have a block, we're done */ 3752 spin_unlock(&last_ptr->refill_lock); 3753 goto checks; 3754 } 3755 3756 spin_lock(&last_ptr->lock); 3757 /* 3758 * whoops, this cluster doesn't actually point to 3759 * this block group. Get a ref on the block 3760 * group is does point to and try again 3761 */ 3762 if (!last_ptr_loop && last_ptr->block_group && 3763 last_ptr->block_group != block_group) { 3764 3765 btrfs_put_block_group(block_group); 3766 block_group = last_ptr->block_group; 3767 atomic_inc(&block_group->count); 3768 spin_unlock(&last_ptr->lock); 3769 spin_unlock(&last_ptr->refill_lock); 3770 3771 last_ptr_loop = 1; 3772 search_start = block_group->key.objectid; 3773 /* 3774 * we know this block group is properly 3775 * in the list because 3776 * btrfs_remove_block_group, drops the 3777 * cluster before it removes the block 3778 * group from the list 3779 */ 3780 goto have_block_group; 3781 } 3782 spin_unlock(&last_ptr->lock); 3783 refill_cluster: 3784 /* 3785 * this cluster didn't work out, free it and 3786 * start over 3787 */ 3788 btrfs_return_cluster_to_free_space(NULL, last_ptr); 3789 3790 last_ptr_loop = 0; 3791 3792 /* allocate a cluster in this block group */ 3793 ret = btrfs_find_space_cluster(trans, root, 3794 block_group, last_ptr, 3795 offset, num_bytes, 3796 empty_cluster + empty_size); 3797 if (ret == 0) { 3798 /* 3799 * now pull our allocation out of this 3800 * cluster 3801 */ 3802 offset = btrfs_alloc_from_cluster(block_group, 3803 last_ptr, num_bytes, 3804 search_start); 3805 if (offset) { 3806 /* we found one, proceed */ 3807 spin_unlock(&last_ptr->refill_lock); 3808 goto checks; 3809 } 3810 } else if (!cached && loop > LOOP_CACHING_NOWAIT) { 3811 spin_unlock(&last_ptr->refill_lock); 3812 3813 wait_block_group_cache_progress(block_group, 3814 num_bytes + empty_cluster + empty_size); 3815 goto have_block_group; 3816 } 3817 3818 /* 3819 * at this point we either didn't find a cluster 3820 * or we weren't able to allocate a block from our 3821 * cluster. 
Free the cluster we've been trying 3822 * to use, and go to the next block group 3823 */ 3824 if (loop < LOOP_NO_EMPTY_SIZE) { 3825 btrfs_return_cluster_to_free_space(NULL, 3826 last_ptr); 3827 spin_unlock(&last_ptr->refill_lock); 3828 goto loop; 3829 } 3830 spin_unlock(&last_ptr->refill_lock); 3831 } 3832 3833 offset = btrfs_find_space_for_alloc(block_group, search_start, 3834 num_bytes, empty_size); 3835 if (!offset && (cached || (!cached && 3836 loop == LOOP_CACHING_NOWAIT))) { 3837 goto loop; 3838 } else if (!offset && (!cached && 3839 loop > LOOP_CACHING_NOWAIT)) { 3840 wait_block_group_cache_progress(block_group, 3841 num_bytes + empty_size); 3842 goto have_block_group; 3843 } 3844 checks: 3845 search_start = stripe_align(root, offset); 3846 /* move on to the next group */ 3847 if (search_start + num_bytes >= search_end) { 3848 btrfs_add_free_space(block_group, offset, num_bytes); 3849 goto loop; 3850 } 3851 3852 /* move on to the next group */ 3853 if (search_start + num_bytes > 3854 block_group->key.objectid + block_group->key.offset) { 3855 btrfs_add_free_space(block_group, offset, num_bytes); 3856 goto loop; 3857 } 3858 3859 if (exclude_nr > 0 && 3860 (search_start + num_bytes > exclude_start && 3861 search_start < exclude_start + exclude_nr)) { 3862 search_start = exclude_start + exclude_nr; 3863 3864 btrfs_add_free_space(block_group, offset, num_bytes); 3865 /* 3866 * if search_start is still in this block group 3867 * then we just re-search this block group 3868 */ 3869 if (search_start >= block_group->key.objectid && 3870 search_start < (block_group->key.objectid + 3871 block_group->key.offset)) 3872 goto have_block_group; 3873 goto loop; 3874 } 3875 3876 ins->objectid = search_start; 3877 ins->offset = num_bytes; 3878 3879 if (offset < search_start) 3880 btrfs_add_free_space(block_group, offset, 3881 search_start - offset); 3882 BUG_ON(offset > search_start); 3883 3884 /* we are all good, lets return */ 3885 break; 3886 loop: 3887 btrfs_put_block_group(block_group); 3888 } 3889 up_read(&space_info->groups_sem); 3890 3891 /* LOOP_CACHED_ONLY, only search fully cached block groups 3892 * LOOP_CACHING_NOWAIT, search partially cached block groups, but 3893 * dont wait foR them to finish caching 3894 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 3895 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again 3896 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 3897 * again 3898 */ 3899 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 3900 (found_uncached_bg || empty_size || empty_cluster || 3901 allowed_chunk_alloc)) { 3902 if (found_uncached_bg) { 3903 found_uncached_bg = false; 3904 if (loop < LOOP_CACHING_WAIT) { 3905 loop++; 3906 goto search; 3907 } 3908 } 3909 3910 if (loop == LOOP_ALLOC_CHUNK) { 3911 empty_size = 0; 3912 empty_cluster = 0; 3913 } 3914 3915 if (allowed_chunk_alloc) { 3916 ret = do_chunk_alloc(trans, root, num_bytes + 3917 2 * 1024 * 1024, data, 1); 3918 allowed_chunk_alloc = 0; 3919 } else { 3920 space_info->force_alloc = 1; 3921 } 3922 3923 if (loop < LOOP_NO_EMPTY_SIZE) { 3924 loop++; 3925 goto search; 3926 } 3927 ret = -ENOSPC; 3928 } else if (!ins->objectid) { 3929 ret = -ENOSPC; 3930 } 3931 3932 /* we found what we needed */ 3933 if (ins->objectid) { 3934 if (!(data & BTRFS_BLOCK_GROUP_DATA)) 3935 trans->block_group = block_group->key.objectid; 3936 3937 btrfs_put_block_group(block_group); 3938 ret = 0; 3939 } 3940 3941 return ret; 3942 } 3943 3944 static void dump_space_info(struct btrfs_space_info *info, 
u64 bytes) 3945 { 3946 struct btrfs_block_group_cache *cache; 3947 3948 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 3949 (unsigned long long)(info->total_bytes - info->bytes_used - 3950 info->bytes_pinned - info->bytes_reserved), 3951 (info->full) ? "" : "not "); 3952 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 3953 " may_use=%llu, used=%llu\n", 3954 (unsigned long long)info->total_bytes, 3955 (unsigned long long)info->bytes_pinned, 3956 (unsigned long long)info->bytes_delalloc, 3957 (unsigned long long)info->bytes_may_use, 3958 (unsigned long long)info->bytes_used); 3959 3960 down_read(&info->groups_sem); 3961 list_for_each_entry(cache, &info->block_groups, list) { 3962 spin_lock(&cache->lock); 3963 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 3964 "%llu pinned %llu reserved\n", 3965 (unsigned long long)cache->key.objectid, 3966 (unsigned long long)cache->key.offset, 3967 (unsigned long long)btrfs_block_group_used(&cache->item), 3968 (unsigned long long)cache->pinned, 3969 (unsigned long long)cache->reserved); 3970 btrfs_dump_free_space(cache, bytes); 3971 spin_unlock(&cache->lock); 3972 } 3973 up_read(&info->groups_sem); 3974 } 3975 3976 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, 3977 struct btrfs_root *root, 3978 u64 num_bytes, u64 min_alloc_size, 3979 u64 empty_size, u64 hint_byte, 3980 u64 search_end, struct btrfs_key *ins, 3981 u64 data) 3982 { 3983 int ret; 3984 u64 search_start = 0; 3985 struct btrfs_fs_info *info = root->fs_info; 3986 3987 data = btrfs_get_alloc_profile(root, data); 3988 again: 3989 /* 3990 * the only place that sets empty_size is btrfs_realloc_node, which 3991 * is not called recursively on allocations 3992 */ 3993 if (empty_size || root->ref_cows) { 3994 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { 3995 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3996 2 * 1024 * 1024, 3997 BTRFS_BLOCK_GROUP_METADATA | 3998 (info->metadata_alloc_profile & 3999 info->avail_metadata_alloc_bits), 0); 4000 } 4001 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 4002 num_bytes + 2 * 1024 * 1024, data, 0); 4003 } 4004 4005 WARN_ON(num_bytes < root->sectorsize); 4006 ret = find_free_extent(trans, root, num_bytes, empty_size, 4007 search_start, search_end, hint_byte, ins, 4008 trans->alloc_exclude_start, 4009 trans->alloc_exclude_nr, data); 4010 4011 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 4012 num_bytes = num_bytes >> 1; 4013 num_bytes = num_bytes & ~(root->sectorsize - 1); 4014 num_bytes = max(num_bytes, min_alloc_size); 4015 do_chunk_alloc(trans, root->fs_info->extent_root, 4016 num_bytes, data, 1); 4017 goto again; 4018 } 4019 if (ret == -ENOSPC) { 4020 struct btrfs_space_info *sinfo; 4021 4022 sinfo = __find_space_info(root->fs_info, data); 4023 printk(KERN_ERR "btrfs allocation failed flags %llu, " 4024 "wanted %llu\n", (unsigned long long)data, 4025 (unsigned long long)num_bytes); 4026 dump_space_info(sinfo, num_bytes); 4027 } 4028 4029 return ret; 4030 } 4031 4032 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 4033 { 4034 struct btrfs_block_group_cache *cache; 4035 int ret = 0; 4036 4037 cache = btrfs_lookup_block_group(root->fs_info, start); 4038 if (!cache) { 4039 printk(KERN_ERR "Unable to find block group for %llu\n", 4040 (unsigned long long)start); 4041 return -ENOSPC; 4042 } 4043 4044 ret = btrfs_discard_extent(root, start, len); 4045 4046 btrfs_add_free_space(cache, start, len); 4047 btrfs_put_block_group(cache); 4048 
update_reserved_extents(root, start, len, 0); 4049 4050 return ret; 4051 } 4052 4053 int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 4054 struct btrfs_root *root, 4055 u64 num_bytes, u64 min_alloc_size, 4056 u64 empty_size, u64 hint_byte, 4057 u64 search_end, struct btrfs_key *ins, 4058 u64 data) 4059 { 4060 int ret; 4061 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, 4062 empty_size, hint_byte, search_end, ins, 4063 data); 4064 if (!ret) 4065 update_reserved_extents(root, ins->objectid, ins->offset, 1); 4066 4067 return ret; 4068 } 4069 4070 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 4071 struct btrfs_root *root, 4072 u64 parent, u64 root_objectid, 4073 u64 flags, u64 owner, u64 offset, 4074 struct btrfs_key *ins, int ref_mod) 4075 { 4076 int ret; 4077 struct btrfs_fs_info *fs_info = root->fs_info; 4078 struct btrfs_extent_item *extent_item; 4079 struct btrfs_extent_inline_ref *iref; 4080 struct btrfs_path *path; 4081 struct extent_buffer *leaf; 4082 int type; 4083 u32 size; 4084 4085 if (parent > 0) 4086 type = BTRFS_SHARED_DATA_REF_KEY; 4087 else 4088 type = BTRFS_EXTENT_DATA_REF_KEY; 4089 4090 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 4091 4092 path = btrfs_alloc_path(); 4093 BUG_ON(!path); 4094 4095 path->leave_spinning = 1; 4096 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 4097 ins, size); 4098 BUG_ON(ret); 4099 4100 leaf = path->nodes[0]; 4101 extent_item = btrfs_item_ptr(leaf, path->slots[0], 4102 struct btrfs_extent_item); 4103 btrfs_set_extent_refs(leaf, extent_item, ref_mod); 4104 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 4105 btrfs_set_extent_flags(leaf, extent_item, 4106 flags | BTRFS_EXTENT_FLAG_DATA); 4107 4108 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 4109 btrfs_set_extent_inline_ref_type(leaf, iref, type); 4110 if (parent > 0) { 4111 struct btrfs_shared_data_ref *ref; 4112 ref = (struct btrfs_shared_data_ref *)(iref + 1); 4113 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 4114 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod); 4115 } else { 4116 struct btrfs_extent_data_ref *ref; 4117 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 4118 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid); 4119 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 4120 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 4121 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); 4122 } 4123 4124 btrfs_mark_buffer_dirty(path->nodes[0]); 4125 btrfs_free_path(path); 4126 4127 ret = update_block_group(trans, root, ins->objectid, ins->offset, 4128 1, 0); 4129 if (ret) { 4130 printk(KERN_ERR "btrfs update block group failed for %llu " 4131 "%llu\n", (unsigned long long)ins->objectid, 4132 (unsigned long long)ins->offset); 4133 BUG(); 4134 } 4135 return ret; 4136 } 4137 4138 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 4139 struct btrfs_root *root, 4140 u64 parent, u64 root_objectid, 4141 u64 flags, struct btrfs_disk_key *key, 4142 int level, struct btrfs_key *ins) 4143 { 4144 int ret; 4145 struct btrfs_fs_info *fs_info = root->fs_info; 4146 struct btrfs_extent_item *extent_item; 4147 struct btrfs_tree_block_info *block_info; 4148 struct btrfs_extent_inline_ref *iref; 4149 struct btrfs_path *path; 4150 struct extent_buffer *leaf; 4151 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 4152 4153 path = btrfs_alloc_path(); 4154 BUG_ON(!path); 4155 4156 path->leave_spinning = 1; 4157 ret = 
btrfs_insert_empty_item(trans, fs_info->extent_root, path, 4158 ins, size); 4159 BUG_ON(ret); 4160 4161 leaf = path->nodes[0]; 4162 extent_item = btrfs_item_ptr(leaf, path->slots[0], 4163 struct btrfs_extent_item); 4164 btrfs_set_extent_refs(leaf, extent_item, 1); 4165 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 4166 btrfs_set_extent_flags(leaf, extent_item, 4167 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 4168 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 4169 4170 btrfs_set_tree_block_key(leaf, block_info, key); 4171 btrfs_set_tree_block_level(leaf, block_info, level); 4172 4173 iref = (struct btrfs_extent_inline_ref *)(block_info + 1); 4174 if (parent > 0) { 4175 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4176 btrfs_set_extent_inline_ref_type(leaf, iref, 4177 BTRFS_SHARED_BLOCK_REF_KEY); 4178 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 4179 } else { 4180 btrfs_set_extent_inline_ref_type(leaf, iref, 4181 BTRFS_TREE_BLOCK_REF_KEY); 4182 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 4183 } 4184 4185 btrfs_mark_buffer_dirty(leaf); 4186 btrfs_free_path(path); 4187 4188 ret = update_block_group(trans, root, ins->objectid, ins->offset, 4189 1, 0); 4190 if (ret) { 4191 printk(KERN_ERR "btrfs update block group failed for %llu " 4192 "%llu\n", (unsigned long long)ins->objectid, 4193 (unsigned long long)ins->offset); 4194 BUG(); 4195 } 4196 return ret; 4197 } 4198 4199 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 4200 struct btrfs_root *root, 4201 u64 root_objectid, u64 owner, 4202 u64 offset, struct btrfs_key *ins) 4203 { 4204 int ret; 4205 4206 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); 4207 4208 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, 4209 0, root_objectid, owner, offset, 4210 BTRFS_ADD_DELAYED_EXTENT, NULL); 4211 return ret; 4212 } 4213 4214 /* 4215 * this is used by the tree logging recovery code. It records that 4216 * an extent has been allocated and makes sure to clear the free 4217 * space cache bits as well 4218 */ 4219 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 4220 struct btrfs_root *root, 4221 u64 root_objectid, u64 owner, u64 offset, 4222 struct btrfs_key *ins) 4223 { 4224 int ret; 4225 struct btrfs_block_group_cache *block_group; 4226 4227 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4228 cache_block_group(block_group); 4229 wait_event(block_group->caching_q, 4230 block_group_cache_done(block_group)); 4231 4232 ret = btrfs_remove_free_space(block_group, ins->objectid, 4233 ins->offset); 4234 BUG_ON(ret); 4235 btrfs_put_block_group(block_group); 4236 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 4237 0, owner, offset, ins, 1); 4238 return ret; 4239 } 4240 4241 /* 4242 * finds a free extent and does all the dirty work required for allocation. 4243 * returns the key for the extent through ins; except for the tree log, a 4244 * delayed ref is queued to insert the backref for the new tree block. 4245 * 4246 * returns 0 if everything worked, non-zero otherwise.
4247 */ 4248 static int alloc_tree_block(struct btrfs_trans_handle *trans, 4249 struct btrfs_root *root, 4250 u64 num_bytes, u64 parent, u64 root_objectid, 4251 struct btrfs_disk_key *key, int level, 4252 u64 empty_size, u64 hint_byte, u64 search_end, 4253 struct btrfs_key *ins) 4254 { 4255 int ret; 4256 u64 flags = 0; 4257 4258 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4259 empty_size, hint_byte, search_end, 4260 ins, 0); 4261 if (ret) 4262 return ret; 4263 4264 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 4265 if (parent == 0) 4266 parent = ins->objectid; 4267 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 4268 } else 4269 BUG_ON(parent > 0); 4270 4271 update_reserved_extents(root, ins->objectid, ins->offset, 1); 4272 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4273 struct btrfs_delayed_extent_op *extent_op; 4274 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 4275 BUG_ON(!extent_op); 4276 if (key) 4277 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 4278 else 4279 memset(&extent_op->key, 0, sizeof(extent_op->key)); 4280 extent_op->flags_to_set = flags; 4281 extent_op->update_key = 1; 4282 extent_op->update_flags = 1; 4283 extent_op->is_data = 0; 4284 4285 ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, 4286 ins->offset, parent, root_objectid, 4287 level, BTRFS_ADD_DELAYED_EXTENT, 4288 extent_op); 4289 BUG_ON(ret); 4290 } 4291 return ret; 4292 } 4293 4294 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 4295 struct btrfs_root *root, 4296 u64 bytenr, u32 blocksize, 4297 int level) 4298 { 4299 struct extent_buffer *buf; 4300 4301 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 4302 if (!buf) 4303 return ERR_PTR(-ENOMEM); 4304 btrfs_set_header_generation(buf, trans->transid); 4305 btrfs_set_buffer_lockdep_class(buf, level); 4306 btrfs_tree_lock(buf); 4307 clean_tree_block(trans, root, buf); 4308 4309 btrfs_set_lock_blocking(buf); 4310 btrfs_set_buffer_uptodate(buf); 4311 4312 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4313 set_extent_dirty(&root->dirty_log_pages, buf->start, 4314 buf->start + buf->len - 1, GFP_NOFS); 4315 } else { 4316 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4317 buf->start + buf->len - 1, GFP_NOFS); 4318 } 4319 trans->blocks_used++; 4320 /* this returns a buffer locked for blocking */ 4321 return buf; 4322 } 4323 4324 /* 4325 * helper function to allocate a block for a given tree 4326 * returns the tree buffer or an ERR_PTR on failure.
4327 */ 4328 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 4329 struct btrfs_root *root, u32 blocksize, 4330 u64 parent, u64 root_objectid, 4331 struct btrfs_disk_key *key, int level, 4332 u64 hint, u64 empty_size) 4333 { 4334 struct btrfs_key ins; 4335 int ret; 4336 struct extent_buffer *buf; 4337 4338 ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, 4339 key, level, empty_size, hint, (u64)-1, &ins); 4340 if (ret) { 4341 BUG_ON(ret > 0); 4342 return ERR_PTR(ret); 4343 } 4344 4345 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 4346 blocksize, level); 4347 return buf; 4348 } 4349 4350 #if 0 4351 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4352 struct btrfs_root *root, struct extent_buffer *leaf) 4353 { 4354 u64 disk_bytenr; 4355 u64 num_bytes; 4356 struct btrfs_key key; 4357 struct btrfs_file_extent_item *fi; 4358 u32 nritems; 4359 int i; 4360 int ret; 4361 4362 BUG_ON(!btrfs_is_leaf(leaf)); 4363 nritems = btrfs_header_nritems(leaf); 4364 4365 for (i = 0; i < nritems; i++) { 4366 cond_resched(); 4367 btrfs_item_key_to_cpu(leaf, &key, i); 4368 4369 /* only extents have references, skip everything else */ 4370 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 4371 continue; 4372 4373 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 4374 4375 /* inline extents live in the btree, they don't have refs */ 4376 if (btrfs_file_extent_type(leaf, fi) == 4377 BTRFS_FILE_EXTENT_INLINE) 4378 continue; 4379 4380 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 4381 4382 /* holes don't have refs */ 4383 if (disk_bytenr == 0) 4384 continue; 4385 4386 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 4387 ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes, 4388 leaf->start, 0, key.objectid, 0); 4389 BUG_ON(ret); 4390 } 4391 return 0; 4392 } 4393 4394 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 4395 struct btrfs_root *root, 4396 struct btrfs_leaf_ref *ref) 4397 { 4398 int i; 4399 int ret; 4400 struct btrfs_extent_info *info; 4401 struct refsort *sorted; 4402 4403 if (ref->nritems == 0) 4404 return 0; 4405 4406 sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); 4407 for (i = 0; i < ref->nritems; i++) { 4408 sorted[i].bytenr = ref->extents[i].bytenr; 4409 sorted[i].slot = i; 4410 } 4411 sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); 4412 4413 /* 4414 * the items in the ref were sorted when the ref was inserted 4415 * into the ref cache, so this is already in order 4416 */ 4417 for (i = 0; i < ref->nritems; i++) { 4418 info = ref->extents + sorted[i].slot; 4419 ret = btrfs_free_extent(trans, root, info->bytenr, 4420 info->num_bytes, ref->bytenr, 4421 ref->owner, ref->generation, 4422 info->objectid, 0); 4423 4424 atomic_inc(&root->fs_info->throttle_gen); 4425 wake_up(&root->fs_info->transaction_throttle); 4426 cond_resched(); 4427 4428 BUG_ON(ret); 4429 info++; 4430 } 4431 4432 kfree(sorted); 4433 return 0; 4434 } 4435 4436 4437 static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, 4438 struct btrfs_root *root, u64 start, 4439 u64 len, u32 *refs) 4440 { 4441 int ret; 4442 4443 ret = btrfs_lookup_extent_refs(trans, root, start, len, refs); 4444 BUG_ON(ret); 4445 4446 #if 0 /* some debugging code in case we see problems here */ 4447 /* if the refs count is one, it won't get increased again. But 4448 * if the ref count is > 1, someone may be decreasing it at 4449 * the same time we are. 
4450 */ 4451 if (*refs != 1) { 4452 struct extent_buffer *eb = NULL; 4453 eb = btrfs_find_create_tree_block(root, start, len); 4454 if (eb) 4455 btrfs_tree_lock(eb); 4456 4457 mutex_lock(&root->fs_info->alloc_mutex); 4458 ret = lookup_extent_ref(NULL, root, start, len, refs); 4459 BUG_ON(ret); 4460 mutex_unlock(&root->fs_info->alloc_mutex); 4461 4462 if (eb) { 4463 btrfs_tree_unlock(eb); 4464 free_extent_buffer(eb); 4465 } 4466 if (*refs == 1) { 4467 printk(KERN_ERR "btrfs block %llu went down to one " 4468 "during drop_snap\n", (unsigned long long)start); 4469 } 4470 4471 } 4472 #endif 4473 4474 cond_resched(); 4475 return ret; 4476 } 4477 4478 4479 /* 4480 * this is used while deleting old snapshots, and it drops the refs 4481 * on a whole subtree starting from a level 1 node. 4482 * 4483 * The idea is to sort all the leaf pointers, and then drop the 4484 * ref on all the leaves in order. Most of the time the leaves 4485 * will have ref cache entries, so no leaf IOs will be required to 4486 * find the extents they have references on. 4487 * 4488 * For each leaf, any references it has are also dropped in order 4489 * 4490 * This ends up dropping the references in something close to optimal 4491 * order for reading and modifying the extent allocation tree. 4492 */ 4493 static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, 4494 struct btrfs_root *root, 4495 struct btrfs_path *path) 4496 { 4497 u64 bytenr; 4498 u64 root_owner; 4499 u64 root_gen; 4500 struct extent_buffer *eb = path->nodes[1]; 4501 struct extent_buffer *leaf; 4502 struct btrfs_leaf_ref *ref; 4503 struct refsort *sorted = NULL; 4504 int nritems = btrfs_header_nritems(eb); 4505 int ret; 4506 int i; 4507 int refi = 0; 4508 int slot = path->slots[1]; 4509 u32 blocksize = btrfs_level_size(root, 0); 4510 u32 refs; 4511 4512 if (nritems == 0) 4513 goto out; 4514 4515 root_owner = btrfs_header_owner(eb); 4516 root_gen = btrfs_header_generation(eb); 4517 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); 4518 4519 /* 4520 * step one, sort all the leaf pointers so we don't scribble 4521 * randomly into the extent allocation tree 4522 */ 4523 for (i = slot; i < nritems; i++) { 4524 sorted[refi].bytenr = btrfs_node_blockptr(eb, i); 4525 sorted[refi].slot = i; 4526 refi++; 4527 } 4528 4529 /* 4530 * nritems won't be zero, but if we're picking up drop_snapshot 4531 * after a crash, slot might be > 0, so double check things 4532 * just in case. 4533 */ 4534 if (refi == 0) 4535 goto out; 4536 4537 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 4538 4539 /* 4540 * the first loop frees everything the leaves point to 4541 */ 4542 for (i = 0; i < refi; i++) { 4543 u64 ptr_gen; 4544 4545 bytenr = sorted[i].bytenr; 4546 4547 /* 4548 * check the reference count on this leaf. If it is > 1 4549 * we just decrement it below and don't update any 4550 * of the refs the leaf points to. 4551 */ 4552 ret = drop_snap_lookup_refcount(trans, root, bytenr, 4553 blocksize, &refs); 4554 BUG_ON(ret); 4555 if (refs != 1) 4556 continue; 4557 4558 ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); 4559 4560 /* 4561 * the leaf only had one reference, which means the 4562 * only thing pointing to this leaf is the snapshot 4563 * we're deleting. It isn't possible for the reference 4564 * count to increase again later 4565 * 4566 * The reference cache is checked for the leaf, 4567 * and if found we'll be able to drop any refs held by 4568 * the leaf without needing to read it in. 
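 * a cached btrfs_leaf_ref is only usable when its generation matches the
 * pointer generation; otherwise we fall back to reading the leaf from disk.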
4569 */ 4570 ref = btrfs_lookup_leaf_ref(root, bytenr); 4571 if (ref && ref->generation != ptr_gen) { 4572 btrfs_free_leaf_ref(root, ref); 4573 ref = NULL; 4574 } 4575 if (ref) { 4576 ret = cache_drop_leaf_ref(trans, root, ref); 4577 BUG_ON(ret); 4578 btrfs_remove_leaf_ref(root, ref); 4579 btrfs_free_leaf_ref(root, ref); 4580 } else { 4581 /* 4582 * the leaf wasn't in the reference cache, so 4583 * we have to read it. 4584 */ 4585 leaf = read_tree_block(root, bytenr, blocksize, 4586 ptr_gen); 4587 ret = btrfs_drop_leaf_ref(trans, root, leaf); 4588 BUG_ON(ret); 4589 free_extent_buffer(leaf); 4590 } 4591 atomic_inc(&root->fs_info->throttle_gen); 4592 wake_up(&root->fs_info->transaction_throttle); 4593 cond_resched(); 4594 } 4595 4596 /* 4597 * run through the loop again to free the refs on the leaves. 4598 * This is faster than doing it in the loop above because 4599 * the leaves are likely to be clustered together. We end up 4600 * working in nice chunks on the extent allocation tree. 4601 */ 4602 for (i = 0; i < refi; i++) { 4603 bytenr = sorted[i].bytenr; 4604 ret = btrfs_free_extent(trans, root, bytenr, 4605 blocksize, eb->start, 4606 root_owner, root_gen, 0, 1); 4607 BUG_ON(ret); 4608 4609 atomic_inc(&root->fs_info->throttle_gen); 4610 wake_up(&root->fs_info->transaction_throttle); 4611 cond_resched(); 4612 } 4613 out: 4614 kfree(sorted); 4615 4616 /* 4617 * update the path to show we've processed the entire level 1 4618 * node. This will get saved into the root's drop_snapshot_progress 4619 * field so these drops are not repeated again if this transaction 4620 * commits. 4621 */ 4622 path->slots[1] = nritems; 4623 return 0; 4624 } 4625 4626 /* 4627 * helper function for drop_snapshot, this walks down the tree dropping ref 4628 * counts as it goes. 4629 */ 4630 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 4631 struct btrfs_root *root, 4632 struct btrfs_path *path, int *level) 4633 { 4634 u64 root_owner; 4635 u64 root_gen; 4636 u64 bytenr; 4637 u64 ptr_gen; 4638 struct extent_buffer *next; 4639 struct extent_buffer *cur; 4640 struct extent_buffer *parent; 4641 u32 blocksize; 4642 int ret; 4643 u32 refs; 4644 4645 WARN_ON(*level < 0); 4646 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4647 ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, 4648 path->nodes[*level]->len, &refs); 4649 BUG_ON(ret); 4650 if (refs > 1) 4651 goto out; 4652 4653 /* 4654 * walk down to the last node level and free all the leaves 4655 */ 4656 while (*level >= 0) { 4657 WARN_ON(*level < 0); 4658 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4659 cur = path->nodes[*level]; 4660 4661 if (btrfs_header_level(cur) != *level) 4662 WARN_ON(1); 4663 4664 if (path->slots[*level] >= 4665 btrfs_header_nritems(cur)) 4666 break; 4667 4668 /* the new code goes down to level 1 and does all the 4669 * leaves pointed to that node in bulk. So, this check 4670 * for level 0 will always be false. 4671 * 4672 * But, the disk format allows the drop_snapshot_progress 4673 * field in the root to leave things in a state where 4674 * a leaf will need cleaning up here. If someone crashes 4675 * with the old code and then boots with the new code, 4676 * we might find a leaf here. 4677 */ 4678 if (*level == 0) { 4679 ret = btrfs_drop_leaf_ref(trans, root, cur); 4680 BUG_ON(ret); 4681 break; 4682 } 4683 4684 /* 4685 * once we get to level one, process the whole node 4686 * at once, including everything below it. 
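 * drop_level_one_refs sorts the leaf pointers first so the refs are dropped
 * in bytenr order, keeping the extent tree updates clustered together.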
4687 */ 4688 if (*level == 1) { 4689 ret = drop_level_one_refs(trans, root, path); 4690 BUG_ON(ret); 4691 break; 4692 } 4693 4694 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 4695 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 4696 blocksize = btrfs_level_size(root, *level - 1); 4697 4698 ret = drop_snap_lookup_refcount(trans, root, bytenr, 4699 blocksize, &refs); 4700 BUG_ON(ret); 4701 4702 /* 4703 * if there is more than one reference, we don't need 4704 * to read that node to drop any references it has. We 4705 * just drop the ref we hold on that node and move on to the 4706 * next slot in this level. 4707 */ 4708 if (refs != 1) { 4709 parent = path->nodes[*level]; 4710 root_owner = btrfs_header_owner(parent); 4711 root_gen = btrfs_header_generation(parent); 4712 path->slots[*level]++; 4713 4714 ret = btrfs_free_extent(trans, root, bytenr, 4715 blocksize, parent->start, 4716 root_owner, root_gen, 4717 *level - 1, 1); 4718 BUG_ON(ret); 4719 4720 atomic_inc(&root->fs_info->throttle_gen); 4721 wake_up(&root->fs_info->transaction_throttle); 4722 cond_resched(); 4723 4724 continue; 4725 } 4726 4727 /* 4728 * we need to keep freeing things in the next level down. 4729 * read the block and loop around to process it 4730 */ 4731 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4732 WARN_ON(*level <= 0); 4733 if (path->nodes[*level-1]) 4734 free_extent_buffer(path->nodes[*level-1]); 4735 path->nodes[*level-1] = next; 4736 *level = btrfs_header_level(next); 4737 path->slots[*level] = 0; 4738 cond_resched(); 4739 } 4740 out: 4741 WARN_ON(*level < 0); 4742 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4743 4744 if (path->nodes[*level] == root->node) { 4745 parent = path->nodes[*level]; 4746 bytenr = path->nodes[*level]->start; 4747 } else { 4748 parent = path->nodes[*level + 1]; 4749 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); 4750 } 4751 4752 blocksize = btrfs_level_size(root, *level); 4753 root_owner = btrfs_header_owner(parent); 4754 root_gen = btrfs_header_generation(parent); 4755 4756 /* 4757 * cleanup and free the reference on the last node 4758 * we processed 4759 */ 4760 ret = btrfs_free_extent(trans, root, bytenr, blocksize, 4761 parent->start, root_owner, root_gen, 4762 *level, 1); 4763 free_extent_buffer(path->nodes[*level]); 4764 path->nodes[*level] = NULL; 4765 4766 *level += 1; 4767 BUG_ON(ret); 4768 4769 cond_resched(); 4770 return 0; 4771 } 4772 #endif 4773 4774 struct walk_control { 4775 u64 refs[BTRFS_MAX_LEVEL]; 4776 u64 flags[BTRFS_MAX_LEVEL]; 4777 struct btrfs_key update_progress; 4778 int stage; 4779 int level; 4780 int shared_level; 4781 int update_ref; 4782 int keep_locks; 4783 }; 4784 4785 #define DROP_REFERENCE 1 4786 #define UPDATE_BACKREF 2 4787 4788 /* 4789 * helper to process tree block while walking down the tree. 4790 * 4791 * when wc->stage == DROP_REFERENCE, this function checks 4792 * reference count of the block. if the block is shared and 4793 * we need to update back refs for the subtree rooted at the 4794 * block, this function changes wc->stage to UPDATE_BACKREF 4795 * 4796 * when wc->stage == UPDATE_BACKREF, this function updates 4797 * back refs for pointers in the block. 4798 * 4799 * NOTE: return value 1 means we should stop walking down.
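 * a return value of 0 means the caller should continue walking down the tree.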
4800 */ 4801 static noinline int walk_down_proc(struct btrfs_trans_handle *trans, 4802 struct btrfs_root *root, 4803 struct btrfs_path *path, 4804 struct walk_control *wc) 4805 { 4806 int level = wc->level; 4807 struct extent_buffer *eb = path->nodes[level]; 4808 struct btrfs_key key; 4809 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; 4810 int ret; 4811 4812 if (wc->stage == UPDATE_BACKREF && 4813 btrfs_header_owner(eb) != root->root_key.objectid) 4814 return 1; 4815 4816 /* 4817 * when reference count of tree block is 1, it won't increase 4818 * again. once full backref flag is set, we never clear it. 4819 */ 4820 if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || 4821 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { 4822 BUG_ON(!path->locks[level]); 4823 ret = btrfs_lookup_extent_info(trans, root, 4824 eb->start, eb->len, 4825 &wc->refs[level], 4826 &wc->flags[level]); 4827 BUG_ON(ret); 4828 BUG_ON(wc->refs[level] == 0); 4829 } 4830 4831 if (wc->stage == DROP_REFERENCE && 4832 wc->update_ref && wc->refs[level] > 1) { 4833 BUG_ON(eb == root->node); 4834 BUG_ON(path->slots[level] > 0); 4835 if (level == 0) 4836 btrfs_item_key_to_cpu(eb, &key, path->slots[level]); 4837 else 4838 btrfs_node_key_to_cpu(eb, &key, path->slots[level]); 4839 if (btrfs_header_owner(eb) == root->root_key.objectid && 4840 btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { 4841 wc->stage = UPDATE_BACKREF; 4842 wc->shared_level = level; 4843 } 4844 } 4845 4846 if (wc->stage == DROP_REFERENCE) { 4847 if (wc->refs[level] > 1) 4848 return 1; 4849 4850 if (path->locks[level] && !wc->keep_locks) { 4851 btrfs_tree_unlock(eb); 4852 path->locks[level] = 0; 4853 } 4854 return 0; 4855 } 4856 4857 /* wc->stage == UPDATE_BACKREF */ 4858 if (!(wc->flags[level] & flag)) { 4859 BUG_ON(!path->locks[level]); 4860 ret = btrfs_inc_ref(trans, root, eb, 1); 4861 BUG_ON(ret); 4862 ret = btrfs_dec_ref(trans, root, eb, 0); 4863 BUG_ON(ret); 4864 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 4865 eb->len, flag, 0); 4866 BUG_ON(ret); 4867 wc->flags[level] |= flag; 4868 } 4869 4870 /* 4871 * the block is shared by multiple trees, so it's not good to 4872 * keep the tree lock 4873 */ 4874 if (path->locks[level] && level > 0) { 4875 btrfs_tree_unlock(eb); 4876 path->locks[level] = 0; 4877 } 4878 return 0; 4879 } 4880 4881 /* 4882 * helper to process tree block while walking up the tree. 4883 * 4884 * when wc->stage == DROP_REFERENCE, this function drops 4885 * the reference count on the block. 4886 * 4887 * when wc->stage == UPDATE_BACKREF, this function changes 4888 * wc->stage back to DROP_REFERENCE if we changed wc->stage 4889 * to UPDATE_BACKREF previously while processing the block. 4890 * 4891 * NOTE: return value 1 means we should stop walking up. 4892 */ 4893 static noinline int walk_up_proc(struct btrfs_trans_handle *trans, 4894 struct btrfs_root *root, 4895 struct btrfs_path *path, 4896 struct walk_control *wc) 4897 { 4898 int ret = 0; 4899 int level = wc->level; 4900 struct extent_buffer *eb = path->nodes[level]; 4901 u64 parent = 0; 4902 4903 if (wc->stage == UPDATE_BACKREF) { 4904 BUG_ON(wc->shared_level < level); 4905 if (level < wc->shared_level) 4906 goto out; 4907 4908 BUG_ON(wc->refs[level] <= 1); 4909 ret = find_next_key(path, level + 1, &wc->update_progress); 4910 if (ret > 0) 4911 wc->update_ref = 0; 4912 4913 wc->stage = DROP_REFERENCE; 4914 wc->shared_level = -1; 4915 path->slots[level] = 0; 4916 4917 /* 4918 * check reference count again if the block isn't locked.
4919 * we should start walking down the tree again if reference 4920 * count is one. 4921 */ 4922 if (!path->locks[level]) { 4923 BUG_ON(level == 0); 4924 btrfs_tree_lock(eb); 4925 btrfs_set_lock_blocking(eb); 4926 path->locks[level] = 1; 4927 4928 ret = btrfs_lookup_extent_info(trans, root, 4929 eb->start, eb->len, 4930 &wc->refs[level], 4931 &wc->flags[level]); 4932 BUG_ON(ret); 4933 BUG_ON(wc->refs[level] == 0); 4934 if (wc->refs[level] == 1) { 4935 btrfs_tree_unlock(eb); 4936 path->locks[level] = 0; 4937 return 1; 4938 } 4939 } else { 4940 BUG_ON(level != 0); 4941 } 4942 } 4943 4944 /* wc->stage == DROP_REFERENCE */ 4945 BUG_ON(wc->refs[level] > 1 && !path->locks[level]); 4946 4947 if (wc->refs[level] == 1) { 4948 if (level == 0) { 4949 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 4950 ret = btrfs_dec_ref(trans, root, eb, 1); 4951 else 4952 ret = btrfs_dec_ref(trans, root, eb, 0); 4953 BUG_ON(ret); 4954 } 4955 /* make block locked assertion in clean_tree_block happy */ 4956 if (!path->locks[level] && 4957 btrfs_header_generation(eb) == trans->transid) { 4958 btrfs_tree_lock(eb); 4959 btrfs_set_lock_blocking(eb); 4960 path->locks[level] = 1; 4961 } 4962 clean_tree_block(trans, root, eb); 4963 } 4964 4965 if (eb == root->node) { 4966 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 4967 parent = eb->start; 4968 else 4969 BUG_ON(root->root_key.objectid != 4970 btrfs_header_owner(eb)); 4971 } else { 4972 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 4973 parent = path->nodes[level + 1]->start; 4974 else 4975 BUG_ON(root->root_key.objectid != 4976 btrfs_header_owner(path->nodes[level + 1])); 4977 } 4978 4979 ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, 4980 root->root_key.objectid, level, 0); 4981 BUG_ON(ret); 4982 out: 4983 wc->refs[level] = 0; 4984 wc->flags[level] = 0; 4985 return ret; 4986 } 4987 4988 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 4989 struct btrfs_root *root, 4990 struct btrfs_path *path, 4991 struct walk_control *wc) 4992 { 4993 struct extent_buffer *next; 4994 struct extent_buffer *cur; 4995 u64 bytenr; 4996 u64 ptr_gen; 4997 u32 blocksize; 4998 int level = wc->level; 4999 int ret; 5000 5001 while (level >= 0) { 5002 cur = path->nodes[level]; 5003 BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); 5004 5005 ret = walk_down_proc(trans, root, path, wc); 5006 if (ret > 0) 5007 break; 5008 5009 if (level == 0) 5010 break; 5011 5012 bytenr = btrfs_node_blockptr(cur, path->slots[level]); 5013 blocksize = btrfs_level_size(root, level - 1); 5014 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); 5015 5016 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 5017 btrfs_tree_lock(next); 5018 btrfs_set_lock_blocking(next); 5019 5020 level--; 5021 BUG_ON(level != btrfs_header_level(next)); 5022 path->nodes[level] = next; 5023 path->slots[level] = 0; 5024 path->locks[level] = 1; 5025 wc->level = level; 5026 } 5027 return 0; 5028 } 5029 5030 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 5031 struct btrfs_root *root, 5032 struct btrfs_path *path, 5033 struct walk_control *wc, int max_level) 5034 { 5035 int level = wc->level; 5036 int ret; 5037 5038 path->slots[level] = btrfs_header_nritems(path->nodes[level]); 5039 while (level < max_level && path->nodes[level]) { 5040 wc->level = level; 5041 if (path->slots[level] + 1 < 5042 btrfs_header_nritems(path->nodes[level])) { 5043 path->slots[level]++; 5044 return 0; 5045 } else { 5046 ret = walk_up_proc(trans, root, path, wc); 
5047 if (ret > 0) 5048 return 0; 5049 5050 if (path->locks[level]) { 5051 btrfs_tree_unlock(path->nodes[level]); 5052 path->locks[level] = 0; 5053 } 5054 free_extent_buffer(path->nodes[level]); 5055 path->nodes[level] = NULL; 5056 level++; 5057 } 5058 } 5059 return 1; 5060 } 5061 5062 /* 5063 * drop a subvolume tree. 5064 * 5065 * this function traverses the tree freeing any blocks that are only 5066 * referenced by the tree. 5067 * 5068 * when a shared tree block is found, this function decreases its 5069 * reference count by one. if update_ref is true, this function 5070 * also makes sure backrefs for the shared block and all lower level 5071 * blocks are properly updated. 5072 */ 5073 int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) 5074 { 5075 struct btrfs_path *path; 5076 struct btrfs_trans_handle *trans; 5077 struct btrfs_root *tree_root = root->fs_info->tree_root; 5078 struct btrfs_root_item *root_item = &root->root_item; 5079 struct walk_control *wc; 5080 struct btrfs_key key; 5081 int err = 0; 5082 int ret; 5083 int level; 5084 5085 path = btrfs_alloc_path(); 5086 BUG_ON(!path); 5087 5088 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5089 BUG_ON(!wc); 5090 5091 trans = btrfs_start_transaction(tree_root, 1); 5092 5093 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 5094 level = btrfs_header_level(root->node); 5095 path->nodes[level] = btrfs_lock_root_node(root); 5096 btrfs_set_lock_blocking(path->nodes[level]); 5097 path->slots[level] = 0; 5098 path->locks[level] = 1; 5099 memset(&wc->update_progress, 0, 5100 sizeof(wc->update_progress)); 5101 } else { 5102 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 5103 memcpy(&wc->update_progress, &key, 5104 sizeof(wc->update_progress)); 5105 5106 level = root_item->drop_level; 5107 BUG_ON(level == 0); 5108 path->lowest_level = level; 5109 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5110 path->lowest_level = 0; 5111 if (ret < 0) { 5112 err = ret; 5113 goto out; 5114 } 5115 btrfs_node_key_to_cpu(path->nodes[level], &key, 5116 path->slots[level]); 5117 WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); 5118 5119 /* 5120 * unlock our path, this is safe because only this 5121 * function is allowed to delete this snapshot 5122 */ 5123 btrfs_unlock_up_safe(path, 0); 5124 5125 level = btrfs_header_level(root->node); 5126 while (1) { 5127 btrfs_tree_lock(path->nodes[level]); 5128 btrfs_set_lock_blocking(path->nodes[level]); 5129 5130 ret = btrfs_lookup_extent_info(trans, root, 5131 path->nodes[level]->start, 5132 path->nodes[level]->len, 5133 &wc->refs[level], 5134 &wc->flags[level]); 5135 BUG_ON(ret); 5136 BUG_ON(wc->refs[level] == 0); 5137 5138 if (level == root_item->drop_level) 5139 break; 5140 5141 btrfs_tree_unlock(path->nodes[level]); 5142 WARN_ON(wc->refs[level] != 1); 5143 level--; 5144 } 5145 } 5146 5147 wc->level = level; 5148 wc->shared_level = -1; 5149 wc->stage = DROP_REFERENCE; 5150 wc->update_ref = update_ref; 5151 wc->keep_locks = 0; 5152 5153 while (1) { 5154 ret = walk_down_tree(trans, root, path, wc); 5155 if (ret < 0) { 5156 err = ret; 5157 break; 5158 } 5159 5160 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); 5161 if (ret < 0) { 5162 err = ret; 5163 break; 5164 } 5165 5166 if (ret > 0) { 5167 BUG_ON(wc->stage != DROP_REFERENCE); 5168 break; 5169 } 5170 5171 if (wc->stage == DROP_REFERENCE) { 5172 level = wc->level; 5173 btrfs_node_key(path->nodes[level], 5174 &root_item->drop_progress, 5175 path->slots[level]); 5176 root_item->drop_level = level; 5177 } 5178 5179
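 /* if the transaction is committing or the delayed refs are being flushed, write the root item (with the drop_progress saved above) back and restart with a fresh transaction; otherwise just run the delayed ref updates queued so far */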
BUG_ON(wc->level == 0); 5180 if (trans->transaction->in_commit || 5181 trans->transaction->delayed_refs.flushing) { 5182 ret = btrfs_update_root(trans, tree_root, 5183 &root->root_key, 5184 root_item); 5185 BUG_ON(ret); 5186 5187 btrfs_end_transaction(trans, tree_root); 5188 trans = btrfs_start_transaction(tree_root, 1); 5189 } else { 5190 unsigned long update; 5191 update = trans->delayed_ref_updates; 5192 trans->delayed_ref_updates = 0; 5193 if (update) 5194 btrfs_run_delayed_refs(trans, tree_root, 5195 update); 5196 } 5197 } 5198 btrfs_release_path(root, path); 5199 BUG_ON(err); 5200 5201 ret = btrfs_del_root(trans, tree_root, &root->root_key); 5202 BUG_ON(ret); 5203 5204 free_extent_buffer(root->node); 5205 free_extent_buffer(root->commit_root); 5206 kfree(root); 5207 out: 5208 btrfs_end_transaction(trans, tree_root); 5209 kfree(wc); 5210 btrfs_free_path(path); 5211 return err; 5212 } 5213 5214 /* 5215 * drop subtree rooted at tree block 'node'. 5216 * 5217 * NOTE: this function will unlock and release tree block 'node' 5218 */ 5219 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 5220 struct btrfs_root *root, 5221 struct extent_buffer *node, 5222 struct extent_buffer *parent) 5223 { 5224 struct btrfs_path *path; 5225 struct walk_control *wc; 5226 int level; 5227 int parent_level; 5228 int ret = 0; 5229 int wret; 5230 5231 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 5232 5233 path = btrfs_alloc_path(); 5234 BUG_ON(!path); 5235 5236 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5237 BUG_ON(!wc); 5238 5239 btrfs_assert_tree_locked(parent); 5240 parent_level = btrfs_header_level(parent); 5241 extent_buffer_get(parent); 5242 path->nodes[parent_level] = parent; 5243 path->slots[parent_level] = btrfs_header_nritems(parent); 5244 5245 btrfs_assert_tree_locked(node); 5246 level = btrfs_header_level(node); 5247 path->nodes[level] = node; 5248 path->slots[level] = 0; 5249 path->locks[level] = 1; 5250 5251 wc->refs[parent_level] = 1; 5252 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; 5253 wc->level = level; 5254 wc->shared_level = -1; 5255 wc->stage = DROP_REFERENCE; 5256 wc->update_ref = 0; 5257 wc->keep_locks = 1; 5258 5259 while (1) { 5260 wret = walk_down_tree(trans, root, path, wc); 5261 if (wret < 0) { 5262 ret = wret; 5263 break; 5264 } 5265 5266 wret = walk_up_tree(trans, root, path, wc, parent_level); 5267 if (wret < 0) 5268 ret = wret; 5269 if (wret != 0) 5270 break; 5271 } 5272 5273 kfree(wc); 5274 btrfs_free_path(path); 5275 return ret; 5276 } 5277 5278 #if 0 5279 static unsigned long calc_ra(unsigned long start, unsigned long last, 5280 unsigned long nr) 5281 { 5282 return min(last, start + nr - 1); 5283 } 5284 5285 static noinline int relocate_inode_pages(struct inode *inode, u64 start, 5286 u64 len) 5287 { 5288 u64 page_start; 5289 u64 page_end; 5290 unsigned long first_index; 5291 unsigned long last_index; 5292 unsigned long i; 5293 struct page *page; 5294 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5295 struct file_ra_state *ra; 5296 struct btrfs_ordered_extent *ordered; 5297 unsigned int total_read = 0; 5298 unsigned int total_dirty = 0; 5299 int ret = 0; 5300 5301 ra = kzalloc(sizeof(*ra), GFP_NOFS); 5302 5303 mutex_lock(&inode->i_mutex); 5304 first_index = start >> PAGE_CACHE_SHIFT; 5305 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 5306 5307 /* make sure the dirty trick played by the caller work */ 5308 ret = invalidate_inode_pages2_range(inode->i_mapping, 5309 first_index, last_index); 5310 if (ret) 5311 goto out_unlock; 5312 
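 /* read in each page of the range and mark it dirty and delalloc so the data is written out again by the normal writeback path */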
5313 file_ra_state_init(ra, inode->i_mapping); 5314 5315 for (i = first_index ; i <= last_index; i++) { 5316 if (total_read % ra->ra_pages == 0) { 5317 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 5318 calc_ra(i, last_index, ra->ra_pages)); 5319 } 5320 total_read++; 5321 again: 5322 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) 5323 BUG_ON(1); 5324 page = grab_cache_page(inode->i_mapping, i); 5325 if (!page) { 5326 ret = -ENOMEM; 5327 goto out_unlock; 5328 } 5329 if (!PageUptodate(page)) { 5330 btrfs_readpage(NULL, page); 5331 lock_page(page); 5332 if (!PageUptodate(page)) { 5333 unlock_page(page); 5334 page_cache_release(page); 5335 ret = -EIO; 5336 goto out_unlock; 5337 } 5338 } 5339 wait_on_page_writeback(page); 5340 5341 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 5342 page_end = page_start + PAGE_CACHE_SIZE - 1; 5343 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 5344 5345 ordered = btrfs_lookup_ordered_extent(inode, page_start); 5346 if (ordered) { 5347 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5348 unlock_page(page); 5349 page_cache_release(page); 5350 btrfs_start_ordered_extent(inode, ordered, 1); 5351 btrfs_put_ordered_extent(ordered); 5352 goto again; 5353 } 5354 set_page_extent_mapped(page); 5355 5356 if (i == first_index) 5357 set_extent_bits(io_tree, page_start, page_end, 5358 EXTENT_BOUNDARY, GFP_NOFS); 5359 btrfs_set_extent_delalloc(inode, page_start, page_end); 5360 5361 set_page_dirty(page); 5362 total_dirty++; 5363 5364 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5365 unlock_page(page); 5366 page_cache_release(page); 5367 } 5368 5369 out_unlock: 5370 kfree(ra); 5371 mutex_unlock(&inode->i_mutex); 5372 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); 5373 return ret; 5374 } 5375 5376 static noinline int relocate_data_extent(struct inode *reloc_inode, 5377 struct btrfs_key *extent_key, 5378 u64 offset) 5379 { 5380 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 5381 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; 5382 struct extent_map *em; 5383 u64 start = extent_key->objectid - offset; 5384 u64 end = start + extent_key->offset - 1; 5385 5386 em = alloc_extent_map(GFP_NOFS); 5387 BUG_ON(!em || IS_ERR(em)); 5388 5389 em->start = start; 5390 em->len = extent_key->offset; 5391 em->block_len = extent_key->offset; 5392 em->block_start = extent_key->objectid; 5393 em->bdev = root->fs_info->fs_devices->latest_bdev; 5394 set_bit(EXTENT_FLAG_PINNED, &em->flags); 5395 5396 /* setup extent map to cheat btrfs_readpage */ 5397 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5398 while (1) { 5399 int ret; 5400 spin_lock(&em_tree->lock); 5401 ret = add_extent_mapping(em_tree, em); 5402 spin_unlock(&em_tree->lock); 5403 if (ret != -EEXIST) { 5404 free_extent_map(em); 5405 break; 5406 } 5407 btrfs_drop_extent_cache(reloc_inode, start, end, 0); 5408 } 5409 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5410 5411 return relocate_inode_pages(reloc_inode, start, extent_key->offset); 5412 } 5413 5414 struct btrfs_ref_path { 5415 u64 extent_start; 5416 u64 nodes[BTRFS_MAX_LEVEL]; 5417 u64 root_objectid; 5418 u64 root_generation; 5419 u64 owner_objectid; 5420 u32 num_refs; 5421 int lowest_level; 5422 int current_level; 5423 int shared_level; 5424 5425 struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; 5426 u64 new_nodes[BTRFS_MAX_LEVEL]; 5427 }; 5428 5429 struct disk_extent { 5430 u64 ram_bytes; 5431 u64 disk_bytenr; 5432 u64 disk_num_bytes; 5433 u64 offset; 5434 u64 
num_bytes; 5435 u8 compression; 5436 u8 encryption; 5437 u16 other_encoding; 5438 }; 5439 5440 static int is_cowonly_root(u64 root_objectid) 5441 { 5442 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 5443 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 5444 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 5445 root_objectid == BTRFS_DEV_TREE_OBJECTID || 5446 root_objectid == BTRFS_TREE_LOG_OBJECTID || 5447 root_objectid == BTRFS_CSUM_TREE_OBJECTID) 5448 return 1; 5449 return 0; 5450 } 5451 5452 static noinline int __next_ref_path(struct btrfs_trans_handle *trans, 5453 struct btrfs_root *extent_root, 5454 struct btrfs_ref_path *ref_path, 5455 int first_time) 5456 { 5457 struct extent_buffer *leaf; 5458 struct btrfs_path *path; 5459 struct btrfs_extent_ref *ref; 5460 struct btrfs_key key; 5461 struct btrfs_key found_key; 5462 u64 bytenr; 5463 u32 nritems; 5464 int level; 5465 int ret = 1; 5466 5467 path = btrfs_alloc_path(); 5468 if (!path) 5469 return -ENOMEM; 5470 5471 if (first_time) { 5472 ref_path->lowest_level = -1; 5473 ref_path->current_level = -1; 5474 ref_path->shared_level = -1; 5475 goto walk_up; 5476 } 5477 walk_down: 5478 level = ref_path->current_level - 1; 5479 while (level >= -1) { 5480 u64 parent; 5481 if (level < ref_path->lowest_level) 5482 break; 5483 5484 if (level >= 0) 5485 bytenr = ref_path->nodes[level]; 5486 else 5487 bytenr = ref_path->extent_start; 5488 BUG_ON(bytenr == 0); 5489 5490 parent = ref_path->nodes[level + 1]; 5491 ref_path->nodes[level + 1] = 0; 5492 ref_path->current_level = level; 5493 BUG_ON(parent == 0); 5494 5495 key.objectid = bytenr; 5496 key.offset = parent + 1; 5497 key.type = BTRFS_EXTENT_REF_KEY; 5498 5499 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5500 if (ret < 0) 5501 goto out; 5502 BUG_ON(ret == 0); 5503 5504 leaf = path->nodes[0]; 5505 nritems = btrfs_header_nritems(leaf); 5506 if (path->slots[0] >= nritems) { 5507 ret = btrfs_next_leaf(extent_root, path); 5508 if (ret < 0) 5509 goto out; 5510 if (ret > 0) 5511 goto next; 5512 leaf = path->nodes[0]; 5513 } 5514 5515 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5516 if (found_key.objectid == bytenr && 5517 found_key.type == BTRFS_EXTENT_REF_KEY) { 5518 if (level < ref_path->shared_level) 5519 ref_path->shared_level = level; 5520 goto found; 5521 } 5522 next: 5523 level--; 5524 btrfs_release_path(extent_root, path); 5525 cond_resched(); 5526 } 5527 /* reached lowest level */ 5528 ret = 1; 5529 goto out; 5530 walk_up: 5531 level = ref_path->current_level; 5532 while (level < BTRFS_MAX_LEVEL - 1) { 5533 u64 ref_objectid; 5534 5535 if (level >= 0) 5536 bytenr = ref_path->nodes[level]; 5537 else 5538 bytenr = ref_path->extent_start; 5539 5540 BUG_ON(bytenr == 0); 5541 5542 key.objectid = bytenr; 5543 key.offset = 0; 5544 key.type = BTRFS_EXTENT_REF_KEY; 5545 5546 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5547 if (ret < 0) 5548 goto out; 5549 5550 leaf = path->nodes[0]; 5551 nritems = btrfs_header_nritems(leaf); 5552 if (path->slots[0] >= nritems) { 5553 ret = btrfs_next_leaf(extent_root, path); 5554 if (ret < 0) 5555 goto out; 5556 if (ret > 0) { 5557 /* the extent was freed by someone */ 5558 if (ref_path->lowest_level == level) 5559 goto out; 5560 btrfs_release_path(extent_root, path); 5561 goto walk_down; 5562 } 5563 leaf = path->nodes[0]; 5564 } 5565 5566 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5567 if (found_key.objectid != bytenr || 5568 found_key.type != BTRFS_EXTENT_REF_KEY) { 5569 /* the extent was freed by someone 
*/ 5570 if (ref_path->lowest_level == level) { 5571 ret = 1; 5572 goto out; 5573 } 5574 btrfs_release_path(extent_root, path); 5575 goto walk_down; 5576 } 5577 found: 5578 ref = btrfs_item_ptr(leaf, path->slots[0], 5579 struct btrfs_extent_ref); 5580 ref_objectid = btrfs_ref_objectid(leaf, ref); 5581 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { 5582 if (first_time) { 5583 level = (int)ref_objectid; 5584 BUG_ON(level >= BTRFS_MAX_LEVEL); 5585 ref_path->lowest_level = level; 5586 ref_path->current_level = level; 5587 ref_path->nodes[level] = bytenr; 5588 } else { 5589 WARN_ON(ref_objectid != level); 5590 } 5591 } else { 5592 WARN_ON(level != -1); 5593 } 5594 first_time = 0; 5595 5596 if (ref_path->lowest_level == level) { 5597 ref_path->owner_objectid = ref_objectid; 5598 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); 5599 } 5600 5601 /* 5602 * the block is tree root or the block isn't in reference 5603 * counted tree. 5604 */ 5605 if (found_key.objectid == found_key.offset || 5606 is_cowonly_root(btrfs_ref_root(leaf, ref))) { 5607 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 5608 ref_path->root_generation = 5609 btrfs_ref_generation(leaf, ref); 5610 if (level < 0) { 5611 /* special reference from the tree log */ 5612 ref_path->nodes[0] = found_key.offset; 5613 ref_path->current_level = 0; 5614 } 5615 ret = 0; 5616 goto out; 5617 } 5618 5619 level++; 5620 BUG_ON(ref_path->nodes[level] != 0); 5621 ref_path->nodes[level] = found_key.offset; 5622 ref_path->current_level = level; 5623 5624 /* 5625 * the reference was created in the running transaction, 5626 * no need to continue walking up. 5627 */ 5628 if (btrfs_ref_generation(leaf, ref) == trans->transid) { 5629 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 5630 ref_path->root_generation = 5631 btrfs_ref_generation(leaf, ref); 5632 ret = 0; 5633 goto out; 5634 } 5635 5636 btrfs_release_path(extent_root, path); 5637 cond_resched(); 5638 } 5639 /* reached max tree level, but no tree root found. 
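 * that should never happen for a consistent extent tree, hence the BUG().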
*/ 5640 BUG(); 5641 out: 5642 btrfs_free_path(path); 5643 return ret; 5644 } 5645 5646 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, 5647 struct btrfs_root *extent_root, 5648 struct btrfs_ref_path *ref_path, 5649 u64 extent_start) 5650 { 5651 memset(ref_path, 0, sizeof(*ref_path)); 5652 ref_path->extent_start = extent_start; 5653 5654 return __next_ref_path(trans, extent_root, ref_path, 1); 5655 } 5656 5657 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, 5658 struct btrfs_root *extent_root, 5659 struct btrfs_ref_path *ref_path) 5660 { 5661 return __next_ref_path(trans, extent_root, ref_path, 0); 5662 } 5663 5664 static noinline int get_new_locations(struct inode *reloc_inode, 5665 struct btrfs_key *extent_key, 5666 u64 offset, int no_fragment, 5667 struct disk_extent **extents, 5668 int *nr_extents) 5669 { 5670 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 5671 struct btrfs_path *path; 5672 struct btrfs_file_extent_item *fi; 5673 struct extent_buffer *leaf; 5674 struct disk_extent *exts = *extents; 5675 struct btrfs_key found_key; 5676 u64 cur_pos; 5677 u64 last_byte; 5678 u32 nritems; 5679 int nr = 0; 5680 int max = *nr_extents; 5681 int ret; 5682 5683 WARN_ON(!no_fragment && *extents); 5684 if (!exts) { 5685 max = 1; 5686 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); 5687 if (!exts) 5688 return -ENOMEM; 5689 } 5690 5691 path = btrfs_alloc_path(); 5692 BUG_ON(!path); 5693 5694 cur_pos = extent_key->objectid - offset; 5695 last_byte = extent_key->objectid + extent_key->offset; 5696 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 5697 cur_pos, 0); 5698 if (ret < 0) 5699 goto out; 5700 if (ret > 0) { 5701 ret = -ENOENT; 5702 goto out; 5703 } 5704 5705 while (1) { 5706 leaf = path->nodes[0]; 5707 nritems = btrfs_header_nritems(leaf); 5708 if (path->slots[0] >= nritems) { 5709 ret = btrfs_next_leaf(root, path); 5710 if (ret < 0) 5711 goto out; 5712 if (ret > 0) 5713 break; 5714 leaf = path->nodes[0]; 5715 } 5716 5717 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5718 if (found_key.offset != cur_pos || 5719 found_key.type != BTRFS_EXTENT_DATA_KEY || 5720 found_key.objectid != reloc_inode->i_ino) 5721 break; 5722 5723 fi = btrfs_item_ptr(leaf, path->slots[0], 5724 struct btrfs_file_extent_item); 5725 if (btrfs_file_extent_type(leaf, fi) != 5726 BTRFS_FILE_EXTENT_REG || 5727 btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 5728 break; 5729 5730 if (nr == max) { 5731 struct disk_extent *old = exts; 5732 max *= 2; 5733 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); 5734 memcpy(exts, old, sizeof(*exts) * nr); 5735 if (old != *extents) 5736 kfree(old); 5737 } 5738 5739 exts[nr].disk_bytenr = 5740 btrfs_file_extent_disk_bytenr(leaf, fi); 5741 exts[nr].disk_num_bytes = 5742 btrfs_file_extent_disk_num_bytes(leaf, fi); 5743 exts[nr].offset = btrfs_file_extent_offset(leaf, fi); 5744 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 5745 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 5746 exts[nr].compression = btrfs_file_extent_compression(leaf, fi); 5747 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); 5748 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, 5749 fi); 5750 BUG_ON(exts[nr].offset > 0); 5751 BUG_ON(exts[nr].compression || exts[nr].encryption); 5752 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); 5753 5754 cur_pos += exts[nr].num_bytes; 5755 nr++; 5756 5757 if (cur_pos + offset >= last_byte) 5758 break; 5759 5760 if (no_fragment) { 5761 ret = 1; 5762 goto out; 
5763 } 5764 path->slots[0]++; 5765 } 5766 5767 BUG_ON(cur_pos + offset > last_byte); 5768 if (cur_pos + offset < last_byte) { 5769 ret = -ENOENT; 5770 goto out; 5771 } 5772 ret = 0; 5773 out: 5774 btrfs_free_path(path); 5775 if (ret) { 5776 if (exts != *extents) 5777 kfree(exts); 5778 } else { 5779 *extents = exts; 5780 *nr_extents = nr; 5781 } 5782 return ret; 5783 } 5784 5785 static noinline int replace_one_extent(struct btrfs_trans_handle *trans, 5786 struct btrfs_root *root, 5787 struct btrfs_path *path, 5788 struct btrfs_key *extent_key, 5789 struct btrfs_key *leaf_key, 5790 struct btrfs_ref_path *ref_path, 5791 struct disk_extent *new_extents, 5792 int nr_extents) 5793 { 5794 struct extent_buffer *leaf; 5795 struct btrfs_file_extent_item *fi; 5796 struct inode *inode = NULL; 5797 struct btrfs_key key; 5798 u64 lock_start = 0; 5799 u64 lock_end = 0; 5800 u64 num_bytes; 5801 u64 ext_offset; 5802 u64 search_end = (u64)-1; 5803 u32 nritems; 5804 int nr_scaned = 0; 5805 int extent_locked = 0; 5806 int extent_type; 5807 int ret; 5808 5809 memcpy(&key, leaf_key, sizeof(key)); 5810 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 5811 if (key.objectid < ref_path->owner_objectid || 5812 (key.objectid == ref_path->owner_objectid && 5813 key.type < BTRFS_EXTENT_DATA_KEY)) { 5814 key.objectid = ref_path->owner_objectid; 5815 key.type = BTRFS_EXTENT_DATA_KEY; 5816 key.offset = 0; 5817 } 5818 } 5819 5820 while (1) { 5821 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 5822 if (ret < 0) 5823 goto out; 5824 5825 leaf = path->nodes[0]; 5826 nritems = btrfs_header_nritems(leaf); 5827 next: 5828 if (extent_locked && ret > 0) { 5829 /* 5830 * the file extent item was modified by someone 5831 * before the extent got locked. 5832 */ 5833 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5834 lock_end, GFP_NOFS); 5835 extent_locked = 0; 5836 } 5837 5838 if (path->slots[0] >= nritems) { 5839 if (++nr_scaned > 2) 5840 break; 5841 5842 BUG_ON(extent_locked); 5843 ret = btrfs_next_leaf(root, path); 5844 if (ret < 0) 5845 goto out; 5846 if (ret > 0) 5847 break; 5848 leaf = path->nodes[0]; 5849 nritems = btrfs_header_nritems(leaf); 5850 } 5851 5852 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 5853 5854 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 5855 if ((key.objectid > ref_path->owner_objectid) || 5856 (key.objectid == ref_path->owner_objectid && 5857 key.type > BTRFS_EXTENT_DATA_KEY) || 5858 key.offset >= search_end) 5859 break; 5860 } 5861 5862 if (inode && key.objectid != inode->i_ino) { 5863 BUG_ON(extent_locked); 5864 btrfs_release_path(root, path); 5865 mutex_unlock(&inode->i_mutex); 5866 iput(inode); 5867 inode = NULL; 5868 continue; 5869 } 5870 5871 if (key.type != BTRFS_EXTENT_DATA_KEY) { 5872 path->slots[0]++; 5873 ret = 1; 5874 goto next; 5875 } 5876 fi = btrfs_item_ptr(leaf, path->slots[0], 5877 struct btrfs_file_extent_item); 5878 extent_type = btrfs_file_extent_type(leaf, fi); 5879 if ((extent_type != BTRFS_FILE_EXTENT_REG && 5880 extent_type != BTRFS_FILE_EXTENT_PREALLOC) || 5881 (btrfs_file_extent_disk_bytenr(leaf, fi) != 5882 extent_key->objectid)) { 5883 path->slots[0]++; 5884 ret = 1; 5885 goto next; 5886 } 5887 5888 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 5889 ext_offset = btrfs_file_extent_offset(leaf, fi); 5890 5891 if (search_end == (u64)-1) { 5892 search_end = key.offset - ext_offset + 5893 btrfs_file_extent_ram_bytes(leaf, fi); 5894 } 5895 5896 if (!extent_locked) { 5897 lock_start = key.offset; 5898 lock_end = lock_start + num_bytes 
- 1; 5899 } else { 5900 if (lock_start > key.offset || 5901 lock_end + 1 < key.offset + num_bytes) { 5902 unlock_extent(&BTRFS_I(inode)->io_tree, 5903 lock_start, lock_end, GFP_NOFS); 5904 extent_locked = 0; 5905 } 5906 } 5907 5908 if (!inode) { 5909 btrfs_release_path(root, path); 5910 5911 inode = btrfs_iget_locked(root->fs_info->sb, 5912 key.objectid, root); 5913 if (inode->i_state & I_NEW) { 5914 BTRFS_I(inode)->root = root; 5915 BTRFS_I(inode)->location.objectid = 5916 key.objectid; 5917 BTRFS_I(inode)->location.type = 5918 BTRFS_INODE_ITEM_KEY; 5919 BTRFS_I(inode)->location.offset = 0; 5920 btrfs_read_locked_inode(inode); 5921 unlock_new_inode(inode); 5922 } 5923 /* 5924 * some code calls btrfs_commit_transaction while 5925 * holding the i_mutex, so we can't use mutex_lock 5926 * here. 5927 */ 5928 if (is_bad_inode(inode) || 5929 !mutex_trylock(&inode->i_mutex)) { 5930 iput(inode); 5931 inode = NULL; 5932 key.offset = (u64)-1; 5933 goto skip; 5934 } 5935 } 5936 5937 if (!extent_locked) { 5938 struct btrfs_ordered_extent *ordered; 5939 5940 btrfs_release_path(root, path); 5941 5942 lock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5943 lock_end, GFP_NOFS); 5944 ordered = btrfs_lookup_first_ordered_extent(inode, 5945 lock_end); 5946 if (ordered && 5947 ordered->file_offset <= lock_end && 5948 ordered->file_offset + ordered->len > lock_start) { 5949 unlock_extent(&BTRFS_I(inode)->io_tree, 5950 lock_start, lock_end, GFP_NOFS); 5951 btrfs_start_ordered_extent(inode, ordered, 1); 5952 btrfs_put_ordered_extent(ordered); 5953 key.offset += num_bytes; 5954 goto skip; 5955 } 5956 if (ordered) 5957 btrfs_put_ordered_extent(ordered); 5958 5959 extent_locked = 1; 5960 continue; 5961 } 5962 5963 if (nr_extents == 1) { 5964 /* update extent pointer in place */ 5965 btrfs_set_file_extent_disk_bytenr(leaf, fi, 5966 new_extents[0].disk_bytenr); 5967 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 5968 new_extents[0].disk_num_bytes); 5969 btrfs_mark_buffer_dirty(leaf); 5970 5971 btrfs_drop_extent_cache(inode, key.offset, 5972 key.offset + num_bytes - 1, 0); 5973 5974 ret = btrfs_inc_extent_ref(trans, root, 5975 new_extents[0].disk_bytenr, 5976 new_extents[0].disk_num_bytes, 5977 leaf->start, 5978 root->root_key.objectid, 5979 trans->transid, 5980 key.objectid); 5981 BUG_ON(ret); 5982 5983 ret = btrfs_free_extent(trans, root, 5984 extent_key->objectid, 5985 extent_key->offset, 5986 leaf->start, 5987 btrfs_header_owner(leaf), 5988 btrfs_header_generation(leaf), 5989 key.objectid, 0); 5990 BUG_ON(ret); 5991 5992 btrfs_release_path(root, path); 5993 key.offset += num_bytes; 5994 } else { 5995 BUG_ON(1); 5996 #if 0 5997 u64 alloc_hint; 5998 u64 extent_len; 5999 int i; 6000 /* 6001 * drop the old extent pointer first, then insert the 6002 * new pointers one by one 6003 */ 6004 btrfs_release_path(root, path); 6005 ret = btrfs_drop_extents(trans, root, inode, key.offset, 6006 key.offset + num_bytes, 6007 key.offset, &alloc_hint); 6008 BUG_ON(ret); 6009 6010 for (i = 0; i < nr_extents; i++) { 6011 if (ext_offset >= new_extents[i].num_bytes) { 6012 ext_offset -= new_extents[i].num_bytes; 6013 continue; 6014 } 6015 extent_len = min(new_extents[i].num_bytes - 6016 ext_offset, num_bytes); 6017 6018 ret = btrfs_insert_empty_item(trans, root, 6019 path, &key, 6020 sizeof(*fi)); 6021 BUG_ON(ret); 6022 6023 leaf = path->nodes[0]; 6024 fi = btrfs_item_ptr(leaf, path->slots[0], 6025 struct btrfs_file_extent_item); 6026 btrfs_set_file_extent_generation(leaf, fi, 6027 trans->transid); 6028 btrfs_set_file_extent_type(leaf,
fi, 6029 BTRFS_FILE_EXTENT_REG); 6030 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6031 new_extents[i].disk_bytenr); 6032 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6033 new_extents[i].disk_num_bytes); 6034 btrfs_set_file_extent_ram_bytes(leaf, fi, 6035 new_extents[i].ram_bytes); 6036 6037 btrfs_set_file_extent_compression(leaf, fi, 6038 new_extents[i].compression); 6039 btrfs_set_file_extent_encryption(leaf, fi, 6040 new_extents[i].encryption); 6041 btrfs_set_file_extent_other_encoding(leaf, fi, 6042 new_extents[i].other_encoding); 6043 6044 btrfs_set_file_extent_num_bytes(leaf, fi, 6045 extent_len); 6046 ext_offset += new_extents[i].offset; 6047 btrfs_set_file_extent_offset(leaf, fi, 6048 ext_offset); 6049 btrfs_mark_buffer_dirty(leaf); 6050 6051 btrfs_drop_extent_cache(inode, key.offset, 6052 key.offset + extent_len - 1, 0); 6053 6054 ret = btrfs_inc_extent_ref(trans, root, 6055 new_extents[i].disk_bytenr, 6056 new_extents[i].disk_num_bytes, 6057 leaf->start, 6058 root->root_key.objectid, 6059 trans->transid, key.objectid); 6060 BUG_ON(ret); 6061 btrfs_release_path(root, path); 6062 6063 inode_add_bytes(inode, extent_len); 6064 6065 ext_offset = 0; 6066 num_bytes -= extent_len; 6067 key.offset += extent_len; 6068 6069 if (num_bytes == 0) 6070 break; 6071 } 6072 BUG_ON(i >= nr_extents); 6073 #endif 6074 } 6075 6076 if (extent_locked) { 6077 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6078 lock_end, GFP_NOFS); 6079 extent_locked = 0; 6080 } 6081 skip: 6082 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && 6083 key.offset >= search_end) 6084 break; 6085 6086 cond_resched(); 6087 } 6088 ret = 0; 6089 out: 6090 btrfs_release_path(root, path); 6091 if (inode) { 6092 mutex_unlock(&inode->i_mutex); 6093 if (extent_locked) { 6094 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6095 lock_end, GFP_NOFS); 6096 } 6097 iput(inode); 6098 } 6099 return ret; 6100 } 6101 6102 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, 6103 struct btrfs_root *root, 6104 struct extent_buffer *buf, u64 orig_start) 6105 { 6106 int level; 6107 int ret; 6108 6109 BUG_ON(btrfs_header_generation(buf) != trans->transid); 6110 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 6111 6112 level = btrfs_header_level(buf); 6113 if (level == 0) { 6114 struct btrfs_leaf_ref *ref; 6115 struct btrfs_leaf_ref *orig_ref; 6116 6117 orig_ref = btrfs_lookup_leaf_ref(root, orig_start); 6118 if (!orig_ref) 6119 return -ENOENT; 6120 6121 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); 6122 if (!ref) { 6123 btrfs_free_leaf_ref(root, orig_ref); 6124 return -ENOMEM; 6125 } 6126 6127 ref->nritems = orig_ref->nritems; 6128 memcpy(ref->extents, orig_ref->extents, 6129 sizeof(ref->extents[0]) * ref->nritems); 6130 6131 btrfs_free_leaf_ref(root, orig_ref); 6132 6133 ref->root_gen = trans->transid; 6134 ref->bytenr = buf->start; 6135 ref->owner = btrfs_header_owner(buf); 6136 ref->generation = btrfs_header_generation(buf); 6137 6138 ret = btrfs_add_leaf_ref(root, ref, 0); 6139 WARN_ON(ret); 6140 btrfs_free_leaf_ref(root, ref); 6141 } 6142 return 0; 6143 } 6144 6145 static noinline int invalidate_extent_cache(struct btrfs_root *root, 6146 struct extent_buffer *leaf, 6147 struct btrfs_block_group_cache *group, 6148 struct btrfs_root *target_root) 6149 { 6150 struct btrfs_key key; 6151 struct inode *inode = NULL; 6152 struct btrfs_file_extent_item *fi; 6153 u64 num_bytes; 6154 u64 skip_objectid = 0; 6155 u32 nritems; 6156 u32 i; 6157 6158 nritems = btrfs_header_nritems(leaf); 6159 for (i = 0; i < 
nritems; i++) { 6160 btrfs_item_key_to_cpu(leaf, &key, i); 6161 if (key.objectid == skip_objectid || 6162 key.type != BTRFS_EXTENT_DATA_KEY) 6163 continue; 6164 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 6165 if (btrfs_file_extent_type(leaf, fi) == 6166 BTRFS_FILE_EXTENT_INLINE) 6167 continue; 6168 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 6169 continue; 6170 if (!inode || inode->i_ino != key.objectid) { 6171 iput(inode); 6172 inode = btrfs_ilookup(target_root->fs_info->sb, 6173 key.objectid, target_root, 1); 6174 } 6175 if (!inode) { 6176 skip_objectid = key.objectid; 6177 continue; 6178 } 6179 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 6180 6181 lock_extent(&BTRFS_I(inode)->io_tree, key.offset, 6182 key.offset + num_bytes - 1, GFP_NOFS); 6183 btrfs_drop_extent_cache(inode, key.offset, 6184 key.offset + num_bytes - 1, 1); 6185 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, 6186 key.offset + num_bytes - 1, GFP_NOFS); 6187 cond_resched(); 6188 } 6189 iput(inode); 6190 return 0; 6191 } 6192 6193 static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, 6194 struct btrfs_root *root, 6195 struct extent_buffer *leaf, 6196 struct btrfs_block_group_cache *group, 6197 struct inode *reloc_inode) 6198 { 6199 struct btrfs_key key; 6200 struct btrfs_key extent_key; 6201 struct btrfs_file_extent_item *fi; 6202 struct btrfs_leaf_ref *ref; 6203 struct disk_extent *new_extent; 6204 u64 bytenr; 6205 u64 num_bytes; 6206 u32 nritems; 6207 u32 i; 6208 int ext_index; 6209 int nr_extent; 6210 int ret; 6211 6212 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); 6213 BUG_ON(!new_extent); 6214 6215 ref = btrfs_lookup_leaf_ref(root, leaf->start); 6216 BUG_ON(!ref); 6217 6218 ext_index = -1; 6219 nritems = btrfs_header_nritems(leaf); 6220 for (i = 0; i < nritems; i++) { 6221 btrfs_item_key_to_cpu(leaf, &key, i); 6222 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 6223 continue; 6224 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 6225 if (btrfs_file_extent_type(leaf, fi) == 6226 BTRFS_FILE_EXTENT_INLINE) 6227 continue; 6228 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 6229 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 6230 if (bytenr == 0) 6231 continue; 6232 6233 ext_index++; 6234 if (bytenr >= group->key.objectid + group->key.offset || 6235 bytenr + num_bytes <= group->key.objectid) 6236 continue; 6237 6238 extent_key.objectid = bytenr; 6239 extent_key.offset = num_bytes; 6240 extent_key.type = BTRFS_EXTENT_ITEM_KEY; 6241 nr_extent = 1; 6242 ret = get_new_locations(reloc_inode, &extent_key, 6243 group->key.objectid, 1, 6244 &new_extent, &nr_extent); 6245 if (ret > 0) 6246 continue; 6247 BUG_ON(ret < 0); 6248 6249 BUG_ON(ref->extents[ext_index].bytenr != bytenr); 6250 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); 6251 ref->extents[ext_index].bytenr = new_extent->disk_bytenr; 6252 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; 6253 6254 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6255 new_extent->disk_bytenr); 6256 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6257 new_extent->disk_num_bytes); 6258 btrfs_mark_buffer_dirty(leaf); 6259 6260 ret = btrfs_inc_extent_ref(trans, root, 6261 new_extent->disk_bytenr, 6262 new_extent->disk_num_bytes, 6263 leaf->start, 6264 root->root_key.objectid, 6265 trans->transid, key.objectid); 6266 BUG_ON(ret); 6267 6268 ret = btrfs_free_extent(trans, root, 6269 bytenr, num_bytes, leaf->start, 6270 btrfs_header_owner(leaf), 6271 btrfs_header_generation(leaf), 6272 
key.objectid, 0); 6273 BUG_ON(ret); 6274 cond_resched(); 6275 } 6276 kfree(new_extent); 6277 BUG_ON(ext_index + 1 != ref->nritems); 6278 btrfs_free_leaf_ref(root, ref); 6279 return 0; 6280 } 6281 6282 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, 6283 struct btrfs_root *root) 6284 { 6285 struct btrfs_root *reloc_root; 6286 int ret; 6287 6288 if (root->reloc_root) { 6289 reloc_root = root->reloc_root; 6290 root->reloc_root = NULL; 6291 list_add(&reloc_root->dead_list, 6292 &root->fs_info->dead_reloc_roots); 6293 6294 btrfs_set_root_bytenr(&reloc_root->root_item, 6295 reloc_root->node->start); 6296 btrfs_set_root_level(&root->root_item, 6297 btrfs_header_level(reloc_root->node)); 6298 memset(&reloc_root->root_item.drop_progress, 0, 6299 sizeof(struct btrfs_disk_key)); 6300 reloc_root->root_item.drop_level = 0; 6301 6302 ret = btrfs_update_root(trans, root->fs_info->tree_root, 6303 &reloc_root->root_key, 6304 &reloc_root->root_item); 6305 BUG_ON(ret); 6306 } 6307 return 0; 6308 } 6309 6310 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) 6311 { 6312 struct btrfs_trans_handle *trans; 6313 struct btrfs_root *reloc_root; 6314 struct btrfs_root *prev_root = NULL; 6315 struct list_head dead_roots; 6316 int ret; 6317 unsigned long nr; 6318 6319 INIT_LIST_HEAD(&dead_roots); 6320 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); 6321 6322 while (!list_empty(&dead_roots)) { 6323 reloc_root = list_entry(dead_roots.prev, 6324 struct btrfs_root, dead_list); 6325 list_del_init(&reloc_root->dead_list); 6326 6327 BUG_ON(reloc_root->commit_root != NULL); 6328 while (1) { 6329 trans = btrfs_join_transaction(root, 1); 6330 BUG_ON(!trans); 6331 6332 mutex_lock(&root->fs_info->drop_mutex); 6333 ret = btrfs_drop_snapshot(trans, reloc_root); 6334 if (ret != -EAGAIN) 6335 break; 6336 mutex_unlock(&root->fs_info->drop_mutex); 6337 6338 nr = trans->blocks_used; 6339 ret = btrfs_end_transaction(trans, root); 6340 BUG_ON(ret); 6341 btrfs_btree_balance_dirty(root, nr); 6342 } 6343 6344 free_extent_buffer(reloc_root->node); 6345 6346 ret = btrfs_del_root(trans, root->fs_info->tree_root, 6347 &reloc_root->root_key); 6348 BUG_ON(ret); 6349 mutex_unlock(&root->fs_info->drop_mutex); 6350 6351 nr = trans->blocks_used; 6352 ret = btrfs_end_transaction(trans, root); 6353 BUG_ON(ret); 6354 btrfs_btree_balance_dirty(root, nr); 6355 6356 kfree(prev_root); 6357 prev_root = reloc_root; 6358 } 6359 if (prev_root) { 6360 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); 6361 kfree(prev_root); 6362 } 6363 return 0; 6364 } 6365 6366 int btrfs_add_dead_reloc_root(struct btrfs_root *root) 6367 { 6368 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); 6369 return 0; 6370 } 6371 6372 int btrfs_cleanup_reloc_trees(struct btrfs_root *root) 6373 { 6374 struct btrfs_root *reloc_root; 6375 struct btrfs_trans_handle *trans; 6376 struct btrfs_key location; 6377 int found; 6378 int ret; 6379 6380 mutex_lock(&root->fs_info->tree_reloc_mutex); 6381 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); 6382 BUG_ON(ret); 6383 found = !list_empty(&root->fs_info->dead_reloc_roots); 6384 mutex_unlock(&root->fs_info->tree_reloc_mutex); 6385 6386 if (found) { 6387 trans = btrfs_start_transaction(root, 1); 6388 BUG_ON(!trans); 6389 ret = btrfs_commit_transaction(trans, root); 6390 BUG_ON(ret); 6391 } 6392 6393 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 6394 location.offset = (u64)-1; 6395 location.type = BTRFS_ROOT_ITEM_KEY; 6396 6397 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, 
&location); 6398 BUG_ON(!reloc_root); 6399 btrfs_orphan_cleanup(reloc_root); 6400 return 0; 6401 } 6402 6403 static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, 6404 struct btrfs_root *root) 6405 { 6406 struct btrfs_root *reloc_root; 6407 struct extent_buffer *eb; 6408 struct btrfs_root_item *root_item; 6409 struct btrfs_key root_key; 6410 int ret; 6411 6412 BUG_ON(!root->ref_cows); 6413 if (root->reloc_root) 6414 return 0; 6415 6416 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 6417 BUG_ON(!root_item); 6418 6419 ret = btrfs_copy_root(trans, root, root->commit_root, 6420 &eb, BTRFS_TREE_RELOC_OBJECTID); 6421 BUG_ON(ret); 6422 6423 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; 6424 root_key.offset = root->root_key.objectid; 6425 root_key.type = BTRFS_ROOT_ITEM_KEY; 6426 6427 memcpy(root_item, &root->root_item, sizeof(*root_item)); 6428 btrfs_set_root_refs(root_item, 0); 6429 btrfs_set_root_bytenr(root_item, eb->start); 6430 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 6431 btrfs_set_root_generation(root_item, trans->transid); 6432 6433 btrfs_tree_unlock(eb); 6434 free_extent_buffer(eb); 6435 6436 ret = btrfs_insert_root(trans, root->fs_info->tree_root, 6437 &root_key, root_item); 6438 BUG_ON(ret); 6439 kfree(root_item); 6440 6441 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 6442 &root_key); 6443 BUG_ON(!reloc_root); 6444 reloc_root->last_trans = trans->transid; 6445 reloc_root->commit_root = NULL; 6446 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; 6447 6448 root->reloc_root = reloc_root; 6449 return 0; 6450 } 6451 6452 /* 6453 * Core function of space balance. 6454 * 6455 * The idea is to use reloc trees to relocate tree blocks in reference 6456 * counted roots. There is one reloc tree for each subvol, and all 6457 * reloc trees share the same root key objectid. Reloc trees are snapshots 6458 * of the latest committed roots of subvols (root->commit_root). 6459 * 6460 * To relocate a tree block referenced by a subvol, there are two steps: 6461 * COW the block through the subvol's reloc tree, then update the block pointer 6462 * in the subvol to point to the new block. Since all reloc trees share 6463 * the same root key objectid, doing special handling for tree blocks owned 6464 * by them is easy. Once a tree block has been COWed in one reloc tree, 6465 * we can use the resulting new block directly when the same block is 6466 * required to COW again through other reloc trees. In this way, relocated 6467 * tree blocks are shared between reloc trees, so they are also shared 6468 * between subvols.
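 *
 * relocate_one_path() below implements this flow: it makes sure the
 * subvol's reloc tree exists (init_reloc_tree), COWs the path down to
 * the target block inside the reloc tree, rewrites file extent pointers
 * in the COWed leaf to their new locations (replace_extents_in_leaf),
 * and then uses btrfs_merge_path() to swap the new blocks into the
 * fs tree itself.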
6469 */ 6470 static noinline int relocate_one_path(struct btrfs_trans_handle *trans, 6471 struct btrfs_root *root, 6472 struct btrfs_path *path, 6473 struct btrfs_key *first_key, 6474 struct btrfs_ref_path *ref_path, 6475 struct btrfs_block_group_cache *group, 6476 struct inode *reloc_inode) 6477 { 6478 struct btrfs_root *reloc_root; 6479 struct extent_buffer *eb = NULL; 6480 struct btrfs_key *keys; 6481 u64 *nodes; 6482 int level; 6483 int shared_level; 6484 int lowest_level = 0; 6485 int ret; 6486 6487 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 6488 lowest_level = ref_path->owner_objectid; 6489 6490 if (!root->ref_cows) { 6491 path->lowest_level = lowest_level; 6492 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); 6493 BUG_ON(ret < 0); 6494 path->lowest_level = 0; 6495 btrfs_release_path(root, path); 6496 return 0; 6497 } 6498 6499 mutex_lock(&root->fs_info->tree_reloc_mutex); 6500 ret = init_reloc_tree(trans, root); 6501 BUG_ON(ret); 6502 reloc_root = root->reloc_root; 6503 6504 shared_level = ref_path->shared_level; 6505 ref_path->shared_level = BTRFS_MAX_LEVEL - 1; 6506 6507 keys = ref_path->node_keys; 6508 nodes = ref_path->new_nodes; 6509 memset(&keys[shared_level + 1], 0, 6510 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1)); 6511 memset(&nodes[shared_level + 1], 0, 6512 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1)); 6513 6514 if (nodes[lowest_level] == 0) { 6515 path->lowest_level = lowest_level; 6516 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 6517 0, 1); 6518 BUG_ON(ret); 6519 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) { 6520 eb = path->nodes[level]; 6521 if (!eb || eb == reloc_root->node) 6522 break; 6523 nodes[level] = eb->start; 6524 if (level == 0) 6525 btrfs_item_key_to_cpu(eb, &keys[level], 0); 6526 else 6527 btrfs_node_key_to_cpu(eb, &keys[level], 0); 6528 } 6529 if (nodes[0] && 6530 ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6531 eb = path->nodes[0]; 6532 ret = replace_extents_in_leaf(trans, reloc_root, eb, 6533 group, reloc_inode); 6534 BUG_ON(ret); 6535 } 6536 btrfs_release_path(reloc_root, path); 6537 } else { 6538 ret = btrfs_merge_path(trans, reloc_root, keys, nodes, 6539 lowest_level); 6540 BUG_ON(ret); 6541 } 6542 6543 /* 6544 * replace tree blocks in the fs tree with tree blocks in 6545 * the reloc tree. 
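 * btrfs_merge_path() swaps the pre-COWed reloc tree blocks recorded in
 * the keys/nodes arrays above into the fs tree, down to lowest_level.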
6546 */ 6547 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); 6548 BUG_ON(ret < 0); 6549 6550 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6551 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 6552 0, 0); 6553 BUG_ON(ret); 6554 extent_buffer_get(path->nodes[0]); 6555 eb = path->nodes[0]; 6556 btrfs_release_path(reloc_root, path); 6557 ret = invalidate_extent_cache(reloc_root, eb, group, root); 6558 BUG_ON(ret); 6559 free_extent_buffer(eb); 6560 } 6561 6562 mutex_unlock(&root->fs_info->tree_reloc_mutex); 6563 path->lowest_level = 0; 6564 return 0; 6565 } 6566 6567 static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, 6568 struct btrfs_root *root, 6569 struct btrfs_path *path, 6570 struct btrfs_key *first_key, 6571 struct btrfs_ref_path *ref_path) 6572 { 6573 int ret; 6574 6575 ret = relocate_one_path(trans, root, path, first_key, 6576 ref_path, NULL, NULL); 6577 BUG_ON(ret); 6578 6579 return 0; 6580 } 6581 6582 static noinline int del_extent_zero(struct btrfs_trans_handle *trans, 6583 struct btrfs_root *extent_root, 6584 struct btrfs_path *path, 6585 struct btrfs_key *extent_key) 6586 { 6587 int ret; 6588 6589 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); 6590 if (ret) 6591 goto out; 6592 ret = btrfs_del_item(trans, extent_root, path); 6593 out: 6594 btrfs_release_path(extent_root, path); 6595 return ret; 6596 } 6597 6598 static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info, 6599 struct btrfs_ref_path *ref_path) 6600 { 6601 struct btrfs_key root_key; 6602 6603 root_key.objectid = ref_path->root_objectid; 6604 root_key.type = BTRFS_ROOT_ITEM_KEY; 6605 if (is_cowonly_root(ref_path->root_objectid)) 6606 root_key.offset = 0; 6607 else 6608 root_key.offset = (u64)-1; 6609 6610 return btrfs_read_fs_root_no_name(fs_info, &root_key); 6611 } 6612 6613 static noinline int relocate_one_extent(struct btrfs_root *extent_root, 6614 struct btrfs_path *path, 6615 struct btrfs_key *extent_key, 6616 struct btrfs_block_group_cache *group, 6617 struct inode *reloc_inode, int pass) 6618 { 6619 struct btrfs_trans_handle *trans; 6620 struct btrfs_root *found_root; 6621 struct btrfs_ref_path *ref_path = NULL; 6622 struct disk_extent *new_extents = NULL; 6623 int nr_extents = 0; 6624 int loops; 6625 int ret; 6626 int level; 6627 struct btrfs_key first_key; 6628 u64 prev_block = 0; 6629 6630 6631 trans = btrfs_start_transaction(extent_root, 1); 6632 BUG_ON(!trans); 6633 6634 if (extent_key->objectid == 0) { 6635 ret = del_extent_zero(trans, extent_root, path, extent_key); 6636 goto out; 6637 } 6638 6639 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); 6640 if (!ref_path) { 6641 ret = -ENOMEM; 6642 goto out; 6643 } 6644 6645 for (loops = 0; ; loops++) { 6646 if (loops == 0) { 6647 ret = btrfs_first_ref_path(trans, extent_root, ref_path, 6648 extent_key->objectid); 6649 } else { 6650 ret = btrfs_next_ref_path(trans, extent_root, ref_path); 6651 } 6652 if (ret < 0) 6653 goto out; 6654 if (ret > 0) 6655 break; 6656 6657 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || 6658 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) 6659 continue; 6660 6661 found_root = read_ref_root(extent_root->fs_info, ref_path); 6662 BUG_ON(!found_root); 6663 /* 6664 * for reference counted tree, only process reference paths 6665 * rooted at the latest committed root. 
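 * (reference paths whose recorded root generation no longer matches the
 * committed root are skipped by the check below)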
6666 */ 6667 if (found_root->ref_cows && 6668 ref_path->root_generation != found_root->root_key.offset) 6669 continue; 6670 6671 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6672 if (pass == 0) { 6673 /* 6674 * copy data extents to new locations 6675 */ 6676 u64 group_start = group->key.objectid; 6677 ret = relocate_data_extent(reloc_inode, 6678 extent_key, 6679 group_start); 6680 if (ret < 0) 6681 goto out; 6682 break; 6683 } 6684 level = 0; 6685 } else { 6686 level = ref_path->owner_objectid; 6687 } 6688 6689 if (prev_block != ref_path->nodes[level]) { 6690 struct extent_buffer *eb; 6691 u64 block_start = ref_path->nodes[level]; 6692 u64 block_size = btrfs_level_size(found_root, level); 6693 6694 eb = read_tree_block(found_root, block_start, 6695 block_size, 0); 6696 btrfs_tree_lock(eb); 6697 BUG_ON(level != btrfs_header_level(eb)); 6698 6699 if (level == 0) 6700 btrfs_item_key_to_cpu(eb, &first_key, 0); 6701 else 6702 btrfs_node_key_to_cpu(eb, &first_key, 0); 6703 6704 btrfs_tree_unlock(eb); 6705 free_extent_buffer(eb); 6706 prev_block = block_start; 6707 } 6708 6709 mutex_lock(&extent_root->fs_info->trans_mutex); 6710 btrfs_record_root_in_trans(found_root); 6711 mutex_unlock(&extent_root->fs_info->trans_mutex); 6712 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6713 /* 6714 * try to update data extent references while 6715 * keeping metadata shared between snapshots. 6716 */ 6717 if (pass == 1) { 6718 ret = relocate_one_path(trans, found_root, 6719 path, &first_key, ref_path, 6720 group, reloc_inode); 6721 if (ret < 0) 6722 goto out; 6723 continue; 6724 } 6725 /* 6726 * use fallback method to process the remaining 6727 * references. 6728 */ 6729 if (!new_extents) { 6730 u64 group_start = group->key.objectid; 6731 new_extents = kmalloc(sizeof(*new_extents), 6732 GFP_NOFS); 6733 nr_extents = 1; 6734 ret = get_new_locations(reloc_inode, 6735 extent_key, 6736 group_start, 1, 6737 &new_extents, 6738 &nr_extents); 6739 if (ret) 6740 goto out; 6741 } 6742 ret = replace_one_extent(trans, found_root, 6743 path, extent_key, 6744 &first_key, ref_path, 6745 new_extents, nr_extents); 6746 } else { 6747 ret = relocate_tree_block(trans, found_root, path, 6748 &first_key, ref_path); 6749 } 6750 if (ret < 0) 6751 goto out; 6752 } 6753 ret = 0; 6754 out: 6755 btrfs_end_transaction(trans, extent_root); 6756 kfree(new_extents); 6757 kfree(ref_path); 6758 return ret; 6759 } 6760 #endif 6761 6762 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6763 { 6764 u64 num_devices; 6765 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 6766 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 6767 6768 num_devices = root->fs_info->fs_devices->rw_devices; 6769 if (num_devices == 1) { 6770 stripped |= BTRFS_BLOCK_GROUP_DUP; 6771 stripped = flags & ~stripped; 6772 6773 /* turn raid0 into single device chunks */ 6774 if (flags & BTRFS_BLOCK_GROUP_RAID0) 6775 return stripped; 6776 6777 /* turn mirroring into duplication */ 6778 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 6779 BTRFS_BLOCK_GROUP_RAID10)) 6780 return stripped | BTRFS_BLOCK_GROUP_DUP; 6781 return flags; 6782 } else { 6783 /* they already had raid on here, just return */ 6784 if (flags & stripped) 6785 return flags; 6786 6787 stripped |= BTRFS_BLOCK_GROUP_DUP; 6788 stripped = flags & ~stripped; 6789 6790 /* switch duplicated blocks with raid1 */ 6791 if (flags & BTRFS_BLOCK_GROUP_DUP) 6792 return stripped | BTRFS_BLOCK_GROUP_RAID1; 6793 6794 /* turn single device chunks into raid0 */ 6795 return stripped | 
BTRFS_BLOCK_GROUP_RAID0; 6796 } 6797 return flags; 6798 } 6799 6800 static int __alloc_chunk_for_shrink(struct btrfs_root *root, 6801 struct btrfs_block_group_cache *shrink_block_group, 6802 int force) 6803 { 6804 struct btrfs_trans_handle *trans; 6805 u64 new_alloc_flags; 6806 u64 calc; 6807 6808 spin_lock(&shrink_block_group->lock); 6809 if (btrfs_block_group_used(&shrink_block_group->item) + 6810 shrink_block_group->reserved > 0) { 6811 spin_unlock(&shrink_block_group->lock); 6812 6813 trans = btrfs_start_transaction(root, 1); 6814 spin_lock(&shrink_block_group->lock); 6815 6816 new_alloc_flags = update_block_group_flags(root, 6817 shrink_block_group->flags); 6818 if (new_alloc_flags != shrink_block_group->flags) { 6819 calc = 6820 btrfs_block_group_used(&shrink_block_group->item); 6821 } else { 6822 calc = shrink_block_group->key.offset; 6823 } 6824 spin_unlock(&shrink_block_group->lock); 6825 6826 do_chunk_alloc(trans, root->fs_info->extent_root, 6827 calc + 2 * 1024 * 1024, new_alloc_flags, force); 6828 6829 btrfs_end_transaction(trans, root); 6830 } else 6831 spin_unlock(&shrink_block_group->lock); 6832 return 0; 6833 } 6834 6835 6836 int btrfs_prepare_block_group_relocation(struct btrfs_root *root, 6837 struct btrfs_block_group_cache *group) 6838 6839 { 6840 __alloc_chunk_for_shrink(root, group, 1); 6841 set_block_group_readonly(group); 6842 return 0; 6843 } 6844 6845 #if 0 6846 static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 6847 struct btrfs_root *root, 6848 u64 objectid, u64 size) 6849 { 6850 struct btrfs_path *path; 6851 struct btrfs_inode_item *item; 6852 struct extent_buffer *leaf; 6853 int ret; 6854 6855 path = btrfs_alloc_path(); 6856 if (!path) 6857 return -ENOMEM; 6858 6859 path->leave_spinning = 1; 6860 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 6861 if (ret) 6862 goto out; 6863 6864 leaf = path->nodes[0]; 6865 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 6866 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 6867 btrfs_set_inode_generation(leaf, item, 1); 6868 btrfs_set_inode_size(leaf, item, size); 6869 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 6870 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); 6871 btrfs_mark_buffer_dirty(leaf); 6872 btrfs_release_path(root, path); 6873 out: 6874 btrfs_free_path(path); 6875 return ret; 6876 } 6877 6878 static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, 6879 struct btrfs_block_group_cache *group) 6880 { 6881 struct inode *inode = NULL; 6882 struct btrfs_trans_handle *trans; 6883 struct btrfs_root *root; 6884 struct btrfs_key root_key; 6885 u64 objectid = BTRFS_FIRST_FREE_OBJECTID; 6886 int err = 0; 6887 6888 root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 6889 root_key.type = BTRFS_ROOT_ITEM_KEY; 6890 root_key.offset = (u64)-1; 6891 root = btrfs_read_fs_root_no_name(fs_info, &root_key); 6892 if (IS_ERR(root)) 6893 return ERR_CAST(root); 6894 6895 trans = btrfs_start_transaction(root, 1); 6896 BUG_ON(!trans); 6897 6898 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 6899 if (err) 6900 goto out; 6901 6902 err = __insert_orphan_inode(trans, root, objectid, group->key.offset); 6903 BUG_ON(err); 6904 6905 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, 6906 group->key.offset, 0, group->key.offset, 6907 0, 0, 0); 6908 BUG_ON(err); 6909 6910 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); 6911 if (inode->i_state & I_NEW) { 6912 BTRFS_I(inode)->root = root; 6913 
BTRFS_I(inode)->location.objectid = objectid; 6914 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; 6915 BTRFS_I(inode)->location.offset = 0; 6916 btrfs_read_locked_inode(inode); 6917 unlock_new_inode(inode); 6918 BUG_ON(is_bad_inode(inode)); 6919 } else { 6920 BUG_ON(1); 6921 } 6922 BTRFS_I(inode)->index_cnt = group->key.objectid; 6923 6924 err = btrfs_orphan_add(trans, inode); 6925 out: 6926 btrfs_end_transaction(trans, root); 6927 if (err) { 6928 if (inode) 6929 iput(inode); 6930 inode = ERR_PTR(err); 6931 } 6932 return inode; 6933 } 6934 6935 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) 6936 { 6937 6938 struct btrfs_ordered_sum *sums; 6939 struct btrfs_sector_sum *sector_sum; 6940 struct btrfs_ordered_extent *ordered; 6941 struct btrfs_root *root = BTRFS_I(inode)->root; 6942 struct list_head list; 6943 size_t offset; 6944 int ret; 6945 u64 disk_bytenr; 6946 6947 INIT_LIST_HEAD(&list); 6948 6949 ordered = btrfs_lookup_ordered_extent(inode, file_pos); 6950 BUG_ON(ordered->file_offset != file_pos || ordered->len != len); 6951 6952 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 6953 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 6954 disk_bytenr + len - 1, &list); 6955 6956 while (!list_empty(&list)) { 6957 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 6958 list_del_init(&sums->list); 6959 6960 sector_sum = sums->sums; 6961 sums->bytenr = ordered->start; 6962 6963 offset = 0; 6964 while (offset < sums->len) { 6965 sector_sum->bytenr += ordered->start - disk_bytenr; 6966 sector_sum++; 6967 offset += root->sectorsize; 6968 } 6969 6970 btrfs_add_ordered_sum(inode, ordered, sums); 6971 } 6972 btrfs_put_ordered_extent(ordered); 6973 return 0; 6974 } 6975 6976 int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) 6977 { 6978 struct btrfs_trans_handle *trans; 6979 struct btrfs_path *path; 6980 struct btrfs_fs_info *info = root->fs_info; 6981 struct extent_buffer *leaf; 6982 struct inode *reloc_inode; 6983 struct btrfs_block_group_cache *block_group; 6984 struct btrfs_key key; 6985 u64 skipped; 6986 u64 cur_byte; 6987 u64 total_found; 6988 u32 nritems; 6989 int ret; 6990 int progress; 6991 int pass = 0; 6992 6993 root = root->fs_info->extent_root; 6994 6995 block_group = btrfs_lookup_block_group(info, group_start); 6996 BUG_ON(!block_group); 6997 6998 printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", 6999 (unsigned long long)block_group->key.objectid, 7000 (unsigned long long)block_group->flags); 7001 7002 path = btrfs_alloc_path(); 7003 BUG_ON(!path); 7004 7005 reloc_inode = create_reloc_inode(info, block_group); 7006 BUG_ON(IS_ERR(reloc_inode)); 7007 7008 __alloc_chunk_for_shrink(root, block_group, 1); 7009 set_block_group_readonly(block_group); 7010 7011 btrfs_start_delalloc_inodes(info->tree_root); 7012 btrfs_wait_ordered_extents(info->tree_root, 0); 7013 again: 7014 skipped = 0; 7015 total_found = 0; 7016 progress = 0; 7017 key.objectid = block_group->key.objectid; 7018 key.offset = 0; 7019 key.type = 0; 7020 cur_byte = key.objectid; 7021 7022 trans = btrfs_start_transaction(info->tree_root, 1); 7023 btrfs_commit_transaction(trans, info->tree_root); 7024 7025 mutex_lock(&root->fs_info->cleaner_mutex); 7026 btrfs_clean_old_snapshots(info->tree_root); 7027 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); 7028 mutex_unlock(&root->fs_info->cleaner_mutex); 7029 7030 trans = btrfs_start_transaction(info->tree_root, 1); 7031 btrfs_commit_transaction(trans, info->tree_root); 7032 7033 
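/*
 * scan every extent item that falls inside this block group and try to
 * relocate it.  Extents that cannot be handled in this pass are counted
 * in 'skipped'; as long as anything was found, the scan is repeated with
 * an incremented pass number.
 */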
while (1) { 7034 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 7035 if (ret < 0) 7036 goto out; 7037 next: 7038 leaf = path->nodes[0]; 7039 nritems = btrfs_header_nritems(leaf); 7040 if (path->slots[0] >= nritems) { 7041 ret = btrfs_next_leaf(root, path); 7042 if (ret < 0) 7043 goto out; 7044 if (ret == 1) { 7045 ret = 0; 7046 break; 7047 } 7048 leaf = path->nodes[0]; 7049 nritems = btrfs_header_nritems(leaf); 7050 } 7051 7052 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 7053 7054 if (key.objectid >= block_group->key.objectid + 7055 block_group->key.offset) 7056 break; 7057 7058 if (progress && need_resched()) { 7059 btrfs_release_path(root, path); 7060 cond_resched(); 7061 progress = 0; 7062 continue; 7063 } 7064 progress = 1; 7065 7066 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || 7067 key.objectid + key.offset <= cur_byte) { 7068 path->slots[0]++; 7069 goto next; 7070 } 7071 7072 total_found++; 7073 cur_byte = key.objectid + key.offset; 7074 btrfs_release_path(root, path); 7075 7076 __alloc_chunk_for_shrink(root, block_group, 0); 7077 ret = relocate_one_extent(root, path, &key, block_group, 7078 reloc_inode, pass); 7079 BUG_ON(ret < 0); 7080 if (ret > 0) 7081 skipped++; 7082 7083 key.objectid = cur_byte; 7084 key.type = 0; 7085 key.offset = 0; 7086 } 7087 7088 btrfs_release_path(root, path); 7089 7090 if (pass == 0) { 7091 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); 7092 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); 7093 } 7094 7095 if (total_found > 0) { 7096 printk(KERN_INFO "btrfs found %llu extents in pass %d\n", 7097 (unsigned long long)total_found, pass); 7098 pass++; 7099 if (total_found == skipped && pass > 2) { 7100 iput(reloc_inode); 7101 reloc_inode = create_reloc_inode(info, block_group); 7102 pass = 0; 7103 } 7104 goto again; 7105 } 7106 7107 /* delete reloc_inode */ 7108 iput(reloc_inode); 7109 7110 /* unpin extents in this range */ 7111 trans = btrfs_start_transaction(info->tree_root, 1); 7112 btrfs_commit_transaction(trans, info->tree_root); 7113 7114 spin_lock(&block_group->lock); 7115 WARN_ON(block_group->pinned > 0); 7116 WARN_ON(block_group->reserved > 0); 7117 WARN_ON(btrfs_block_group_used(&block_group->item) > 0); 7118 spin_unlock(&block_group->lock); 7119 btrfs_put_block_group(block_group); 7120 ret = 0; 7121 out: 7122 btrfs_free_path(path); 7123 return ret; 7124 } 7125 #endif 7126 7127 static int find_first_block_group(struct btrfs_root *root, 7128 struct btrfs_path *path, struct btrfs_key *key) 7129 { 7130 int ret = 0; 7131 struct btrfs_key found_key; 7132 struct extent_buffer *leaf; 7133 int slot; 7134 7135 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 7136 if (ret < 0) 7137 goto out; 7138 7139 while (1) { 7140 slot = path->slots[0]; 7141 leaf = path->nodes[0]; 7142 if (slot >= btrfs_header_nritems(leaf)) { 7143 ret = btrfs_next_leaf(root, path); 7144 if (ret == 0) 7145 continue; 7146 if (ret < 0) 7147 goto out; 7148 break; 7149 } 7150 btrfs_item_key_to_cpu(leaf, &found_key, slot); 7151 7152 if (found_key.objectid >= key->objectid && 7153 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 7154 ret = 0; 7155 goto out; 7156 } 7157 path->slots[0]++; 7158 } 7159 ret = -ENOENT; 7160 out: 7161 return ret; 7162 } 7163 7164 int btrfs_free_block_groups(struct btrfs_fs_info *info) 7165 { 7166 struct btrfs_block_group_cache *block_group; 7167 struct btrfs_space_info *space_info; 7168 struct rb_node *n; 7169 7170 spin_lock(&info->block_group_cache_lock); 7171 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 7172 
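/*
 * pull each block group out of the rb tree and its space_info list,
 * wait for any caching thread that is still scanning it, then drop its
 * free space cache before freeing the group itself.
 */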
block_group = rb_entry(n, struct btrfs_block_group_cache, 7173 cache_node); 7174 rb_erase(&block_group->cache_node, 7175 &info->block_group_cache_tree); 7176 spin_unlock(&info->block_group_cache_lock); 7177 7178 down_write(&block_group->space_info->groups_sem); 7179 list_del(&block_group->list); 7180 up_write(&block_group->space_info->groups_sem); 7181 7182 if (block_group->cached == BTRFS_CACHE_STARTED) 7183 wait_event(block_group->caching_q, 7184 block_group_cache_done(block_group)); 7185 7186 btrfs_remove_free_space_cache(block_group); 7187 7188 WARN_ON(atomic_read(&block_group->count) != 1); 7189 kfree(block_group); 7190 7191 spin_lock(&info->block_group_cache_lock); 7192 } 7193 spin_unlock(&info->block_group_cache_lock); 7194 7195 /* now that all the block groups are freed, go through and 7196 * free all the space_info structs. This is only called during 7197 * the final stages of unmount, and so we know nobody is 7198 * using them. We call synchronize_rcu() once before we start, 7199 * just to be on the safe side. 7200 */ 7201 synchronize_rcu(); 7202 7203 while(!list_empty(&info->space_info)) { 7204 space_info = list_entry(info->space_info.next, 7205 struct btrfs_space_info, 7206 list); 7207 7208 list_del(&space_info->list); 7209 kfree(space_info); 7210 } 7211 return 0; 7212 } 7213 7214 int btrfs_read_block_groups(struct btrfs_root *root) 7215 { 7216 struct btrfs_path *path; 7217 int ret; 7218 struct btrfs_block_group_cache *cache; 7219 struct btrfs_fs_info *info = root->fs_info; 7220 struct btrfs_space_info *space_info; 7221 struct btrfs_key key; 7222 struct btrfs_key found_key; 7223 struct extent_buffer *leaf; 7224 7225 root = info->extent_root; 7226 key.objectid = 0; 7227 key.offset = 0; 7228 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 7229 path = btrfs_alloc_path(); 7230 if (!path) 7231 return -ENOMEM; 7232 7233 while (1) { 7234 ret = find_first_block_group(root, path, &key); 7235 if (ret > 0) { 7236 ret = 0; 7237 goto error; 7238 } 7239 if (ret != 0) 7240 goto error; 7241 7242 leaf = path->nodes[0]; 7243 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 7244 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7245 if (!cache) { 7246 ret = -ENOMEM; 7247 break; 7248 } 7249 7250 atomic_set(&cache->count, 1); 7251 spin_lock_init(&cache->lock); 7252 spin_lock_init(&cache->tree_lock); 7253 cache->fs_info = info; 7254 init_waitqueue_head(&cache->caching_q); 7255 INIT_LIST_HEAD(&cache->list); 7256 INIT_LIST_HEAD(&cache->cluster_list); 7257 7258 /* 7259 * we only want to have 32k of ram per block group for keeping 7260 * track of free space, and if we pass 1/2 of that we want to 7261 * start converting things over to using bitmaps 7262 */ 7263 cache->extents_thresh = ((1024 * 32) / 2) / 7264 sizeof(struct btrfs_free_space); 7265 7266 read_extent_buffer(leaf, &cache->item, 7267 btrfs_item_ptr_offset(leaf, path->slots[0]), 7268 sizeof(cache->item)); 7269 memcpy(&cache->key, &found_key, sizeof(found_key)); 7270 7271 key.objectid = found_key.objectid + found_key.offset; 7272 btrfs_release_path(root, path); 7273 cache->flags = btrfs_block_group_flags(&cache->item); 7274 cache->sectorsize = root->sectorsize; 7275 7276 remove_sb_from_cache(root, cache); 7277 7278 /* 7279 * check for two cases, either we are full, and therefore 7280 * don't need to bother with the caching work since we won't 7281 * find any space, or we are empty, and we can just add all 7282 * the space in and be done with it. This saves us _alot_ of 7283 * time, particularly in the full case. 
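 * A block group whose used byte count equals its length is completely
 * full, so caching would find nothing; one whose used count is zero is
 * completely empty, so its whole range can be added to the free space
 * cache immediately.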
7284 */ 7285 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 7286 cache->cached = BTRFS_CACHE_FINISHED; 7287 } else if (btrfs_block_group_used(&cache->item) == 0) { 7288 cache->cached = BTRFS_CACHE_FINISHED; 7289 add_new_free_space(cache, root->fs_info, 7290 found_key.objectid, 7291 found_key.objectid + 7292 found_key.offset); 7293 } 7294 7295 ret = update_space_info(info, cache->flags, found_key.offset, 7296 btrfs_block_group_used(&cache->item), 7297 &space_info); 7298 BUG_ON(ret); 7299 cache->space_info = space_info; 7300 down_write(&space_info->groups_sem); 7301 list_add_tail(&cache->list, &space_info->block_groups); 7302 up_write(&space_info->groups_sem); 7303 7304 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7305 BUG_ON(ret); 7306 7307 set_avail_alloc_bits(root->fs_info, cache->flags); 7308 if (btrfs_chunk_readonly(root, cache->key.objectid)) 7309 set_block_group_readonly(cache); 7310 } 7311 ret = 0; 7312 error: 7313 btrfs_free_path(path); 7314 return ret; 7315 } 7316 7317 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 7318 struct btrfs_root *root, u64 bytes_used, 7319 u64 type, u64 chunk_objectid, u64 chunk_offset, 7320 u64 size) 7321 { 7322 int ret; 7323 struct btrfs_root *extent_root; 7324 struct btrfs_block_group_cache *cache; 7325 7326 extent_root = root->fs_info->extent_root; 7327 7328 root->fs_info->last_trans_log_full_commit = trans->transid; 7329 7330 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7331 if (!cache) 7332 return -ENOMEM; 7333 7334 cache->key.objectid = chunk_offset; 7335 cache->key.offset = size; 7336 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7337 cache->sectorsize = root->sectorsize; 7338 7339 /* 7340 * we only want to have 32k of ram per block group for keeping track 7341 * of free space, and if we pass 1/2 of that we want to start 7342 * converting things over to using bitmaps 7343 */ 7344 cache->extents_thresh = ((1024 * 32) / 2) / 7345 sizeof(struct btrfs_free_space); 7346 atomic_set(&cache->count, 1); 7347 spin_lock_init(&cache->lock); 7348 spin_lock_init(&cache->tree_lock); 7349 init_waitqueue_head(&cache->caching_q); 7350 INIT_LIST_HEAD(&cache->list); 7351 INIT_LIST_HEAD(&cache->cluster_list); 7352 7353 btrfs_set_block_group_used(&cache->item, bytes_used); 7354 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7355 cache->flags = type; 7356 btrfs_set_block_group_flags(&cache->item, type); 7357 7358 cache->cached = BTRFS_CACHE_FINISHED; 7359 remove_sb_from_cache(root, cache); 7360 7361 add_new_free_space(cache, root->fs_info, chunk_offset, 7362 chunk_offset + size); 7363 7364 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7365 &cache->space_info); 7366 BUG_ON(ret); 7367 down_write(&cache->space_info->groups_sem); 7368 list_add_tail(&cache->list, &cache->space_info->block_groups); 7369 up_write(&cache->space_info->groups_sem); 7370 7371 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7372 BUG_ON(ret); 7373 7374 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 7375 sizeof(cache->item)); 7376 BUG_ON(ret); 7377 7378 set_avail_alloc_bits(extent_root->fs_info, type); 7379 7380 return 0; 7381 } 7382 7383 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 7384 struct btrfs_root *root, u64 group_start) 7385 { 7386 struct btrfs_path *path; 7387 struct btrfs_block_group_cache *block_group; 7388 struct btrfs_free_cluster *cluster; 7389 struct btrfs_key key; 7390 int ret; 7391 7392 root = root->fs_info->extent_root; 7393 7394 block_group = 
btrfs_lookup_block_group(root->fs_info, group_start); 7395 BUG_ON(!block_group); 7396 BUG_ON(!block_group->ro); 7397 7398 memcpy(&key, &block_group->key, sizeof(key)); 7399 7400 /* make sure this block group isn't part of an allocation cluster */ 7401 cluster = &root->fs_info->data_alloc_cluster; 7402 spin_lock(&cluster->refill_lock); 7403 btrfs_return_cluster_to_free_space(block_group, cluster); 7404 spin_unlock(&cluster->refill_lock); 7405 7406 /* 7407 * make sure this block group isn't part of a metadata 7408 * allocation cluster 7409 */ 7410 cluster = &root->fs_info->meta_alloc_cluster; 7411 spin_lock(&cluster->refill_lock); 7412 btrfs_return_cluster_to_free_space(block_group, cluster); 7413 spin_unlock(&cluster->refill_lock); 7414 7415 path = btrfs_alloc_path(); 7416 BUG_ON(!path); 7417 7418 spin_lock(&root->fs_info->block_group_cache_lock); 7419 rb_erase(&block_group->cache_node, 7420 &root->fs_info->block_group_cache_tree); 7421 spin_unlock(&root->fs_info->block_group_cache_lock); 7422 7423 down_write(&block_group->space_info->groups_sem); 7424 /* 7425 * we must use list_del_init so people can check to see if they 7426 * are still on the list after taking the semaphore 7427 */ 7428 list_del_init(&block_group->list); 7429 up_write(&block_group->space_info->groups_sem); 7430 7431 if (block_group->cached == BTRFS_CACHE_STARTED) 7432 wait_event(block_group->caching_q, 7433 block_group_cache_done(block_group)); 7434 7435 btrfs_remove_free_space_cache(block_group); 7436 7437 spin_lock(&block_group->space_info->lock); 7438 block_group->space_info->total_bytes -= block_group->key.offset; 7439 block_group->space_info->bytes_readonly -= block_group->key.offset; 7440 spin_unlock(&block_group->space_info->lock); 7441 7442 btrfs_clear_space_info_full(root->fs_info); 7443 7444 btrfs_put_block_group(block_group); 7445 btrfs_put_block_group(block_group); 7446 7447 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 7448 if (ret > 0) 7449 ret = -EIO; 7450 if (ret < 0) 7451 goto out; 7452 7453 ret = btrfs_del_item(trans, root, path); 7454 out: 7455 btrfs_free_path(path); 7456 return ret; 7457 } 7458