1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 #include <linux/sched.h> 19 #include <linux/pagemap.h> 20 #include <linux/writeback.h> 21 #include <linux/blkdev.h> 22 #include <linux/sort.h> 23 #include <linux/rcupdate.h> 24 #include <linux/kthread.h> 25 #include <linux/slab.h> 26 #include "compat.h" 27 #include "hash.h" 28 #include "ctree.h" 29 #include "disk-io.h" 30 #include "print-tree.h" 31 #include "transaction.h" 32 #include "volumes.h" 33 #include "locking.h" 34 #include "free-space-cache.h" 35 36 /* control flags for do_chunk_alloc's force field 37 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk 38 * if we really need one. 39 * 40 * CHUNK_ALLOC_FORCE means it must try to allocate one 41 * 42 * CHUNK_ALLOC_LIMITED means to only try and allocate one 43 * if we have very few chunks already allocated. This is 44 * used as part of the clustering code to help make sure 45 * we have a good pool of storage to cluster in, without 46 * filling the FS with empty chunks 47 * 48 */ 49 enum { 50 CHUNK_ALLOC_NO_FORCE = 0, 51 CHUNK_ALLOC_FORCE = 1, 52 CHUNK_ALLOC_LIMITED = 2, 53 }; 54 55 static int update_block_group(struct btrfs_trans_handle *trans, 56 struct btrfs_root *root, 57 u64 bytenr, u64 num_bytes, int alloc); 58 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 59 struct btrfs_root *root, 60 u64 bytenr, u64 num_bytes, u64 parent, 61 u64 root_objectid, u64 owner_objectid, 62 u64 owner_offset, int refs_to_drop, 63 struct btrfs_delayed_extent_op *extra_op); 64 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 65 struct extent_buffer *leaf, 66 struct btrfs_extent_item *ei); 67 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 68 struct btrfs_root *root, 69 u64 parent, u64 root_objectid, 70 u64 flags, u64 owner, u64 offset, 71 struct btrfs_key *ins, int ref_mod); 72 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 73 struct btrfs_root *root, 74 u64 parent, u64 root_objectid, 75 u64 flags, struct btrfs_disk_key *key, 76 int level, struct btrfs_key *ins); 77 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 78 struct btrfs_root *extent_root, u64 alloc_bytes, 79 u64 flags, int force); 80 static int find_next_key(struct btrfs_path *path, int level, 81 struct btrfs_key *key); 82 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 83 int dump_block_groups); 84 85 static noinline int 86 block_group_cache_done(struct btrfs_block_group_cache *cache) 87 { 88 smp_mb(); 89 return cache->cached == BTRFS_CACHE_FINISHED; 90 } 91 92 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 93 { 94 return (cache->flags & bits) == bits; 95 } 96 97 static void btrfs_get_block_group(struct btrfs_block_group_cache *cache) 98 { 99 atomic_inc(&cache->count); 100 } 101 102 void 
btrfs_put_block_group(struct btrfs_block_group_cache *cache) 103 { 104 if (atomic_dec_and_test(&cache->count)) { 105 WARN_ON(cache->pinned > 0); 106 WARN_ON(cache->reserved > 0); 107 WARN_ON(cache->reserved_pinned > 0); 108 kfree(cache->free_space_ctl); 109 kfree(cache); 110 } 111 } 112 113 /* 114 * this adds the block group to the fs_info rb tree for the block group 115 * cache 116 */ 117 static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, 118 struct btrfs_block_group_cache *block_group) 119 { 120 struct rb_node **p; 121 struct rb_node *parent = NULL; 122 struct btrfs_block_group_cache *cache; 123 124 spin_lock(&info->block_group_cache_lock); 125 p = &info->block_group_cache_tree.rb_node; 126 127 while (*p) { 128 parent = *p; 129 cache = rb_entry(parent, struct btrfs_block_group_cache, 130 cache_node); 131 if (block_group->key.objectid < cache->key.objectid) { 132 p = &(*p)->rb_left; 133 } else if (block_group->key.objectid > cache->key.objectid) { 134 p = &(*p)->rb_right; 135 } else { 136 spin_unlock(&info->block_group_cache_lock); 137 return -EEXIST; 138 } 139 } 140 141 rb_link_node(&block_group->cache_node, parent, p); 142 rb_insert_color(&block_group->cache_node, 143 &info->block_group_cache_tree); 144 spin_unlock(&info->block_group_cache_lock); 145 146 return 0; 147 } 148 149 /* 150 * This will return the block group at or after bytenr if contains is 0, else 151 * it will return the block group that contains the bytenr 152 */ 153 static struct btrfs_block_group_cache * 154 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, 155 int contains) 156 { 157 struct btrfs_block_group_cache *cache, *ret = NULL; 158 struct rb_node *n; 159 u64 end, start; 160 161 spin_lock(&info->block_group_cache_lock); 162 n = info->block_group_cache_tree.rb_node; 163 164 while (n) { 165 cache = rb_entry(n, struct btrfs_block_group_cache, 166 cache_node); 167 end = cache->key.objectid + cache->key.offset - 1; 168 start = cache->key.objectid; 169 170 if (bytenr < start) { 171 if (!contains && (!ret || start < ret->key.objectid)) 172 ret = cache; 173 n = n->rb_left; 174 } else if (bytenr > start) { 175 if (contains && bytenr <= end) { 176 ret = cache; 177 break; 178 } 179 n = n->rb_right; 180 } else { 181 ret = cache; 182 break; 183 } 184 } 185 if (ret) 186 btrfs_get_block_group(ret); 187 spin_unlock(&info->block_group_cache_lock); 188 189 return ret; 190 } 191 192 static int add_excluded_extent(struct btrfs_root *root, 193 u64 start, u64 num_bytes) 194 { 195 u64 end = start + num_bytes - 1; 196 set_extent_bits(&root->fs_info->freed_extents[0], 197 start, end, EXTENT_UPTODATE, GFP_NOFS); 198 set_extent_bits(&root->fs_info->freed_extents[1], 199 start, end, EXTENT_UPTODATE, GFP_NOFS); 200 return 0; 201 } 202 203 static void free_excluded_extents(struct btrfs_root *root, 204 struct btrfs_block_group_cache *cache) 205 { 206 u64 start, end; 207 208 start = cache->key.objectid; 209 end = start + cache->key.offset - 1; 210 211 clear_extent_bits(&root->fs_info->freed_extents[0], 212 start, end, EXTENT_UPTODATE, GFP_NOFS); 213 clear_extent_bits(&root->fs_info->freed_extents[1], 214 start, end, EXTENT_UPTODATE, GFP_NOFS); 215 } 216 217 static int exclude_super_stripes(struct btrfs_root *root, 218 struct btrfs_block_group_cache *cache) 219 { 220 u64 bytenr; 221 u64 *logical; 222 int stripe_len; 223 int i, nr, ret; 224 225 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { 226 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; 227 cache->bytes_super += stripe_len; 228 ret = 
add_excluded_extent(root, cache->key.objectid, 229 stripe_len); 230 BUG_ON(ret); 231 } 232 233 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 234 bytenr = btrfs_sb_offset(i); 235 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 236 cache->key.objectid, bytenr, 237 0, &logical, &nr, &stripe_len); 238 BUG_ON(ret); 239 240 while (nr--) { 241 cache->bytes_super += stripe_len; 242 ret = add_excluded_extent(root, logical[nr], 243 stripe_len); 244 BUG_ON(ret); 245 } 246 247 kfree(logical); 248 } 249 return 0; 250 } 251 252 static struct btrfs_caching_control * 253 get_caching_control(struct btrfs_block_group_cache *cache) 254 { 255 struct btrfs_caching_control *ctl; 256 257 spin_lock(&cache->lock); 258 if (cache->cached != BTRFS_CACHE_STARTED) { 259 spin_unlock(&cache->lock); 260 return NULL; 261 } 262 263 /* We're loading it the fast way, so we don't have a caching_ctl. */ 264 if (!cache->caching_ctl) { 265 spin_unlock(&cache->lock); 266 return NULL; 267 } 268 269 ctl = cache->caching_ctl; 270 atomic_inc(&ctl->count); 271 spin_unlock(&cache->lock); 272 return ctl; 273 } 274 275 static void put_caching_control(struct btrfs_caching_control *ctl) 276 { 277 if (atomic_dec_and_test(&ctl->count)) 278 kfree(ctl); 279 } 280 281 /* 282 * this is only called by cache_block_group, since we could have freed extents 283 * we need to check the pinned_extents for any extents that can't be used yet 284 * since their free space will be released as soon as the transaction commits. 285 */ 286 static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, 287 struct btrfs_fs_info *info, u64 start, u64 end) 288 { 289 u64 extent_start, extent_end, size, total_added = 0; 290 int ret; 291 292 while (start < end) { 293 ret = find_first_extent_bit(info->pinned_extents, start, 294 &extent_start, &extent_end, 295 EXTENT_DIRTY | EXTENT_UPTODATE); 296 if (ret) 297 break; 298 299 if (extent_start <= start) { 300 start = extent_end + 1; 301 } else if (extent_start > start && extent_start < end) { 302 size = extent_start - start; 303 total_added += size; 304 ret = btrfs_add_free_space(block_group, start, 305 size); 306 BUG_ON(ret); 307 start = extent_end + 1; 308 } else { 309 break; 310 } 311 } 312 313 if (start < end) { 314 size = end - start; 315 total_added += size; 316 ret = btrfs_add_free_space(block_group, start, size); 317 BUG_ON(ret); 318 } 319 320 return total_added; 321 } 322 323 static int caching_kthread(void *data) 324 { 325 struct btrfs_block_group_cache *block_group = data; 326 struct btrfs_fs_info *fs_info = block_group->fs_info; 327 struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; 328 struct btrfs_root *extent_root = fs_info->extent_root; 329 struct btrfs_path *path; 330 struct extent_buffer *leaf; 331 struct btrfs_key key; 332 u64 total_found = 0; 333 u64 last = 0; 334 u32 nritems; 335 int ret = 0; 336 337 path = btrfs_alloc_path(); 338 if (!path) 339 return -ENOMEM; 340 341 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 342 343 /* 344 * We don't want to deadlock with somebody trying to allocate a new 345 * extent for the extent root while also trying to search the extent 346 * root to add free space. 
So we skip locking and search the commit 347 * root, since its read-only 348 */ 349 path->skip_locking = 1; 350 path->search_commit_root = 1; 351 path->reada = 1; 352 353 key.objectid = last; 354 key.offset = 0; 355 key.type = BTRFS_EXTENT_ITEM_KEY; 356 again: 357 mutex_lock(&caching_ctl->mutex); 358 /* need to make sure the commit_root doesn't disappear */ 359 down_read(&fs_info->extent_commit_sem); 360 361 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 362 if (ret < 0) 363 goto err; 364 365 leaf = path->nodes[0]; 366 nritems = btrfs_header_nritems(leaf); 367 368 while (1) { 369 if (btrfs_fs_closing(fs_info) > 1) { 370 last = (u64)-1; 371 break; 372 } 373 374 if (path->slots[0] < nritems) { 375 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 376 } else { 377 ret = find_next_key(path, 0, &key); 378 if (ret) 379 break; 380 381 if (need_resched() || 382 btrfs_next_leaf(extent_root, path)) { 383 caching_ctl->progress = last; 384 btrfs_release_path(path); 385 up_read(&fs_info->extent_commit_sem); 386 mutex_unlock(&caching_ctl->mutex); 387 cond_resched(); 388 goto again; 389 } 390 leaf = path->nodes[0]; 391 nritems = btrfs_header_nritems(leaf); 392 continue; 393 } 394 395 if (key.objectid < block_group->key.objectid) { 396 path->slots[0]++; 397 continue; 398 } 399 400 if (key.objectid >= block_group->key.objectid + 401 block_group->key.offset) 402 break; 403 404 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 405 total_found += add_new_free_space(block_group, 406 fs_info, last, 407 key.objectid); 408 last = key.objectid + key.offset; 409 410 if (total_found > (1024 * 1024 * 2)) { 411 total_found = 0; 412 wake_up(&caching_ctl->wait); 413 } 414 } 415 path->slots[0]++; 416 } 417 ret = 0; 418 419 total_found += add_new_free_space(block_group, fs_info, last, 420 block_group->key.objectid + 421 block_group->key.offset); 422 caching_ctl->progress = (u64)-1; 423 424 spin_lock(&block_group->lock); 425 block_group->caching_ctl = NULL; 426 block_group->cached = BTRFS_CACHE_FINISHED; 427 spin_unlock(&block_group->lock); 428 429 err: 430 btrfs_free_path(path); 431 up_read(&fs_info->extent_commit_sem); 432 433 free_excluded_extents(extent_root, block_group); 434 435 mutex_unlock(&caching_ctl->mutex); 436 wake_up(&caching_ctl->wait); 437 438 put_caching_control(caching_ctl); 439 atomic_dec(&block_group->space_info->caching_threads); 440 btrfs_put_block_group(block_group); 441 442 return 0; 443 } 444 445 static int cache_block_group(struct btrfs_block_group_cache *cache, 446 struct btrfs_trans_handle *trans, 447 struct btrfs_root *root, 448 int load_cache_only) 449 { 450 struct btrfs_fs_info *fs_info = cache->fs_info; 451 struct btrfs_caching_control *caching_ctl; 452 struct task_struct *tsk; 453 int ret = 0; 454 455 smp_mb(); 456 if (cache->cached != BTRFS_CACHE_NO) 457 return 0; 458 459 /* 460 * We can't do the read from on-disk cache during a commit since we need 461 * to have the normal tree locking. Also if we are currently trying to 462 * allocate blocks for the tree root we can't do the fast caching since 463 * we likely hold important locks. 
464 */ 465 if (trans && (!trans->transaction->in_commit) && 466 (root && root != root->fs_info->tree_root)) { 467 spin_lock(&cache->lock); 468 if (cache->cached != BTRFS_CACHE_NO) { 469 spin_unlock(&cache->lock); 470 return 0; 471 } 472 cache->cached = BTRFS_CACHE_STARTED; 473 spin_unlock(&cache->lock); 474 475 ret = load_free_space_cache(fs_info, cache); 476 477 spin_lock(&cache->lock); 478 if (ret == 1) { 479 cache->cached = BTRFS_CACHE_FINISHED; 480 cache->last_byte_to_unpin = (u64)-1; 481 } else { 482 cache->cached = BTRFS_CACHE_NO; 483 } 484 spin_unlock(&cache->lock); 485 if (ret == 1) { 486 free_excluded_extents(fs_info->extent_root, cache); 487 return 0; 488 } 489 } 490 491 if (load_cache_only) 492 return 0; 493 494 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); 495 BUG_ON(!caching_ctl); 496 497 INIT_LIST_HEAD(&caching_ctl->list); 498 mutex_init(&caching_ctl->mutex); 499 init_waitqueue_head(&caching_ctl->wait); 500 caching_ctl->block_group = cache; 501 caching_ctl->progress = cache->key.objectid; 502 /* one for caching kthread, one for caching block group list */ 503 atomic_set(&caching_ctl->count, 2); 504 505 spin_lock(&cache->lock); 506 if (cache->cached != BTRFS_CACHE_NO) { 507 spin_unlock(&cache->lock); 508 kfree(caching_ctl); 509 return 0; 510 } 511 cache->caching_ctl = caching_ctl; 512 cache->cached = BTRFS_CACHE_STARTED; 513 spin_unlock(&cache->lock); 514 515 down_write(&fs_info->extent_commit_sem); 516 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); 517 up_write(&fs_info->extent_commit_sem); 518 519 atomic_inc(&cache->space_info->caching_threads); 520 btrfs_get_block_group(cache); 521 522 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 523 cache->key.objectid); 524 if (IS_ERR(tsk)) { 525 ret = PTR_ERR(tsk); 526 printk(KERN_ERR "error running thread %d\n", ret); 527 BUG(); 528 } 529 530 return ret; 531 } 532 533 /* 534 * return the block group that starts at or after bytenr 535 */ 536 static struct btrfs_block_group_cache * 537 btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) 538 { 539 struct btrfs_block_group_cache *cache; 540 541 cache = block_group_cache_tree_search(info, bytenr, 0); 542 543 return cache; 544 } 545 546 /* 547 * return the block group that contains the given bytenr 548 */ 549 struct btrfs_block_group_cache *btrfs_lookup_block_group( 550 struct btrfs_fs_info *info, 551 u64 bytenr) 552 { 553 struct btrfs_block_group_cache *cache; 554 555 cache = block_group_cache_tree_search(info, bytenr, 1); 556 557 return cache; 558 } 559 560 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 561 u64 flags) 562 { 563 struct list_head *head = &info->space_info; 564 struct btrfs_space_info *found; 565 566 flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM | 567 BTRFS_BLOCK_GROUP_METADATA; 568 569 rcu_read_lock(); 570 list_for_each_entry_rcu(found, head, list) { 571 if (found->flags & flags) { 572 rcu_read_unlock(); 573 return found; 574 } 575 } 576 rcu_read_unlock(); 577 return NULL; 578 } 579 580 /* 581 * after adding space to the filesystem, we need to clear the full flags 582 * on all the space infos. 
583 */ 584 void btrfs_clear_space_info_full(struct btrfs_fs_info *info) 585 { 586 struct list_head *head = &info->space_info; 587 struct btrfs_space_info *found; 588 589 rcu_read_lock(); 590 list_for_each_entry_rcu(found, head, list) 591 found->full = 0; 592 rcu_read_unlock(); 593 } 594 595 static u64 div_factor(u64 num, int factor) 596 { 597 if (factor == 10) 598 return num; 599 num *= factor; 600 do_div(num, 10); 601 return num; 602 } 603 604 static u64 div_factor_fine(u64 num, int factor) 605 { 606 if (factor == 100) 607 return num; 608 num *= factor; 609 do_div(num, 100); 610 return num; 611 } 612 613 u64 btrfs_find_block_group(struct btrfs_root *root, 614 u64 search_start, u64 search_hint, int owner) 615 { 616 struct btrfs_block_group_cache *cache; 617 u64 used; 618 u64 last = max(search_hint, search_start); 619 u64 group_start = 0; 620 int full_search = 0; 621 int factor = 9; 622 int wrapped = 0; 623 again: 624 while (1) { 625 cache = btrfs_lookup_first_block_group(root->fs_info, last); 626 if (!cache) 627 break; 628 629 spin_lock(&cache->lock); 630 last = cache->key.objectid + cache->key.offset; 631 used = btrfs_block_group_used(&cache->item); 632 633 if ((full_search || !cache->ro) && 634 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) { 635 if (used + cache->pinned + cache->reserved < 636 div_factor(cache->key.offset, factor)) { 637 group_start = cache->key.objectid; 638 spin_unlock(&cache->lock); 639 btrfs_put_block_group(cache); 640 goto found; 641 } 642 } 643 spin_unlock(&cache->lock); 644 btrfs_put_block_group(cache); 645 cond_resched(); 646 } 647 if (!wrapped) { 648 last = search_start; 649 wrapped = 1; 650 goto again; 651 } 652 if (!full_search && factor < 10) { 653 last = search_start; 654 full_search = 1; 655 factor = 10; 656 goto again; 657 } 658 found: 659 return group_start; 660 } 661 662 /* simple helper to search for an existing extent at a given offset */ 663 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) 664 { 665 int ret; 666 struct btrfs_key key; 667 struct btrfs_path *path; 668 669 path = btrfs_alloc_path(); 670 BUG_ON(!path); 671 key.objectid = start; 672 key.offset = len; 673 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 674 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 675 0, 0); 676 btrfs_free_path(path); 677 return ret; 678 } 679 680 /* 681 * helper function to lookup reference count and flags of extent. 682 * 683 * the head node for delayed ref is used to store the sum of all the 684 * reference count modifications queued up in the rbtree. the head 685 * node may also store the extent flags to set. This way you can check 686 * to see what the reference count and extent flags would be if all of 687 * the delayed refs are not processed. 
688 */ 689 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, 690 struct btrfs_root *root, u64 bytenr, 691 u64 num_bytes, u64 *refs, u64 *flags) 692 { 693 struct btrfs_delayed_ref_head *head; 694 struct btrfs_delayed_ref_root *delayed_refs; 695 struct btrfs_path *path; 696 struct btrfs_extent_item *ei; 697 struct extent_buffer *leaf; 698 struct btrfs_key key; 699 u32 item_size; 700 u64 num_refs; 701 u64 extent_flags; 702 int ret; 703 704 path = btrfs_alloc_path(); 705 if (!path) 706 return -ENOMEM; 707 708 key.objectid = bytenr; 709 key.type = BTRFS_EXTENT_ITEM_KEY; 710 key.offset = num_bytes; 711 if (!trans) { 712 path->skip_locking = 1; 713 path->search_commit_root = 1; 714 } 715 again: 716 ret = btrfs_search_slot(trans, root->fs_info->extent_root, 717 &key, path, 0, 0); 718 if (ret < 0) 719 goto out_free; 720 721 if (ret == 0) { 722 leaf = path->nodes[0]; 723 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 724 if (item_size >= sizeof(*ei)) { 725 ei = btrfs_item_ptr(leaf, path->slots[0], 726 struct btrfs_extent_item); 727 num_refs = btrfs_extent_refs(leaf, ei); 728 extent_flags = btrfs_extent_flags(leaf, ei); 729 } else { 730 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 731 struct btrfs_extent_item_v0 *ei0; 732 BUG_ON(item_size != sizeof(*ei0)); 733 ei0 = btrfs_item_ptr(leaf, path->slots[0], 734 struct btrfs_extent_item_v0); 735 num_refs = btrfs_extent_refs_v0(leaf, ei0); 736 /* FIXME: this isn't correct for data */ 737 extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; 738 #else 739 BUG(); 740 #endif 741 } 742 BUG_ON(num_refs == 0); 743 } else { 744 num_refs = 0; 745 extent_flags = 0; 746 ret = 0; 747 } 748 749 if (!trans) 750 goto out; 751 752 delayed_refs = &trans->transaction->delayed_refs; 753 spin_lock(&delayed_refs->lock); 754 head = btrfs_find_delayed_ref_head(trans, bytenr); 755 if (head) { 756 if (!mutex_trylock(&head->mutex)) { 757 atomic_inc(&head->node.refs); 758 spin_unlock(&delayed_refs->lock); 759 760 btrfs_release_path(path); 761 762 /* 763 * Mutex was contended, block until it's released and try 764 * again 765 */ 766 mutex_lock(&head->mutex); 767 mutex_unlock(&head->mutex); 768 btrfs_put_delayed_ref(&head->node); 769 goto again; 770 } 771 if (head->extent_op && head->extent_op->update_flags) 772 extent_flags |= head->extent_op->flags_to_set; 773 else 774 BUG_ON(num_refs == 0); 775 776 num_refs += head->node.ref_mod; 777 mutex_unlock(&head->mutex); 778 } 779 spin_unlock(&delayed_refs->lock); 780 out: 781 WARN_ON(num_refs == 0); 782 if (refs) 783 *refs = num_refs; 784 if (flags) 785 *flags = extent_flags; 786 out_free: 787 btrfs_free_path(path); 788 return ret; 789 } 790 791 /* 792 * Back reference rules. Back refs have three main goals: 793 * 794 * 1) differentiate between all holders of references to an extent so that 795 * when a reference is dropped we can make sure it was a valid reference 796 * before freeing the extent. 797 * 798 * 2) Provide enough information to quickly find the holders of an extent 799 * if we notice a given block is corrupted or bad. 800 * 801 * 3) Make it easy to migrate blocks for FS shrinking or storage pool 802 * maintenance. This is actually the same as #2, but with a slightly 803 * different use case. 804 * 805 * There are two kinds of back refs. The implicit back refs is optimized 806 * for pointers in non-shared tree blocks. For a given pointer in a block, 807 * back refs of this kind provide information about the block's owner tree 808 * and the pointer's key. These information allow us to find the block by 809 * b-tree searching. 
The full back refs is for pointers in tree blocks not 810 * referenced by their owner trees. The location of tree block is recorded 811 * in the back refs. Actually the full back refs is generic, and can be 812 * used in all cases the implicit back refs is used. The major shortcoming 813 * of the full back refs is its overhead. Every time a tree block gets 814 * COWed, we have to update back refs entry for all pointers in it. 815 * 816 * For a newly allocated tree block, we use implicit back refs for 817 * pointers in it. This means most tree related operations only involve 818 * implicit back refs. For a tree block created in old transaction, the 819 * only way to drop a reference to it is COW it. So we can detect the 820 * event that tree block loses its owner tree's reference and do the 821 * back refs conversion. 822 * 823 * When a tree block is COW'd through a tree, there are four cases: 824 * 825 * The reference count of the block is one and the tree is the block's 826 * owner tree. Nothing to do in this case. 827 * 828 * The reference count of the block is one and the tree is not the 829 * block's owner tree. In this case, full back refs is used for pointers 830 * in the block. Remove these full back refs, add implicit back refs for 831 * every pointers in the new block. 832 * 833 * The reference count of the block is greater than one and the tree is 834 * the block's owner tree. In this case, implicit back refs is used for 835 * pointers in the block. Add full back refs for every pointers in the 836 * block, increase lower level extents' reference counts. The original 837 * implicit back refs are entailed to the new block. 838 * 839 * The reference count of the block is greater than one and the tree is 840 * not the block's owner tree. Add implicit back refs for every pointer in 841 * the new block, increase lower level extents' reference count. 842 * 843 * Back Reference Key composing: 844 * 845 * The key objectid corresponds to the first byte in the extent, 846 * The key type is used to differentiate between types of back refs. 847 * There are different meanings of the key offset for different types 848 * of back refs. 849 * 850 * File extents can be referenced by: 851 * 852 * - multiple snapshots, subvolumes, or different generations in one subvol 853 * - different files inside a single subvolume 854 * - different offsets inside a file (bookend extents in file.c) 855 * 856 * The extent ref structure for the implicit back refs has fields for: 857 * 858 * - Objectid of the subvolume root 859 * - objectid of the file holding the reference 860 * - original offset in the file 861 * - how many bookend extents 862 * 863 * The key offset for the implicit back refs is hash of the first 864 * three fields. 865 * 866 * The extent ref structure for the full back refs has field for: 867 * 868 * - number of pointers in the tree leaf 869 * 870 * The key offset for the implicit back refs is the first byte of 871 * the tree leaf 872 * 873 * When a file extent is allocated, The implicit back refs is used. 874 * the fields are filled in: 875 * 876 * (root_key.objectid, inode objectid, offset in file, 1) 877 * 878 * When a file extent is removed file truncation, we find the 879 * corresponding implicit back refs and check the following fields: 880 * 881 * (btrfs_header_owner(leaf), inode objectid, offset in file) 882 * 883 * Btree extents can be referenced by: 884 * 885 * - Different subvolumes 886 * 887 * Both the implicit back refs and the full back refs for tree blocks 888 * only consist of key. 
The key offset for the implicit back refs is 889 * objectid of block's owner tree. The key offset for the full back refs 890 * is the first byte of parent block. 891 * 892 * When implicit back refs is used, information about the lowest key and 893 * level of the tree block are required. These information are stored in 894 * tree block info structure. 895 */ 896 897 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 898 static int convert_extent_item_v0(struct btrfs_trans_handle *trans, 899 struct btrfs_root *root, 900 struct btrfs_path *path, 901 u64 owner, u32 extra_size) 902 { 903 struct btrfs_extent_item *item; 904 struct btrfs_extent_item_v0 *ei0; 905 struct btrfs_extent_ref_v0 *ref0; 906 struct btrfs_tree_block_info *bi; 907 struct extent_buffer *leaf; 908 struct btrfs_key key; 909 struct btrfs_key found_key; 910 u32 new_size = sizeof(*item); 911 u64 refs; 912 int ret; 913 914 leaf = path->nodes[0]; 915 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0)); 916 917 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 918 ei0 = btrfs_item_ptr(leaf, path->slots[0], 919 struct btrfs_extent_item_v0); 920 refs = btrfs_extent_refs_v0(leaf, ei0); 921 922 if (owner == (u64)-1) { 923 while (1) { 924 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 925 ret = btrfs_next_leaf(root, path); 926 if (ret < 0) 927 return ret; 928 BUG_ON(ret > 0); 929 leaf = path->nodes[0]; 930 } 931 btrfs_item_key_to_cpu(leaf, &found_key, 932 path->slots[0]); 933 BUG_ON(key.objectid != found_key.objectid); 934 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) { 935 path->slots[0]++; 936 continue; 937 } 938 ref0 = btrfs_item_ptr(leaf, path->slots[0], 939 struct btrfs_extent_ref_v0); 940 owner = btrfs_ref_objectid_v0(leaf, ref0); 941 break; 942 } 943 } 944 btrfs_release_path(path); 945 946 if (owner < BTRFS_FIRST_FREE_OBJECTID) 947 new_size += sizeof(*bi); 948 949 new_size -= sizeof(*ei0); 950 ret = btrfs_search_slot(trans, root, &key, path, 951 new_size + extra_size, 1); 952 if (ret < 0) 953 return ret; 954 BUG_ON(ret); 955 956 ret = btrfs_extend_item(trans, root, path, new_size); 957 958 leaf = path->nodes[0]; 959 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 960 btrfs_set_extent_refs(leaf, item, refs); 961 /* FIXME: get real generation */ 962 btrfs_set_extent_generation(leaf, item, 0); 963 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 964 btrfs_set_extent_flags(leaf, item, 965 BTRFS_EXTENT_FLAG_TREE_BLOCK | 966 BTRFS_BLOCK_FLAG_FULL_BACKREF); 967 bi = (struct btrfs_tree_block_info *)(item + 1); 968 /* FIXME: get first key of the block */ 969 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); 970 btrfs_set_tree_block_level(leaf, bi, (int)owner); 971 } else { 972 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); 973 } 974 btrfs_mark_buffer_dirty(leaf); 975 return 0; 976 } 977 #endif 978 979 static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) 980 { 981 u32 high_crc = ~(u32)0; 982 u32 low_crc = ~(u32)0; 983 __le64 lenum; 984 985 lenum = cpu_to_le64(root_objectid); 986 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 987 lenum = cpu_to_le64(owner); 988 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 989 lenum = cpu_to_le64(offset); 990 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 991 992 return ((u64)high_crc << 31) ^ (u64)low_crc; 993 } 994 995 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf, 996 struct btrfs_extent_data_ref *ref) 997 { 998 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref), 999 
btrfs_extent_data_ref_objectid(leaf, ref), 1000 btrfs_extent_data_ref_offset(leaf, ref)); 1001 } 1002 1003 static int match_extent_data_ref(struct extent_buffer *leaf, 1004 struct btrfs_extent_data_ref *ref, 1005 u64 root_objectid, u64 owner, u64 offset) 1006 { 1007 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid || 1008 btrfs_extent_data_ref_objectid(leaf, ref) != owner || 1009 btrfs_extent_data_ref_offset(leaf, ref) != offset) 1010 return 0; 1011 return 1; 1012 } 1013 1014 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, 1015 struct btrfs_root *root, 1016 struct btrfs_path *path, 1017 u64 bytenr, u64 parent, 1018 u64 root_objectid, 1019 u64 owner, u64 offset) 1020 { 1021 struct btrfs_key key; 1022 struct btrfs_extent_data_ref *ref; 1023 struct extent_buffer *leaf; 1024 u32 nritems; 1025 int ret; 1026 int recow; 1027 int err = -ENOENT; 1028 1029 key.objectid = bytenr; 1030 if (parent) { 1031 key.type = BTRFS_SHARED_DATA_REF_KEY; 1032 key.offset = parent; 1033 } else { 1034 key.type = BTRFS_EXTENT_DATA_REF_KEY; 1035 key.offset = hash_extent_data_ref(root_objectid, 1036 owner, offset); 1037 } 1038 again: 1039 recow = 0; 1040 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1041 if (ret < 0) { 1042 err = ret; 1043 goto fail; 1044 } 1045 1046 if (parent) { 1047 if (!ret) 1048 return 0; 1049 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1050 key.type = BTRFS_EXTENT_REF_V0_KEY; 1051 btrfs_release_path(path); 1052 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1053 if (ret < 0) { 1054 err = ret; 1055 goto fail; 1056 } 1057 if (!ret) 1058 return 0; 1059 #endif 1060 goto fail; 1061 } 1062 1063 leaf = path->nodes[0]; 1064 nritems = btrfs_header_nritems(leaf); 1065 while (1) { 1066 if (path->slots[0] >= nritems) { 1067 ret = btrfs_next_leaf(root, path); 1068 if (ret < 0) 1069 err = ret; 1070 if (ret) 1071 goto fail; 1072 1073 leaf = path->nodes[0]; 1074 nritems = btrfs_header_nritems(leaf); 1075 recow = 1; 1076 } 1077 1078 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1079 if (key.objectid != bytenr || 1080 key.type != BTRFS_EXTENT_DATA_REF_KEY) 1081 goto fail; 1082 1083 ref = btrfs_item_ptr(leaf, path->slots[0], 1084 struct btrfs_extent_data_ref); 1085 1086 if (match_extent_data_ref(leaf, ref, root_objectid, 1087 owner, offset)) { 1088 if (recow) { 1089 btrfs_release_path(path); 1090 goto again; 1091 } 1092 err = 0; 1093 break; 1094 } 1095 path->slots[0]++; 1096 } 1097 fail: 1098 return err; 1099 } 1100 1101 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, 1102 struct btrfs_root *root, 1103 struct btrfs_path *path, 1104 u64 bytenr, u64 parent, 1105 u64 root_objectid, u64 owner, 1106 u64 offset, int refs_to_add) 1107 { 1108 struct btrfs_key key; 1109 struct extent_buffer *leaf; 1110 u32 size; 1111 u32 num_refs; 1112 int ret; 1113 1114 key.objectid = bytenr; 1115 if (parent) { 1116 key.type = BTRFS_SHARED_DATA_REF_KEY; 1117 key.offset = parent; 1118 size = sizeof(struct btrfs_shared_data_ref); 1119 } else { 1120 key.type = BTRFS_EXTENT_DATA_REF_KEY; 1121 key.offset = hash_extent_data_ref(root_objectid, 1122 owner, offset); 1123 size = sizeof(struct btrfs_extent_data_ref); 1124 } 1125 1126 ret = btrfs_insert_empty_item(trans, root, path, &key, size); 1127 if (ret && ret != -EEXIST) 1128 goto fail; 1129 1130 leaf = path->nodes[0]; 1131 if (parent) { 1132 struct btrfs_shared_data_ref *ref; 1133 ref = btrfs_item_ptr(leaf, path->slots[0], 1134 struct btrfs_shared_data_ref); 1135 if (ret == 0) { 1136 
btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add); 1137 } else { 1138 num_refs = btrfs_shared_data_ref_count(leaf, ref); 1139 num_refs += refs_to_add; 1140 btrfs_set_shared_data_ref_count(leaf, ref, num_refs); 1141 } 1142 } else { 1143 struct btrfs_extent_data_ref *ref; 1144 while (ret == -EEXIST) { 1145 ref = btrfs_item_ptr(leaf, path->slots[0], 1146 struct btrfs_extent_data_ref); 1147 if (match_extent_data_ref(leaf, ref, root_objectid, 1148 owner, offset)) 1149 break; 1150 btrfs_release_path(path); 1151 key.offset++; 1152 ret = btrfs_insert_empty_item(trans, root, path, &key, 1153 size); 1154 if (ret && ret != -EEXIST) 1155 goto fail; 1156 1157 leaf = path->nodes[0]; 1158 } 1159 ref = btrfs_item_ptr(leaf, path->slots[0], 1160 struct btrfs_extent_data_ref); 1161 if (ret == 0) { 1162 btrfs_set_extent_data_ref_root(leaf, ref, 1163 root_objectid); 1164 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 1165 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 1166 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add); 1167 } else { 1168 num_refs = btrfs_extent_data_ref_count(leaf, ref); 1169 num_refs += refs_to_add; 1170 btrfs_set_extent_data_ref_count(leaf, ref, num_refs); 1171 } 1172 } 1173 btrfs_mark_buffer_dirty(leaf); 1174 ret = 0; 1175 fail: 1176 btrfs_release_path(path); 1177 return ret; 1178 } 1179 1180 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 1181 struct btrfs_root *root, 1182 struct btrfs_path *path, 1183 int refs_to_drop) 1184 { 1185 struct btrfs_key key; 1186 struct btrfs_extent_data_ref *ref1 = NULL; 1187 struct btrfs_shared_data_ref *ref2 = NULL; 1188 struct extent_buffer *leaf; 1189 u32 num_refs = 0; 1190 int ret = 0; 1191 1192 leaf = path->nodes[0]; 1193 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1194 1195 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 1196 ref1 = btrfs_item_ptr(leaf, path->slots[0], 1197 struct btrfs_extent_data_ref); 1198 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1199 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 1200 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1201 struct btrfs_shared_data_ref); 1202 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1203 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1204 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1205 struct btrfs_extent_ref_v0 *ref0; 1206 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1207 struct btrfs_extent_ref_v0); 1208 num_refs = btrfs_ref_count_v0(leaf, ref0); 1209 #endif 1210 } else { 1211 BUG(); 1212 } 1213 1214 BUG_ON(num_refs < refs_to_drop); 1215 num_refs -= refs_to_drop; 1216 1217 if (num_refs == 0) { 1218 ret = btrfs_del_item(trans, root, path); 1219 } else { 1220 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 1221 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); 1222 else if (key.type == BTRFS_SHARED_DATA_REF_KEY) 1223 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); 1224 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1225 else { 1226 struct btrfs_extent_ref_v0 *ref0; 1227 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1228 struct btrfs_extent_ref_v0); 1229 btrfs_set_ref_count_v0(leaf, ref0, num_refs); 1230 } 1231 #endif 1232 btrfs_mark_buffer_dirty(leaf); 1233 } 1234 return ret; 1235 } 1236 1237 static noinline u32 extent_data_ref_count(struct btrfs_root *root, 1238 struct btrfs_path *path, 1239 struct btrfs_extent_inline_ref *iref) 1240 { 1241 struct btrfs_key key; 1242 struct extent_buffer *leaf; 1243 struct btrfs_extent_data_ref *ref1; 1244 struct btrfs_shared_data_ref *ref2; 1245 u32 num_refs = 0; 1246 1247 leaf = path->nodes[0]; 
1248 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1249 if (iref) { 1250 if (btrfs_extent_inline_ref_type(leaf, iref) == 1251 BTRFS_EXTENT_DATA_REF_KEY) { 1252 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); 1253 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1254 } else { 1255 ref2 = (struct btrfs_shared_data_ref *)(iref + 1); 1256 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1257 } 1258 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 1259 ref1 = btrfs_item_ptr(leaf, path->slots[0], 1260 struct btrfs_extent_data_ref); 1261 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1262 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 1263 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1264 struct btrfs_shared_data_ref); 1265 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1266 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1267 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1268 struct btrfs_extent_ref_v0 *ref0; 1269 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1270 struct btrfs_extent_ref_v0); 1271 num_refs = btrfs_ref_count_v0(leaf, ref0); 1272 #endif 1273 } else { 1274 WARN_ON(1); 1275 } 1276 return num_refs; 1277 } 1278 1279 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, 1280 struct btrfs_root *root, 1281 struct btrfs_path *path, 1282 u64 bytenr, u64 parent, 1283 u64 root_objectid) 1284 { 1285 struct btrfs_key key; 1286 int ret; 1287 1288 key.objectid = bytenr; 1289 if (parent) { 1290 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 1291 key.offset = parent; 1292 } else { 1293 key.type = BTRFS_TREE_BLOCK_REF_KEY; 1294 key.offset = root_objectid; 1295 } 1296 1297 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1298 if (ret > 0) 1299 ret = -ENOENT; 1300 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1301 if (ret == -ENOENT && parent) { 1302 btrfs_release_path(path); 1303 key.type = BTRFS_EXTENT_REF_V0_KEY; 1304 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1305 if (ret > 0) 1306 ret = -ENOENT; 1307 } 1308 #endif 1309 return ret; 1310 } 1311 1312 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, 1313 struct btrfs_root *root, 1314 struct btrfs_path *path, 1315 u64 bytenr, u64 parent, 1316 u64 root_objectid) 1317 { 1318 struct btrfs_key key; 1319 int ret; 1320 1321 key.objectid = bytenr; 1322 if (parent) { 1323 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 1324 key.offset = parent; 1325 } else { 1326 key.type = BTRFS_TREE_BLOCK_REF_KEY; 1327 key.offset = root_objectid; 1328 } 1329 1330 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1331 btrfs_release_path(path); 1332 return ret; 1333 } 1334 1335 static inline int extent_ref_type(u64 parent, u64 owner) 1336 { 1337 int type; 1338 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1339 if (parent > 0) 1340 type = BTRFS_SHARED_BLOCK_REF_KEY; 1341 else 1342 type = BTRFS_TREE_BLOCK_REF_KEY; 1343 } else { 1344 if (parent > 0) 1345 type = BTRFS_SHARED_DATA_REF_KEY; 1346 else 1347 type = BTRFS_EXTENT_DATA_REF_KEY; 1348 } 1349 return type; 1350 } 1351 1352 static int find_next_key(struct btrfs_path *path, int level, 1353 struct btrfs_key *key) 1354 1355 { 1356 for (; level < BTRFS_MAX_LEVEL; level++) { 1357 if (!path->nodes[level]) 1358 break; 1359 if (path->slots[level] + 1 >= 1360 btrfs_header_nritems(path->nodes[level])) 1361 continue; 1362 if (level == 0) 1363 btrfs_item_key_to_cpu(path->nodes[level], key, 1364 path->slots[level] + 1); 1365 else 1366 btrfs_node_key_to_cpu(path->nodes[level], key, 1367 path->slots[level] + 1); 1368 return 0; 1369 } 1370 return 1; 1371 } 1372 1373 /* 
1374 * look for inline back ref. if back ref is found, *ref_ret is set 1375 * to the address of inline back ref, and 0 is returned. 1376 * 1377 * if back ref isn't found, *ref_ret is set to the address where it 1378 * should be inserted, and -ENOENT is returned. 1379 * 1380 * if insert is true and there are too many inline back refs, the path 1381 * points to the extent item, and -EAGAIN is returned. 1382 * 1383 * NOTE: inline back refs are ordered in the same way that back ref 1384 * items in the tree are ordered. 1385 */ 1386 static noinline_for_stack 1387 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, 1388 struct btrfs_root *root, 1389 struct btrfs_path *path, 1390 struct btrfs_extent_inline_ref **ref_ret, 1391 u64 bytenr, u64 num_bytes, 1392 u64 parent, u64 root_objectid, 1393 u64 owner, u64 offset, int insert) 1394 { 1395 struct btrfs_key key; 1396 struct extent_buffer *leaf; 1397 struct btrfs_extent_item *ei; 1398 struct btrfs_extent_inline_ref *iref; 1399 u64 flags; 1400 u64 item_size; 1401 unsigned long ptr; 1402 unsigned long end; 1403 int extra_size; 1404 int type; 1405 int want; 1406 int ret; 1407 int err = 0; 1408 1409 key.objectid = bytenr; 1410 key.type = BTRFS_EXTENT_ITEM_KEY; 1411 key.offset = num_bytes; 1412 1413 want = extent_ref_type(parent, owner); 1414 if (insert) { 1415 extra_size = btrfs_extent_inline_ref_size(want); 1416 path->keep_locks = 1; 1417 } else 1418 extra_size = -1; 1419 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); 1420 if (ret < 0) { 1421 err = ret; 1422 goto out; 1423 } 1424 BUG_ON(ret); 1425 1426 leaf = path->nodes[0]; 1427 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1428 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1429 if (item_size < sizeof(*ei)) { 1430 if (!insert) { 1431 err = -ENOENT; 1432 goto out; 1433 } 1434 ret = convert_extent_item_v0(trans, root, path, owner, 1435 extra_size); 1436 if (ret < 0) { 1437 err = ret; 1438 goto out; 1439 } 1440 leaf = path->nodes[0]; 1441 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1442 } 1443 #endif 1444 BUG_ON(item_size < sizeof(*ei)); 1445 1446 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1447 flags = btrfs_extent_flags(leaf, ei); 1448 1449 ptr = (unsigned long)(ei + 1); 1450 end = (unsigned long)ei + item_size; 1451 1452 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1453 ptr += sizeof(struct btrfs_tree_block_info); 1454 BUG_ON(ptr > end); 1455 } else { 1456 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); 1457 } 1458 1459 err = -ENOENT; 1460 while (1) { 1461 if (ptr >= end) { 1462 WARN_ON(ptr > end); 1463 break; 1464 } 1465 iref = (struct btrfs_extent_inline_ref *)ptr; 1466 type = btrfs_extent_inline_ref_type(leaf, iref); 1467 if (want < type) 1468 break; 1469 if (want > type) { 1470 ptr += btrfs_extent_inline_ref_size(type); 1471 continue; 1472 } 1473 1474 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1475 struct btrfs_extent_data_ref *dref; 1476 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1477 if (match_extent_data_ref(leaf, dref, root_objectid, 1478 owner, offset)) { 1479 err = 0; 1480 break; 1481 } 1482 if (hash_extent_data_ref_item(leaf, dref) < 1483 hash_extent_data_ref(root_objectid, owner, offset)) 1484 break; 1485 } else { 1486 u64 ref_offset; 1487 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref); 1488 if (parent > 0) { 1489 if (parent == ref_offset) { 1490 err = 0; 1491 break; 1492 } 1493 if (ref_offset < parent) 1494 break; 1495 } else { 1496 if (root_objectid == ref_offset) { 1497 err = 0; 1498 break; 1499 } 1500 if 
(ref_offset < root_objectid) 1501 break; 1502 } 1503 } 1504 ptr += btrfs_extent_inline_ref_size(type); 1505 } 1506 if (err == -ENOENT && insert) { 1507 if (item_size + extra_size >= 1508 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { 1509 err = -EAGAIN; 1510 goto out; 1511 } 1512 /* 1513 * To add new inline back ref, we have to make sure 1514 * there is no corresponding back ref item. 1515 * For simplicity, we just do not add new inline back 1516 * ref if there is any kind of item for this block 1517 */ 1518 if (find_next_key(path, 0, &key) == 0 && 1519 key.objectid == bytenr && 1520 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { 1521 err = -EAGAIN; 1522 goto out; 1523 } 1524 } 1525 *ref_ret = (struct btrfs_extent_inline_ref *)ptr; 1526 out: 1527 if (insert) { 1528 path->keep_locks = 0; 1529 btrfs_unlock_up_safe(path, 1); 1530 } 1531 return err; 1532 } 1533 1534 /* 1535 * helper to add new inline back ref 1536 */ 1537 static noinline_for_stack 1538 int setup_inline_extent_backref(struct btrfs_trans_handle *trans, 1539 struct btrfs_root *root, 1540 struct btrfs_path *path, 1541 struct btrfs_extent_inline_ref *iref, 1542 u64 parent, u64 root_objectid, 1543 u64 owner, u64 offset, int refs_to_add, 1544 struct btrfs_delayed_extent_op *extent_op) 1545 { 1546 struct extent_buffer *leaf; 1547 struct btrfs_extent_item *ei; 1548 unsigned long ptr; 1549 unsigned long end; 1550 unsigned long item_offset; 1551 u64 refs; 1552 int size; 1553 int type; 1554 int ret; 1555 1556 leaf = path->nodes[0]; 1557 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1558 item_offset = (unsigned long)iref - (unsigned long)ei; 1559 1560 type = extent_ref_type(parent, owner); 1561 size = btrfs_extent_inline_ref_size(type); 1562 1563 ret = btrfs_extend_item(trans, root, path, size); 1564 1565 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1566 refs = btrfs_extent_refs(leaf, ei); 1567 refs += refs_to_add; 1568 btrfs_set_extent_refs(leaf, ei, refs); 1569 if (extent_op) 1570 __run_delayed_extent_op(extent_op, leaf, ei); 1571 1572 ptr = (unsigned long)ei + item_offset; 1573 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]); 1574 if (ptr < end - size) 1575 memmove_extent_buffer(leaf, ptr + size, ptr, 1576 end - size - ptr); 1577 1578 iref = (struct btrfs_extent_inline_ref *)ptr; 1579 btrfs_set_extent_inline_ref_type(leaf, iref, type); 1580 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1581 struct btrfs_extent_data_ref *dref; 1582 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1583 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid); 1584 btrfs_set_extent_data_ref_objectid(leaf, dref, owner); 1585 btrfs_set_extent_data_ref_offset(leaf, dref, offset); 1586 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add); 1587 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1588 struct btrfs_shared_data_ref *sref; 1589 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1590 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add); 1591 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1592 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { 1593 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1594 } else { 1595 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 1596 } 1597 btrfs_mark_buffer_dirty(leaf); 1598 return 0; 1599 } 1600 1601 static int lookup_extent_backref(struct btrfs_trans_handle *trans, 1602 struct btrfs_root *root, 1603 struct btrfs_path *path, 1604 struct btrfs_extent_inline_ref **ref_ret, 1605 u64 bytenr, u64 num_bytes, u64 parent, 1606 u64 
root_objectid, u64 owner, u64 offset) 1607 { 1608 int ret; 1609 1610 ret = lookup_inline_extent_backref(trans, root, path, ref_ret, 1611 bytenr, num_bytes, parent, 1612 root_objectid, owner, offset, 0); 1613 if (ret != -ENOENT) 1614 return ret; 1615 1616 btrfs_release_path(path); 1617 *ref_ret = NULL; 1618 1619 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1620 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent, 1621 root_objectid); 1622 } else { 1623 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent, 1624 root_objectid, owner, offset); 1625 } 1626 return ret; 1627 } 1628 1629 /* 1630 * helper to update/remove inline back ref 1631 */ 1632 static noinline_for_stack 1633 int update_inline_extent_backref(struct btrfs_trans_handle *trans, 1634 struct btrfs_root *root, 1635 struct btrfs_path *path, 1636 struct btrfs_extent_inline_ref *iref, 1637 int refs_to_mod, 1638 struct btrfs_delayed_extent_op *extent_op) 1639 { 1640 struct extent_buffer *leaf; 1641 struct btrfs_extent_item *ei; 1642 struct btrfs_extent_data_ref *dref = NULL; 1643 struct btrfs_shared_data_ref *sref = NULL; 1644 unsigned long ptr; 1645 unsigned long end; 1646 u32 item_size; 1647 int size; 1648 int type; 1649 int ret; 1650 u64 refs; 1651 1652 leaf = path->nodes[0]; 1653 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1654 refs = btrfs_extent_refs(leaf, ei); 1655 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); 1656 refs += refs_to_mod; 1657 btrfs_set_extent_refs(leaf, ei, refs); 1658 if (extent_op) 1659 __run_delayed_extent_op(extent_op, leaf, ei); 1660 1661 type = btrfs_extent_inline_ref_type(leaf, iref); 1662 1663 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1664 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1665 refs = btrfs_extent_data_ref_count(leaf, dref); 1666 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1667 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1668 refs = btrfs_shared_data_ref_count(leaf, sref); 1669 } else { 1670 refs = 1; 1671 BUG_ON(refs_to_mod != -1); 1672 } 1673 1674 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod); 1675 refs += refs_to_mod; 1676 1677 if (refs > 0) { 1678 if (type == BTRFS_EXTENT_DATA_REF_KEY) 1679 btrfs_set_extent_data_ref_count(leaf, dref, refs); 1680 else 1681 btrfs_set_shared_data_ref_count(leaf, sref, refs); 1682 } else { 1683 size = btrfs_extent_inline_ref_size(type); 1684 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1685 ptr = (unsigned long)iref; 1686 end = (unsigned long)ei + item_size; 1687 if (ptr + size < end) 1688 memmove_extent_buffer(leaf, ptr, ptr + size, 1689 end - ptr - size); 1690 item_size -= size; 1691 ret = btrfs_truncate_item(trans, root, path, item_size, 1); 1692 } 1693 btrfs_mark_buffer_dirty(leaf); 1694 return 0; 1695 } 1696 1697 static noinline_for_stack 1698 int insert_inline_extent_backref(struct btrfs_trans_handle *trans, 1699 struct btrfs_root *root, 1700 struct btrfs_path *path, 1701 u64 bytenr, u64 num_bytes, u64 parent, 1702 u64 root_objectid, u64 owner, 1703 u64 offset, int refs_to_add, 1704 struct btrfs_delayed_extent_op *extent_op) 1705 { 1706 struct btrfs_extent_inline_ref *iref; 1707 int ret; 1708 1709 ret = lookup_inline_extent_backref(trans, root, path, &iref, 1710 bytenr, num_bytes, parent, 1711 root_objectid, owner, offset, 1); 1712 if (ret == 0) { 1713 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1714 ret = update_inline_extent_backref(trans, root, path, iref, 1715 refs_to_add, extent_op); 1716 } else if (ret == -ENOENT) { 1717 ret = setup_inline_extent_backref(trans, root, path, 
iref, 1718 parent, root_objectid, 1719 owner, offset, refs_to_add, 1720 extent_op); 1721 } 1722 return ret; 1723 } 1724 1725 static int insert_extent_backref(struct btrfs_trans_handle *trans, 1726 struct btrfs_root *root, 1727 struct btrfs_path *path, 1728 u64 bytenr, u64 parent, u64 root_objectid, 1729 u64 owner, u64 offset, int refs_to_add) 1730 { 1731 int ret; 1732 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1733 BUG_ON(refs_to_add != 1); 1734 ret = insert_tree_block_ref(trans, root, path, bytenr, 1735 parent, root_objectid); 1736 } else { 1737 ret = insert_extent_data_ref(trans, root, path, bytenr, 1738 parent, root_objectid, 1739 owner, offset, refs_to_add); 1740 } 1741 return ret; 1742 } 1743 1744 static int remove_extent_backref(struct btrfs_trans_handle *trans, 1745 struct btrfs_root *root, 1746 struct btrfs_path *path, 1747 struct btrfs_extent_inline_ref *iref, 1748 int refs_to_drop, int is_data) 1749 { 1750 int ret; 1751 1752 BUG_ON(!is_data && refs_to_drop != 1); 1753 if (iref) { 1754 ret = update_inline_extent_backref(trans, root, path, iref, 1755 -refs_to_drop, NULL); 1756 } else if (is_data) { 1757 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1758 } else { 1759 ret = btrfs_del_item(trans, root, path); 1760 } 1761 return ret; 1762 } 1763 1764 static int btrfs_issue_discard(struct block_device *bdev, 1765 u64 start, u64 len) 1766 { 1767 return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); 1768 } 1769 1770 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1771 u64 num_bytes, u64 *actual_bytes) 1772 { 1773 int ret; 1774 u64 discarded_bytes = 0; 1775 struct btrfs_multi_bio *multi = NULL; 1776 1777 1778 /* Tell the block device(s) that the sectors can be discarded */ 1779 ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, 1780 bytenr, &num_bytes, &multi, 0); 1781 if (!ret) { 1782 struct btrfs_bio_stripe *stripe = multi->stripes; 1783 int i; 1784 1785 1786 for (i = 0; i < multi->num_stripes; i++, stripe++) { 1787 ret = btrfs_issue_discard(stripe->dev->bdev, 1788 stripe->physical, 1789 stripe->length); 1790 if (!ret) 1791 discarded_bytes += stripe->length; 1792 else if (ret != -EOPNOTSUPP) 1793 break; 1794 } 1795 kfree(multi); 1796 } 1797 if (discarded_bytes && ret == -EOPNOTSUPP) 1798 ret = 0; 1799 1800 if (actual_bytes) 1801 *actual_bytes = discarded_bytes; 1802 1803 1804 return ret; 1805 } 1806 1807 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1808 struct btrfs_root *root, 1809 u64 bytenr, u64 num_bytes, u64 parent, 1810 u64 root_objectid, u64 owner, u64 offset) 1811 { 1812 int ret; 1813 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && 1814 root_objectid == BTRFS_TREE_LOG_OBJECTID); 1815 1816 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1817 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 1818 parent, root_objectid, (int)owner, 1819 BTRFS_ADD_DELAYED_REF, NULL); 1820 } else { 1821 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 1822 parent, root_objectid, owner, offset, 1823 BTRFS_ADD_DELAYED_REF, NULL); 1824 } 1825 return ret; 1826 } 1827 1828 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1829 struct btrfs_root *root, 1830 u64 bytenr, u64 num_bytes, 1831 u64 parent, u64 root_objectid, 1832 u64 owner, u64 offset, int refs_to_add, 1833 struct btrfs_delayed_extent_op *extent_op) 1834 { 1835 struct btrfs_path *path; 1836 struct extent_buffer *leaf; 1837 struct btrfs_extent_item *item; 1838 u64 refs; 1839 int ret; 1840 int err = 0; 1841 1842 path = btrfs_alloc_path(); 1843 
if (!path) 1844 return -ENOMEM; 1845 1846 path->reada = 1; 1847 path->leave_spinning = 1; 1848 /* this will setup the path even if it fails to insert the back ref */ 1849 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, 1850 path, bytenr, num_bytes, parent, 1851 root_objectid, owner, offset, 1852 refs_to_add, extent_op); 1853 if (ret == 0) 1854 goto out; 1855 1856 if (ret != -EAGAIN) { 1857 err = ret; 1858 goto out; 1859 } 1860 1861 leaf = path->nodes[0]; 1862 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1863 refs = btrfs_extent_refs(leaf, item); 1864 btrfs_set_extent_refs(leaf, item, refs + refs_to_add); 1865 if (extent_op) 1866 __run_delayed_extent_op(extent_op, leaf, item); 1867 1868 btrfs_mark_buffer_dirty(leaf); 1869 btrfs_release_path(path); 1870 1871 path->reada = 1; 1872 path->leave_spinning = 1; 1873 1874 /* now insert the actual backref */ 1875 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1876 path, bytenr, parent, root_objectid, 1877 owner, offset, refs_to_add); 1878 BUG_ON(ret); 1879 out: 1880 btrfs_free_path(path); 1881 return err; 1882 } 1883 1884 static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 1885 struct btrfs_root *root, 1886 struct btrfs_delayed_ref_node *node, 1887 struct btrfs_delayed_extent_op *extent_op, 1888 int insert_reserved) 1889 { 1890 int ret = 0; 1891 struct btrfs_delayed_data_ref *ref; 1892 struct btrfs_key ins; 1893 u64 parent = 0; 1894 u64 ref_root = 0; 1895 u64 flags = 0; 1896 1897 ins.objectid = node->bytenr; 1898 ins.offset = node->num_bytes; 1899 ins.type = BTRFS_EXTENT_ITEM_KEY; 1900 1901 ref = btrfs_delayed_node_to_data_ref(node); 1902 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 1903 parent = ref->parent; 1904 else 1905 ref_root = ref->root; 1906 1907 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1908 if (extent_op) { 1909 BUG_ON(extent_op->update_key); 1910 flags |= extent_op->flags_to_set; 1911 } 1912 ret = alloc_reserved_file_extent(trans, root, 1913 parent, ref_root, flags, 1914 ref->objectid, ref->offset, 1915 &ins, node->ref_mod); 1916 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1917 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1918 node->num_bytes, parent, 1919 ref_root, ref->objectid, 1920 ref->offset, node->ref_mod, 1921 extent_op); 1922 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1923 ret = __btrfs_free_extent(trans, root, node->bytenr, 1924 node->num_bytes, parent, 1925 ref_root, ref->objectid, 1926 ref->offset, node->ref_mod, 1927 extent_op); 1928 } else { 1929 BUG(); 1930 } 1931 return ret; 1932 } 1933 1934 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 1935 struct extent_buffer *leaf, 1936 struct btrfs_extent_item *ei) 1937 { 1938 u64 flags = btrfs_extent_flags(leaf, ei); 1939 if (extent_op->update_flags) { 1940 flags |= extent_op->flags_to_set; 1941 btrfs_set_extent_flags(leaf, ei, flags); 1942 } 1943 1944 if (extent_op->update_key) { 1945 struct btrfs_tree_block_info *bi; 1946 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); 1947 bi = (struct btrfs_tree_block_info *)(ei + 1); 1948 btrfs_set_tree_block_key(leaf, bi, &extent_op->key); 1949 } 1950 } 1951 1952 static int run_delayed_extent_op(struct btrfs_trans_handle *trans, 1953 struct btrfs_root *root, 1954 struct btrfs_delayed_ref_node *node, 1955 struct btrfs_delayed_extent_op *extent_op) 1956 { 1957 struct btrfs_key key; 1958 struct btrfs_path *path; 1959 struct btrfs_extent_item *ei; 1960 struct extent_buffer *leaf; 1961 
u32 item_size; 1962 int ret; 1963 int err = 0; 1964 1965 path = btrfs_alloc_path(); 1966 if (!path) 1967 return -ENOMEM; 1968 1969 key.objectid = node->bytenr; 1970 key.type = BTRFS_EXTENT_ITEM_KEY; 1971 key.offset = node->num_bytes; 1972 1973 path->reada = 1; 1974 path->leave_spinning = 1; 1975 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, 1976 path, 0, 1); 1977 if (ret < 0) { 1978 err = ret; 1979 goto out; 1980 } 1981 if (ret > 0) { 1982 err = -EIO; 1983 goto out; 1984 } 1985 1986 leaf = path->nodes[0]; 1987 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1988 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1989 if (item_size < sizeof(*ei)) { 1990 ret = convert_extent_item_v0(trans, root->fs_info->extent_root, 1991 path, (u64)-1, 0); 1992 if (ret < 0) { 1993 err = ret; 1994 goto out; 1995 } 1996 leaf = path->nodes[0]; 1997 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1998 } 1999 #endif 2000 BUG_ON(item_size < sizeof(*ei)); 2001 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2002 __run_delayed_extent_op(extent_op, leaf, ei); 2003 2004 btrfs_mark_buffer_dirty(leaf); 2005 out: 2006 btrfs_free_path(path); 2007 return err; 2008 } 2009 2010 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, 2011 struct btrfs_root *root, 2012 struct btrfs_delayed_ref_node *node, 2013 struct btrfs_delayed_extent_op *extent_op, 2014 int insert_reserved) 2015 { 2016 int ret = 0; 2017 struct btrfs_delayed_tree_ref *ref; 2018 struct btrfs_key ins; 2019 u64 parent = 0; 2020 u64 ref_root = 0; 2021 2022 ins.objectid = node->bytenr; 2023 ins.offset = node->num_bytes; 2024 ins.type = BTRFS_EXTENT_ITEM_KEY; 2025 2026 ref = btrfs_delayed_node_to_tree_ref(node); 2027 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2028 parent = ref->parent; 2029 else 2030 ref_root = ref->root; 2031 2032 BUG_ON(node->ref_mod != 1); 2033 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 2034 BUG_ON(!extent_op || !extent_op->update_flags || 2035 !extent_op->update_key); 2036 ret = alloc_reserved_tree_block(trans, root, 2037 parent, ref_root, 2038 extent_op->flags_to_set, 2039 &extent_op->key, 2040 ref->level, &ins); 2041 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 2042 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 2043 node->num_bytes, parent, ref_root, 2044 ref->level, 0, 1, extent_op); 2045 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2046 ret = __btrfs_free_extent(trans, root, node->bytenr, 2047 node->num_bytes, parent, ref_root, 2048 ref->level, 0, 1, extent_op); 2049 } else { 2050 BUG(); 2051 } 2052 return ret; 2053 } 2054 2055 /* helper function to actually process a single delayed ref entry */ 2056 static int run_one_delayed_ref(struct btrfs_trans_handle *trans, 2057 struct btrfs_root *root, 2058 struct btrfs_delayed_ref_node *node, 2059 struct btrfs_delayed_extent_op *extent_op, 2060 int insert_reserved) 2061 { 2062 int ret; 2063 if (btrfs_delayed_ref_is_head(node)) { 2064 struct btrfs_delayed_ref_head *head; 2065 /* 2066 * we've hit the end of the chain and we were supposed 2067 * to insert this extent into the tree. 
But, it got 2068 * deleted before we ever needed to insert it, so all 2069 * we have to do is clean up the accounting 2070 */ 2071 BUG_ON(extent_op); 2072 head = btrfs_delayed_node_to_head(node); 2073 if (insert_reserved) { 2074 btrfs_pin_extent(root, node->bytenr, 2075 node->num_bytes, 1); 2076 if (head->is_data) { 2077 ret = btrfs_del_csums(trans, root, 2078 node->bytenr, 2079 node->num_bytes); 2080 BUG_ON(ret); 2081 } 2082 } 2083 mutex_unlock(&head->mutex); 2084 return 0; 2085 } 2086 2087 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 2088 node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2089 ret = run_delayed_tree_ref(trans, root, node, extent_op, 2090 insert_reserved); 2091 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 2092 node->type == BTRFS_SHARED_DATA_REF_KEY) 2093 ret = run_delayed_data_ref(trans, root, node, extent_op, 2094 insert_reserved); 2095 else 2096 BUG(); 2097 return ret; 2098 } 2099 2100 static noinline struct btrfs_delayed_ref_node * 2101 select_delayed_ref(struct btrfs_delayed_ref_head *head) 2102 { 2103 struct rb_node *node; 2104 struct btrfs_delayed_ref_node *ref; 2105 int action = BTRFS_ADD_DELAYED_REF; 2106 again: 2107 /* 2108 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. 2109 * this prevents ref count from going down to zero when 2110 * there still are pending delayed ref. 2111 */ 2112 node = rb_prev(&head->node.rb_node); 2113 while (1) { 2114 if (!node) 2115 break; 2116 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2117 rb_node); 2118 if (ref->bytenr != head->node.bytenr) 2119 break; 2120 if (ref->action == action) 2121 return ref; 2122 node = rb_prev(node); 2123 } 2124 if (action == BTRFS_ADD_DELAYED_REF) { 2125 action = BTRFS_DROP_DELAYED_REF; 2126 goto again; 2127 } 2128 return NULL; 2129 } 2130 2131 static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 2132 struct btrfs_root *root, 2133 struct list_head *cluster) 2134 { 2135 struct btrfs_delayed_ref_root *delayed_refs; 2136 struct btrfs_delayed_ref_node *ref; 2137 struct btrfs_delayed_ref_head *locked_ref = NULL; 2138 struct btrfs_delayed_extent_op *extent_op; 2139 int ret; 2140 int count = 0; 2141 int must_insert_reserved = 0; 2142 2143 delayed_refs = &trans->transaction->delayed_refs; 2144 while (1) { 2145 if (!locked_ref) { 2146 /* pick a new head ref from the cluster list */ 2147 if (list_empty(cluster)) 2148 break; 2149 2150 locked_ref = list_entry(cluster->next, 2151 struct btrfs_delayed_ref_head, cluster); 2152 2153 /* grab the lock that says we are going to process 2154 * all the refs for this head */ 2155 ret = btrfs_delayed_ref_lock(trans, locked_ref); 2156 2157 /* 2158 * we may have dropped the spin lock to get the head 2159 * mutex lock, and that might have given someone else 2160 * time to free the head. If that's true, it has been 2161 * removed from our list and we can move on. 2162 */ 2163 if (ret == -EAGAIN) { 2164 locked_ref = NULL; 2165 count++; 2166 continue; 2167 } 2168 } 2169 2170 /* 2171 * record the must insert reserved flag before we 2172 * drop the spin lock. 
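		 * Both the flag and the extent_op are detached from the head
		 * here so they are handled at most once, even though we drop
		 * and retake delayed_refs->lock further down.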
2173 */ 2174 must_insert_reserved = locked_ref->must_insert_reserved; 2175 locked_ref->must_insert_reserved = 0; 2176 2177 extent_op = locked_ref->extent_op; 2178 locked_ref->extent_op = NULL; 2179 2180 /* 2181 * locked_ref is the head node, so we have to go one 2182 * node back for any delayed ref updates 2183 */ 2184 ref = select_delayed_ref(locked_ref); 2185 if (!ref) { 2186 /* All delayed refs have been processed, Go ahead 2187 * and send the head node to run_one_delayed_ref, 2188 * so that any accounting fixes can happen 2189 */ 2190 ref = &locked_ref->node; 2191 2192 if (extent_op && must_insert_reserved) { 2193 kfree(extent_op); 2194 extent_op = NULL; 2195 } 2196 2197 if (extent_op) { 2198 spin_unlock(&delayed_refs->lock); 2199 2200 ret = run_delayed_extent_op(trans, root, 2201 ref, extent_op); 2202 BUG_ON(ret); 2203 kfree(extent_op); 2204 2205 cond_resched(); 2206 spin_lock(&delayed_refs->lock); 2207 continue; 2208 } 2209 2210 list_del_init(&locked_ref->cluster); 2211 locked_ref = NULL; 2212 } 2213 2214 ref->in_tree = 0; 2215 rb_erase(&ref->rb_node, &delayed_refs->root); 2216 delayed_refs->num_entries--; 2217 2218 spin_unlock(&delayed_refs->lock); 2219 2220 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2221 must_insert_reserved); 2222 BUG_ON(ret); 2223 2224 btrfs_put_delayed_ref(ref); 2225 kfree(extent_op); 2226 count++; 2227 2228 cond_resched(); 2229 spin_lock(&delayed_refs->lock); 2230 } 2231 return count; 2232 } 2233 2234 /* 2235 * this starts processing the delayed reference count updates and 2236 * extent insertions we have queued up so far. count can be 2237 * 0, which means to process everything in the tree at the start 2238 * of the run (but not newly added entries), or it can be some target 2239 * number you'd like to process. 2240 */ 2241 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2242 struct btrfs_root *root, unsigned long count) 2243 { 2244 struct rb_node *node; 2245 struct btrfs_delayed_ref_root *delayed_refs; 2246 struct btrfs_delayed_ref_node *ref; 2247 struct list_head cluster; 2248 int ret; 2249 int run_all = count == (unsigned long)-1; 2250 int run_most = 0; 2251 2252 if (root == root->fs_info->extent_root) 2253 root = root->fs_info->tree_root; 2254 2255 delayed_refs = &trans->transaction->delayed_refs; 2256 INIT_LIST_HEAD(&cluster); 2257 again: 2258 spin_lock(&delayed_refs->lock); 2259 if (count == 0) { 2260 count = delayed_refs->num_entries * 2; 2261 run_most = 1; 2262 } 2263 while (1) { 2264 if (!(run_all || run_most) && 2265 delayed_refs->num_heads_ready < 64) 2266 break; 2267 2268 /* 2269 * go find something we can process in the rbtree. 
We start at 2270 * the beginning of the tree, and then build a cluster 2271 * of refs to process starting at the first one we are able to 2272 * lock 2273 */ 2274 ret = btrfs_find_ref_cluster(trans, &cluster, 2275 delayed_refs->run_delayed_start); 2276 if (ret) 2277 break; 2278 2279 ret = run_clustered_refs(trans, root, &cluster); 2280 BUG_ON(ret < 0); 2281 2282 count -= min_t(unsigned long, ret, count); 2283 2284 if (count == 0) 2285 break; 2286 } 2287 2288 if (run_all) { 2289 node = rb_first(&delayed_refs->root); 2290 if (!node) 2291 goto out; 2292 count = (unsigned long)-1; 2293 2294 while (node) { 2295 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2296 rb_node); 2297 if (btrfs_delayed_ref_is_head(ref)) { 2298 struct btrfs_delayed_ref_head *head; 2299 2300 head = btrfs_delayed_node_to_head(ref); 2301 atomic_inc(&ref->refs); 2302 2303 spin_unlock(&delayed_refs->lock); 2304 /* 2305 * Mutex was contended, block until it's 2306 * released and try again 2307 */ 2308 mutex_lock(&head->mutex); 2309 mutex_unlock(&head->mutex); 2310 2311 btrfs_put_delayed_ref(ref); 2312 cond_resched(); 2313 goto again; 2314 } 2315 node = rb_next(node); 2316 } 2317 spin_unlock(&delayed_refs->lock); 2318 schedule_timeout(1); 2319 goto again; 2320 } 2321 out: 2322 spin_unlock(&delayed_refs->lock); 2323 return 0; 2324 } 2325 2326 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2327 struct btrfs_root *root, 2328 u64 bytenr, u64 num_bytes, u64 flags, 2329 int is_data) 2330 { 2331 struct btrfs_delayed_extent_op *extent_op; 2332 int ret; 2333 2334 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2335 if (!extent_op) 2336 return -ENOMEM; 2337 2338 extent_op->flags_to_set = flags; 2339 extent_op->update_flags = 1; 2340 extent_op->update_key = 0; 2341 extent_op->is_data = is_data ? 
1 : 0; 2342 2343 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); 2344 if (ret) 2345 kfree(extent_op); 2346 return ret; 2347 } 2348 2349 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, 2350 struct btrfs_root *root, 2351 struct btrfs_path *path, 2352 u64 objectid, u64 offset, u64 bytenr) 2353 { 2354 struct btrfs_delayed_ref_head *head; 2355 struct btrfs_delayed_ref_node *ref; 2356 struct btrfs_delayed_data_ref *data_ref; 2357 struct btrfs_delayed_ref_root *delayed_refs; 2358 struct rb_node *node; 2359 int ret = 0; 2360 2361 ret = -ENOENT; 2362 delayed_refs = &trans->transaction->delayed_refs; 2363 spin_lock(&delayed_refs->lock); 2364 head = btrfs_find_delayed_ref_head(trans, bytenr); 2365 if (!head) 2366 goto out; 2367 2368 if (!mutex_trylock(&head->mutex)) { 2369 atomic_inc(&head->node.refs); 2370 spin_unlock(&delayed_refs->lock); 2371 2372 btrfs_release_path(path); 2373 2374 /* 2375 * Mutex was contended, block until it's released and let 2376 * caller try again 2377 */ 2378 mutex_lock(&head->mutex); 2379 mutex_unlock(&head->mutex); 2380 btrfs_put_delayed_ref(&head->node); 2381 return -EAGAIN; 2382 } 2383 2384 node = rb_prev(&head->node.rb_node); 2385 if (!node) 2386 goto out_unlock; 2387 2388 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2389 2390 if (ref->bytenr != bytenr) 2391 goto out_unlock; 2392 2393 ret = 1; 2394 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) 2395 goto out_unlock; 2396 2397 data_ref = btrfs_delayed_node_to_data_ref(ref); 2398 2399 node = rb_prev(node); 2400 if (node) { 2401 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2402 if (ref->bytenr == bytenr) 2403 goto out_unlock; 2404 } 2405 2406 if (data_ref->root != root->root_key.objectid || 2407 data_ref->objectid != objectid || data_ref->offset != offset) 2408 goto out_unlock; 2409 2410 ret = 0; 2411 out_unlock: 2412 mutex_unlock(&head->mutex); 2413 out: 2414 spin_unlock(&delayed_refs->lock); 2415 return ret; 2416 } 2417 2418 static noinline int check_committed_ref(struct btrfs_trans_handle *trans, 2419 struct btrfs_root *root, 2420 struct btrfs_path *path, 2421 u64 objectid, u64 offset, u64 bytenr) 2422 { 2423 struct btrfs_root *extent_root = root->fs_info->extent_root; 2424 struct extent_buffer *leaf; 2425 struct btrfs_extent_data_ref *ref; 2426 struct btrfs_extent_inline_ref *iref; 2427 struct btrfs_extent_item *ei; 2428 struct btrfs_key key; 2429 u32 item_size; 2430 int ret; 2431 2432 key.objectid = bytenr; 2433 key.offset = (u64)-1; 2434 key.type = BTRFS_EXTENT_ITEM_KEY; 2435 2436 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2437 if (ret < 0) 2438 goto out; 2439 BUG_ON(ret == 0); 2440 2441 ret = -ENOENT; 2442 if (path->slots[0] == 0) 2443 goto out; 2444 2445 path->slots[0]--; 2446 leaf = path->nodes[0]; 2447 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2448 2449 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY) 2450 goto out; 2451 2452 ret = 1; 2453 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2454 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2455 if (item_size < sizeof(*ei)) { 2456 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 2457 goto out; 2458 } 2459 #endif 2460 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2461 2462 if (item_size != sizeof(*ei) + 2463 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) 2464 goto out; 2465 2466 if (btrfs_extent_generation(leaf, ei) <= 2467 btrfs_root_last_snapshot(&root->root_item)) 2468 goto out; 2469 2470 iref = (struct 
btrfs_extent_inline_ref *)(ei + 1); 2471 if (btrfs_extent_inline_ref_type(leaf, iref) != 2472 BTRFS_EXTENT_DATA_REF_KEY) 2473 goto out; 2474 2475 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 2476 if (btrfs_extent_refs(leaf, ei) != 2477 btrfs_extent_data_ref_count(leaf, ref) || 2478 btrfs_extent_data_ref_root(leaf, ref) != 2479 root->root_key.objectid || 2480 btrfs_extent_data_ref_objectid(leaf, ref) != objectid || 2481 btrfs_extent_data_ref_offset(leaf, ref) != offset) 2482 goto out; 2483 2484 ret = 0; 2485 out: 2486 return ret; 2487 } 2488 2489 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 2490 struct btrfs_root *root, 2491 u64 objectid, u64 offset, u64 bytenr) 2492 { 2493 struct btrfs_path *path; 2494 int ret; 2495 int ret2; 2496 2497 path = btrfs_alloc_path(); 2498 if (!path) 2499 return -ENOENT; 2500 2501 do { 2502 ret = check_committed_ref(trans, root, path, objectid, 2503 offset, bytenr); 2504 if (ret && ret != -ENOENT) 2505 goto out; 2506 2507 ret2 = check_delayed_ref(trans, root, path, objectid, 2508 offset, bytenr); 2509 } while (ret2 == -EAGAIN); 2510 2511 if (ret2 && ret2 != -ENOENT) { 2512 ret = ret2; 2513 goto out; 2514 } 2515 2516 if (ret != -ENOENT || ret2 != -ENOENT) 2517 ret = 0; 2518 out: 2519 btrfs_free_path(path); 2520 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) 2521 WARN_ON(ret > 0); 2522 return ret; 2523 } 2524 2525 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2526 struct btrfs_root *root, 2527 struct extent_buffer *buf, 2528 int full_backref, int inc) 2529 { 2530 u64 bytenr; 2531 u64 num_bytes; 2532 u64 parent; 2533 u64 ref_root; 2534 u32 nritems; 2535 struct btrfs_key key; 2536 struct btrfs_file_extent_item *fi; 2537 int i; 2538 int level; 2539 int ret = 0; 2540 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2541 u64, u64, u64, u64, u64, u64); 2542 2543 ref_root = btrfs_header_owner(buf); 2544 nritems = btrfs_header_nritems(buf); 2545 level = btrfs_header_level(buf); 2546 2547 if (!root->ref_cows && level == 0) 2548 return 0; 2549 2550 if (inc) 2551 process_func = btrfs_inc_extent_ref; 2552 else 2553 process_func = btrfs_free_extent; 2554 2555 if (full_backref) 2556 parent = buf->start; 2557 else 2558 parent = 0; 2559 2560 for (i = 0; i < nritems; i++) { 2561 if (level == 0) { 2562 btrfs_item_key_to_cpu(buf, &key, i); 2563 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2564 continue; 2565 fi = btrfs_item_ptr(buf, i, 2566 struct btrfs_file_extent_item); 2567 if (btrfs_file_extent_type(buf, fi) == 2568 BTRFS_FILE_EXTENT_INLINE) 2569 continue; 2570 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2571 if (bytenr == 0) 2572 continue; 2573 2574 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); 2575 key.offset -= btrfs_file_extent_offset(buf, fi); 2576 ret = process_func(trans, root, bytenr, num_bytes, 2577 parent, ref_root, key.objectid, 2578 key.offset); 2579 if (ret) 2580 goto fail; 2581 } else { 2582 bytenr = btrfs_node_blockptr(buf, i); 2583 num_bytes = btrfs_level_size(root, level - 1); 2584 ret = process_func(trans, root, bytenr, num_bytes, 2585 parent, ref_root, level - 1, 0); 2586 if (ret) 2587 goto fail; 2588 } 2589 } 2590 return 0; 2591 fail: 2592 BUG(); 2593 return ret; 2594 } 2595 2596 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2597 struct extent_buffer *buf, int full_backref) 2598 { 2599 return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 2600 } 2601 2602 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root 
*root, 2603 struct extent_buffer *buf, int full_backref) 2604 { 2605 return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 2606 } 2607 2608 static int write_one_cache_group(struct btrfs_trans_handle *trans, 2609 struct btrfs_root *root, 2610 struct btrfs_path *path, 2611 struct btrfs_block_group_cache *cache) 2612 { 2613 int ret; 2614 struct btrfs_root *extent_root = root->fs_info->extent_root; 2615 unsigned long bi; 2616 struct extent_buffer *leaf; 2617 2618 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 2619 if (ret < 0) 2620 goto fail; 2621 BUG_ON(ret); 2622 2623 leaf = path->nodes[0]; 2624 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2625 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2626 btrfs_mark_buffer_dirty(leaf); 2627 btrfs_release_path(path); 2628 fail: 2629 if (ret) 2630 return ret; 2631 return 0; 2632 2633 } 2634 2635 static struct btrfs_block_group_cache * 2636 next_block_group(struct btrfs_root *root, 2637 struct btrfs_block_group_cache *cache) 2638 { 2639 struct rb_node *node; 2640 spin_lock(&root->fs_info->block_group_cache_lock); 2641 node = rb_next(&cache->cache_node); 2642 btrfs_put_block_group(cache); 2643 if (node) { 2644 cache = rb_entry(node, struct btrfs_block_group_cache, 2645 cache_node); 2646 btrfs_get_block_group(cache); 2647 } else 2648 cache = NULL; 2649 spin_unlock(&root->fs_info->block_group_cache_lock); 2650 return cache; 2651 } 2652 2653 static int cache_save_setup(struct btrfs_block_group_cache *block_group, 2654 struct btrfs_trans_handle *trans, 2655 struct btrfs_path *path) 2656 { 2657 struct btrfs_root *root = block_group->fs_info->tree_root; 2658 struct inode *inode = NULL; 2659 u64 alloc_hint = 0; 2660 int dcs = BTRFS_DC_ERROR; 2661 int num_pages = 0; 2662 int retries = 0; 2663 int ret = 0; 2664 2665 /* 2666 * If this block group is smaller than 100 megs don't bother caching the 2667 * block group. 2668 */ 2669 if (block_group->key.offset < (100 * 1024 * 1024)) { 2670 spin_lock(&block_group->lock); 2671 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 2672 spin_unlock(&block_group->lock); 2673 return 0; 2674 } 2675 2676 again: 2677 inode = lookup_free_space_inode(root, block_group, path); 2678 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 2679 ret = PTR_ERR(inode); 2680 btrfs_release_path(path); 2681 goto out; 2682 } 2683 2684 if (IS_ERR(inode)) { 2685 BUG_ON(retries); 2686 retries++; 2687 2688 if (block_group->ro) 2689 goto out_free; 2690 2691 ret = create_free_space_inode(root, trans, block_group, path); 2692 if (ret) 2693 goto out_free; 2694 goto again; 2695 } 2696 2697 /* 2698 * We want to set the generation to 0, that way if anything goes wrong 2699 * from here on out we know not to trust this cache when we load up next 2700 * time. 
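	 * btrfs_update_inode() below writes the zeroed generation into the
	 * inode item before we truncate and re-preallocate the cache file.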
2701 */ 2702 BTRFS_I(inode)->generation = 0; 2703 ret = btrfs_update_inode(trans, root, inode); 2704 WARN_ON(ret); 2705 2706 if (i_size_read(inode) > 0) { 2707 ret = btrfs_truncate_free_space_cache(root, trans, path, 2708 inode); 2709 if (ret) 2710 goto out_put; 2711 } 2712 2713 spin_lock(&block_group->lock); 2714 if (block_group->cached != BTRFS_CACHE_FINISHED) { 2715 /* We're not cached, don't bother trying to write stuff out */ 2716 dcs = BTRFS_DC_WRITTEN; 2717 spin_unlock(&block_group->lock); 2718 goto out_put; 2719 } 2720 spin_unlock(&block_group->lock); 2721 2722 num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); 2723 if (!num_pages) 2724 num_pages = 1; 2725 2726 /* 2727 * Just to make absolutely sure we have enough space, we're going to 2728 * preallocate 12 pages worth of space for each block group. In 2729 * practice we ought to use at most 8, but we need extra space so we can 2730 * add our header and have a terminator between the extents and the 2731 * bitmaps. 2732 */ 2733 num_pages *= 16; 2734 num_pages *= PAGE_CACHE_SIZE; 2735 2736 ret = btrfs_check_data_free_space(inode, num_pages); 2737 if (ret) 2738 goto out_put; 2739 2740 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, 2741 num_pages, num_pages, 2742 &alloc_hint); 2743 if (!ret) 2744 dcs = BTRFS_DC_SETUP; 2745 btrfs_free_reserved_data_space(inode, num_pages); 2746 out_put: 2747 iput(inode); 2748 out_free: 2749 btrfs_release_path(path); 2750 out: 2751 spin_lock(&block_group->lock); 2752 block_group->disk_cache_state = dcs; 2753 spin_unlock(&block_group->lock); 2754 2755 return ret; 2756 } 2757 2758 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2759 struct btrfs_root *root) 2760 { 2761 struct btrfs_block_group_cache *cache; 2762 int err = 0; 2763 struct btrfs_path *path; 2764 u64 last = 0; 2765 2766 path = btrfs_alloc_path(); 2767 if (!path) 2768 return -ENOMEM; 2769 2770 again: 2771 while (1) { 2772 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2773 while (cache) { 2774 if (cache->disk_cache_state == BTRFS_DC_CLEAR) 2775 break; 2776 cache = next_block_group(root, cache); 2777 } 2778 if (!cache) { 2779 if (last == 0) 2780 break; 2781 last = 0; 2782 continue; 2783 } 2784 err = cache_save_setup(cache, trans, path); 2785 last = cache->key.objectid + cache->key.offset; 2786 btrfs_put_block_group(cache); 2787 } 2788 2789 while (1) { 2790 if (last == 0) { 2791 err = btrfs_run_delayed_refs(trans, root, 2792 (unsigned long)-1); 2793 BUG_ON(err); 2794 } 2795 2796 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2797 while (cache) { 2798 if (cache->disk_cache_state == BTRFS_DC_CLEAR) { 2799 btrfs_put_block_group(cache); 2800 goto again; 2801 } 2802 2803 if (cache->dirty) 2804 break; 2805 cache = next_block_group(root, cache); 2806 } 2807 if (!cache) { 2808 if (last == 0) 2809 break; 2810 last = 0; 2811 continue; 2812 } 2813 2814 if (cache->disk_cache_state == BTRFS_DC_SETUP) 2815 cache->disk_cache_state = BTRFS_DC_NEED_WRITE; 2816 cache->dirty = 0; 2817 last = cache->key.objectid + cache->key.offset; 2818 2819 err = write_one_cache_group(trans, root, path, cache); 2820 BUG_ON(err); 2821 btrfs_put_block_group(cache); 2822 } 2823 2824 while (1) { 2825 /* 2826 * I don't think this is needed since we're just marking our 2827 * preallocated extent as written, but just in case it can't 2828 * hurt. 
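		 * (Running the delayed refs is cheap when nothing is queued,
		 * so the extra call below costs little.)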
2829 */ 2830 if (last == 0) { 2831 err = btrfs_run_delayed_refs(trans, root, 2832 (unsigned long)-1); 2833 BUG_ON(err); 2834 } 2835 2836 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2837 while (cache) { 2838 /* 2839 * Really this shouldn't happen, but it could if we 2840 * couldn't write the entire preallocated extent and 2841 * splitting the extent resulted in a new block. 2842 */ 2843 if (cache->dirty) { 2844 btrfs_put_block_group(cache); 2845 goto again; 2846 } 2847 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) 2848 break; 2849 cache = next_block_group(root, cache); 2850 } 2851 if (!cache) { 2852 if (last == 0) 2853 break; 2854 last = 0; 2855 continue; 2856 } 2857 2858 btrfs_write_out_cache(root, trans, cache, path); 2859 2860 /* 2861 * If we didn't have an error then the cache state is still 2862 * NEED_WRITE, so we can set it to WRITTEN. 2863 */ 2864 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) 2865 cache->disk_cache_state = BTRFS_DC_WRITTEN; 2866 last = cache->key.objectid + cache->key.offset; 2867 btrfs_put_block_group(cache); 2868 } 2869 2870 btrfs_free_path(path); 2871 return 0; 2872 } 2873 2874 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 2875 { 2876 struct btrfs_block_group_cache *block_group; 2877 int readonly = 0; 2878 2879 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 2880 if (!block_group || block_group->ro) 2881 readonly = 1; 2882 if (block_group) 2883 btrfs_put_block_group(block_group); 2884 return readonly; 2885 } 2886 2887 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 2888 u64 total_bytes, u64 bytes_used, 2889 struct btrfs_space_info **space_info) 2890 { 2891 struct btrfs_space_info *found; 2892 int i; 2893 int factor; 2894 2895 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | 2896 BTRFS_BLOCK_GROUP_RAID10)) 2897 factor = 2; 2898 else 2899 factor = 1; 2900 2901 found = __find_space_info(info, flags); 2902 if (found) { 2903 spin_lock(&found->lock); 2904 found->total_bytes += total_bytes; 2905 found->disk_total += total_bytes * factor; 2906 found->bytes_used += bytes_used; 2907 found->disk_used += bytes_used * factor; 2908 found->full = 0; 2909 spin_unlock(&found->lock); 2910 *space_info = found; 2911 return 0; 2912 } 2913 found = kzalloc(sizeof(*found), GFP_NOFS); 2914 if (!found) 2915 return -ENOMEM; 2916 2917 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) 2918 INIT_LIST_HEAD(&found->block_groups[i]); 2919 init_rwsem(&found->groups_sem); 2920 spin_lock_init(&found->lock); 2921 found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | 2922 BTRFS_BLOCK_GROUP_SYSTEM | 2923 BTRFS_BLOCK_GROUP_METADATA); 2924 found->total_bytes = total_bytes; 2925 found->disk_total = total_bytes * factor; 2926 found->bytes_used = bytes_used; 2927 found->disk_used = bytes_used * factor; 2928 found->bytes_pinned = 0; 2929 found->bytes_reserved = 0; 2930 found->bytes_readonly = 0; 2931 found->bytes_may_use = 0; 2932 found->full = 0; 2933 found->force_alloc = CHUNK_ALLOC_NO_FORCE; 2934 found->chunk_alloc = 0; 2935 *space_info = found; 2936 list_add_rcu(&found->list, &info->space_info); 2937 atomic_set(&found->caching_threads, 0); 2938 return 0; 2939 } 2940 2941 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 2942 { 2943 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 2944 BTRFS_BLOCK_GROUP_RAID1 | 2945 BTRFS_BLOCK_GROUP_RAID10 | 2946 BTRFS_BLOCK_GROUP_DUP); 2947 if (extra_flags) { 2948 if (flags & BTRFS_BLOCK_GROUP_DATA) 2949 fs_info->avail_data_alloc_bits |= extra_flags; 2950 if 
(flags & BTRFS_BLOCK_GROUP_METADATA) 2951 fs_info->avail_metadata_alloc_bits |= extra_flags; 2952 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2953 fs_info->avail_system_alloc_bits |= extra_flags; 2954 } 2955 } 2956 2957 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 2958 { 2959 /* 2960 * we add in the count of missing devices because we want 2961 * to make sure that any RAID levels on a degraded FS 2962 * continue to be honored. 2963 */ 2964 u64 num_devices = root->fs_info->fs_devices->rw_devices + 2965 root->fs_info->fs_devices->missing_devices; 2966 2967 if (num_devices == 1) 2968 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 2969 if (num_devices < 4) 2970 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 2971 2972 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 2973 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 2974 BTRFS_BLOCK_GROUP_RAID10))) { 2975 flags &= ~BTRFS_BLOCK_GROUP_DUP; 2976 } 2977 2978 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 2979 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 2980 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 2981 } 2982 2983 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 2984 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 2985 (flags & BTRFS_BLOCK_GROUP_RAID10) | 2986 (flags & BTRFS_BLOCK_GROUP_DUP))) 2987 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 2988 return flags; 2989 } 2990 2991 static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) 2992 { 2993 if (flags & BTRFS_BLOCK_GROUP_DATA) 2994 flags |= root->fs_info->avail_data_alloc_bits & 2995 root->fs_info->data_alloc_profile; 2996 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2997 flags |= root->fs_info->avail_system_alloc_bits & 2998 root->fs_info->system_alloc_profile; 2999 else if (flags & BTRFS_BLOCK_GROUP_METADATA) 3000 flags |= root->fs_info->avail_metadata_alloc_bits & 3001 root->fs_info->metadata_alloc_profile; 3002 return btrfs_reduce_alloc_profile(root, flags); 3003 } 3004 3005 u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) 3006 { 3007 u64 flags; 3008 3009 if (data) 3010 flags = BTRFS_BLOCK_GROUP_DATA; 3011 else if (root == root->fs_info->chunk_root) 3012 flags = BTRFS_BLOCK_GROUP_SYSTEM; 3013 else 3014 flags = BTRFS_BLOCK_GROUP_METADATA; 3015 3016 return get_alloc_profile(root, flags); 3017 } 3018 3019 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) 3020 { 3021 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, 3022 BTRFS_BLOCK_GROUP_DATA); 3023 } 3024 3025 /* 3026 * This will check the space that the inode allocates from to make sure we have 3027 * enough space for bytes. 
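 *
 * A typical pairing (sketch, mirroring btrfs_delalloc_reserve_space() below):
 *
 *	ret = btrfs_check_data_free_space(inode, num_bytes);
 *	if (ret)
 *		return ret;
 *	ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
 *	if (ret)
 *		btrfs_free_reserved_data_space(inode, num_bytes);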
3028 */ 3029 int btrfs_check_data_free_space(struct inode *inode, u64 bytes) 3030 { 3031 struct btrfs_space_info *data_sinfo; 3032 struct btrfs_root *root = BTRFS_I(inode)->root; 3033 u64 used; 3034 int ret = 0, committed = 0, alloc_chunk = 1; 3035 3036 /* make sure bytes are sectorsize aligned */ 3037 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3038 3039 if (root == root->fs_info->tree_root || 3040 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { 3041 alloc_chunk = 0; 3042 committed = 1; 3043 } 3044 3045 data_sinfo = BTRFS_I(inode)->space_info; 3046 if (!data_sinfo) 3047 goto alloc; 3048 3049 again: 3050 /* make sure we have enough space to handle the data first */ 3051 spin_lock(&data_sinfo->lock); 3052 used = data_sinfo->bytes_used + data_sinfo->bytes_reserved + 3053 data_sinfo->bytes_pinned + data_sinfo->bytes_readonly + 3054 data_sinfo->bytes_may_use; 3055 3056 if (used + bytes > data_sinfo->total_bytes) { 3057 struct btrfs_trans_handle *trans; 3058 3059 /* 3060 * if we don't have enough free bytes in this space then we need 3061 * to alloc a new chunk. 3062 */ 3063 if (!data_sinfo->full && alloc_chunk) { 3064 u64 alloc_target; 3065 3066 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; 3067 spin_unlock(&data_sinfo->lock); 3068 alloc: 3069 alloc_target = btrfs_get_alloc_profile(root, 1); 3070 trans = btrfs_join_transaction(root); 3071 if (IS_ERR(trans)) 3072 return PTR_ERR(trans); 3073 3074 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3075 bytes + 2 * 1024 * 1024, 3076 alloc_target, 3077 CHUNK_ALLOC_NO_FORCE); 3078 btrfs_end_transaction(trans, root); 3079 if (ret < 0) { 3080 if (ret != -ENOSPC) 3081 return ret; 3082 else 3083 goto commit_trans; 3084 } 3085 3086 if (!data_sinfo) { 3087 btrfs_set_inode_space_info(root, inode); 3088 data_sinfo = BTRFS_I(inode)->space_info; 3089 } 3090 goto again; 3091 } 3092 3093 /* 3094 * If we have less pinned bytes than we want to allocate then 3095 * don't bother committing the transaction, it won't help us. 
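		 * (A commit can only hand pinned bytes back to the free pool,
		 * so if bytes_pinned is smaller than the request the commit
		 * cannot satisfy it.)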
3096 */ 3097 if (data_sinfo->bytes_pinned < bytes) 3098 committed = 1; 3099 spin_unlock(&data_sinfo->lock); 3100 3101 /* commit the current transaction and try again */ 3102 commit_trans: 3103 if (!committed && 3104 !atomic_read(&root->fs_info->open_ioctl_trans)) { 3105 committed = 1; 3106 trans = btrfs_join_transaction(root); 3107 if (IS_ERR(trans)) 3108 return PTR_ERR(trans); 3109 ret = btrfs_commit_transaction(trans, root); 3110 if (ret) 3111 return ret; 3112 goto again; 3113 } 3114 3115 return -ENOSPC; 3116 } 3117 data_sinfo->bytes_may_use += bytes; 3118 BTRFS_I(inode)->reserved_bytes += bytes; 3119 spin_unlock(&data_sinfo->lock); 3120 3121 return 0; 3122 } 3123 3124 /* 3125 * called when we are clearing an delalloc extent from the 3126 * inode's io_tree or there was an error for whatever reason 3127 * after calling btrfs_check_data_free_space 3128 */ 3129 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) 3130 { 3131 struct btrfs_root *root = BTRFS_I(inode)->root; 3132 struct btrfs_space_info *data_sinfo; 3133 3134 /* make sure bytes are sectorsize aligned */ 3135 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3136 3137 data_sinfo = BTRFS_I(inode)->space_info; 3138 spin_lock(&data_sinfo->lock); 3139 data_sinfo->bytes_may_use -= bytes; 3140 BTRFS_I(inode)->reserved_bytes -= bytes; 3141 spin_unlock(&data_sinfo->lock); 3142 } 3143 3144 static void force_metadata_allocation(struct btrfs_fs_info *info) 3145 { 3146 struct list_head *head = &info->space_info; 3147 struct btrfs_space_info *found; 3148 3149 rcu_read_lock(); 3150 list_for_each_entry_rcu(found, head, list) { 3151 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 3152 found->force_alloc = CHUNK_ALLOC_FORCE; 3153 } 3154 rcu_read_unlock(); 3155 } 3156 3157 static int should_alloc_chunk(struct btrfs_root *root, 3158 struct btrfs_space_info *sinfo, u64 alloc_bytes, 3159 int force) 3160 { 3161 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3162 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; 3163 u64 thresh; 3164 3165 if (force == CHUNK_ALLOC_FORCE) 3166 return 1; 3167 3168 /* 3169 * in limited mode, we want to have some free space up to 3170 * about 1% of the FS size. 3171 */ 3172 if (force == CHUNK_ALLOC_LIMITED) { 3173 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); 3174 thresh = max_t(u64, 64 * 1024 * 1024, 3175 div_factor_fine(thresh, 1)); 3176 3177 if (num_bytes - num_allocated < thresh) 3178 return 1; 3179 } 3180 3181 /* 3182 * we have two similar checks here, one based on percentage 3183 * and once based on a hard number of 256MB. The idea 3184 * is that if we have a good amount of free 3185 * room, don't allocate a chunk. 
A good amount means
 * the chunks we have allocated are less than 80% utilized,
 * or more than 256MB is free
 */
	if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
		return 0;

	if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
		return 0;

	thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);

	/* 256MB or 5% of the FS */
	thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));

	if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
		return 0;
	return 1;
}

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags, int force)
{
	struct btrfs_space_info *space_info;
	struct btrfs_fs_info *fs_info = extent_root->fs_info;
	int wait_for_alloc = 0;
	int ret = 0;

	flags = btrfs_reduce_alloc_profile(extent_root, flags);

	space_info = __find_space_info(extent_root->fs_info, flags);
	if (!space_info) {
		ret = update_space_info(extent_root->fs_info, flags,
					0, 0, &space_info);
		BUG_ON(ret);
	}
	BUG_ON(!space_info);

again:
	spin_lock(&space_info->lock);
	if (space_info->force_alloc)
		force = space_info->force_alloc;
	if (space_info->full) {
		spin_unlock(&space_info->lock);
		return 0;
	}

	if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
		spin_unlock(&space_info->lock);
		return 0;
	} else if (space_info->chunk_alloc) {
		wait_for_alloc = 1;
	} else {
		space_info->chunk_alloc = 1;
	}

	spin_unlock(&space_info->lock);

	mutex_lock(&fs_info->chunk_mutex);

	/*
	 * The chunk_mutex is held throughout the entirety of a chunk
	 * allocation, so once we've acquired the chunk_mutex we know that the
	 * other guy is done and we need to recheck and see if we should
	 * allocate.
	 */
	if (wait_for_alloc) {
		mutex_unlock(&fs_info->chunk_mutex);
		wait_for_alloc = 0;
		goto again;
	}

	/*
	 * If we have mixed data/metadata chunks we want to make sure we keep
	 * allocating mixed chunks instead of individual chunks.
	 */
	if (btrfs_mixed_space_info(space_info))
		flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);

	/*
	 * if we're doing a data chunk, go ahead and make sure that
	 * we keep a reasonable number of metadata chunks allocated in the
	 * FS as well.
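	 * Every metadata_ratio-th data chunk allocation forces a metadata
	 * chunk allocation, see force_metadata_allocation() above.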
3269 */ 3270 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { 3271 fs_info->data_chunk_allocations++; 3272 if (!(fs_info->data_chunk_allocations % 3273 fs_info->metadata_ratio)) 3274 force_metadata_allocation(fs_info); 3275 } 3276 3277 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3278 spin_lock(&space_info->lock); 3279 if (ret) 3280 space_info->full = 1; 3281 else 3282 ret = 1; 3283 3284 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; 3285 space_info->chunk_alloc = 0; 3286 spin_unlock(&space_info->lock); 3287 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3288 return ret; 3289 } 3290 3291 /* 3292 * shrink metadata reservation for delalloc 3293 */ 3294 static int shrink_delalloc(struct btrfs_trans_handle *trans, 3295 struct btrfs_root *root, u64 to_reclaim, int sync) 3296 { 3297 struct btrfs_block_rsv *block_rsv; 3298 struct btrfs_space_info *space_info; 3299 u64 reserved; 3300 u64 max_reclaim; 3301 u64 reclaimed = 0; 3302 long time_left; 3303 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3304 int loops = 0; 3305 unsigned long progress; 3306 3307 block_rsv = &root->fs_info->delalloc_block_rsv; 3308 space_info = block_rsv->space_info; 3309 3310 smp_mb(); 3311 reserved = space_info->bytes_reserved; 3312 progress = space_info->reservation_progress; 3313 3314 if (reserved == 0) 3315 return 0; 3316 3317 max_reclaim = min(reserved, to_reclaim); 3318 3319 while (loops < 1024) { 3320 /* have the flusher threads jump in and do some IO */ 3321 smp_mb(); 3322 nr_pages = min_t(unsigned long, nr_pages, 3323 root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); 3324 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3325 3326 spin_lock(&space_info->lock); 3327 if (reserved > space_info->bytes_reserved) 3328 reclaimed += reserved - space_info->bytes_reserved; 3329 reserved = space_info->bytes_reserved; 3330 spin_unlock(&space_info->lock); 3331 3332 loops++; 3333 3334 if (reserved == 0 || reclaimed >= max_reclaim) 3335 break; 3336 3337 if (trans && trans->transaction->blocked) 3338 return -EAGAIN; 3339 3340 time_left = schedule_timeout_interruptible(1); 3341 3342 /* We were interrupted, exit */ 3343 if (time_left) 3344 break; 3345 3346 /* we've kicked the IO a few times, if anything has been freed, 3347 * exit. There is no sense in looping here for a long time 3348 * when we really need to commit the transaction, or there are 3349 * just too many writers without enough free space 3350 */ 3351 3352 if (loops > 3) { 3353 smp_mb(); 3354 if (progress != space_info->reservation_progress) 3355 break; 3356 } 3357 3358 } 3359 return reclaimed >= to_reclaim; 3360 } 3361 3362 /* 3363 * Retries tells us how many times we've called reserve_metadata_bytes. The 3364 * idea is if this is the first call (retries == 0) then we will add to our 3365 * reserved count if we can't make the allocation in order to hold our place 3366 * while we go and try and free up space. That way for retries > 1 we don't try 3367 * and add space, we just check to see if the amount of unused space is >= the 3368 * total space, meaning that our reservation is valid. 3369 * 3370 * However if we don't intend to retry this reservation, pass -1 as retries so 3371 * that it short circuits this logic. 
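 *
 * (Note: in this version retries is an internal counter limited to two
 * retries rather than a parameter; callers such as btrfs_block_rsv_add()
 * and btrfs_block_rsv_check() choose the behaviour via the flush argument
 * instead.)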
 */
static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_block_rsv *block_rsv,
				  u64 orig_bytes, int flush)
{
	struct btrfs_space_info *space_info = block_rsv->space_info;
	u64 unused;
	u64 num_bytes = orig_bytes;
	int retries = 0;
	int ret = 0;
	bool reserved = false;
	bool committed = false;

again:
	ret = -ENOSPC;
	if (reserved)
		num_bytes = 0;

	spin_lock(&space_info->lock);
	unused = space_info->bytes_used + space_info->bytes_reserved +
		 space_info->bytes_pinned + space_info->bytes_readonly +
		 space_info->bytes_may_use;

	/*
	 * The idea here is that if we've not already over-reserved the
	 * block group then we can go ahead and save our reservation first
	 * and then start flushing if we need to. Otherwise if we've already
	 * overcommitted lets start flushing stuff first and then come back
	 * and try to make our reservation.
	 */
	if (unused <= space_info->total_bytes) {
		unused = space_info->total_bytes - unused;
		if (unused >= num_bytes) {
			if (!reserved)
				space_info->bytes_reserved += orig_bytes;
			ret = 0;
		} else {
			/*
			 * Ok set num_bytes to orig_bytes since we aren't
			 * overcommitted, this way we only try and reclaim
			 * what we need.
			 */
			num_bytes = orig_bytes;
		}
	} else {
		/*
		 * Ok we're over committed, set num_bytes to the overcommitted
		 * amount plus the amount of bytes that we need for this
		 * reservation.
		 */
		num_bytes = unused - space_info->total_bytes +
			    (orig_bytes * (retries + 1));
	}

	/*
	 * Couldn't make our reservation, save our place so while we're trying
	 * to reclaim space we can actually use it instead of somebody else
	 * stealing it from us.
	 */
	if (ret && !reserved) {
		space_info->bytes_reserved += orig_bytes;
		reserved = true;
	}

	spin_unlock(&space_info->lock);

	if (!ret)
		return 0;

	if (!flush)
		goto out;

	/*
	 * We do synchronous shrinking since we don't actually unreserve
	 * metadata until after the IO is completed.
	 */
	ret = shrink_delalloc(trans, root, num_bytes, 1);
	if (ret > 0)
		return 0;
	else if (ret < 0)
		goto out;

	/*
	 * So if we were overcommitted it's possible that somebody else flushed
	 * out enough space and we simply didn't have enough space to reclaim,
	 * so go back around and try again.
	 */
	if (retries < 2) {
		retries++;
		goto again;
	}

	spin_lock(&space_info->lock);
	/*
	 * Not enough space to be reclaimed, don't bother committing the
	 * transaction.
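	 * (Pinned bytes are roughly what a commit would return to this
	 * space_info, so anything less than orig_bytes means a commit is
	 * unlikely to help.)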
3469 */ 3470 if (space_info->bytes_pinned < orig_bytes) 3471 ret = -ENOSPC; 3472 spin_unlock(&space_info->lock); 3473 if (ret) 3474 goto out; 3475 3476 ret = -EAGAIN; 3477 if (trans || committed) 3478 goto out; 3479 3480 ret = -ENOSPC; 3481 trans = btrfs_join_transaction(root); 3482 if (IS_ERR(trans)) 3483 goto out; 3484 ret = btrfs_commit_transaction(trans, root); 3485 if (!ret) { 3486 trans = NULL; 3487 committed = true; 3488 goto again; 3489 } 3490 3491 out: 3492 if (reserved) { 3493 spin_lock(&space_info->lock); 3494 space_info->bytes_reserved -= orig_bytes; 3495 spin_unlock(&space_info->lock); 3496 } 3497 3498 return ret; 3499 } 3500 3501 static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, 3502 struct btrfs_root *root) 3503 { 3504 struct btrfs_block_rsv *block_rsv; 3505 if (root->ref_cows) 3506 block_rsv = trans->block_rsv; 3507 else 3508 block_rsv = root->block_rsv; 3509 3510 if (!block_rsv) 3511 block_rsv = &root->fs_info->empty_block_rsv; 3512 3513 return block_rsv; 3514 } 3515 3516 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, 3517 u64 num_bytes) 3518 { 3519 int ret = -ENOSPC; 3520 spin_lock(&block_rsv->lock); 3521 if (block_rsv->reserved >= num_bytes) { 3522 block_rsv->reserved -= num_bytes; 3523 if (block_rsv->reserved < block_rsv->size) 3524 block_rsv->full = 0; 3525 ret = 0; 3526 } 3527 spin_unlock(&block_rsv->lock); 3528 return ret; 3529 } 3530 3531 static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, 3532 u64 num_bytes, int update_size) 3533 { 3534 spin_lock(&block_rsv->lock); 3535 block_rsv->reserved += num_bytes; 3536 if (update_size) 3537 block_rsv->size += num_bytes; 3538 else if (block_rsv->reserved >= block_rsv->size) 3539 block_rsv->full = 1; 3540 spin_unlock(&block_rsv->lock); 3541 } 3542 3543 static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, 3544 struct btrfs_block_rsv *dest, u64 num_bytes) 3545 { 3546 struct btrfs_space_info *space_info = block_rsv->space_info; 3547 3548 spin_lock(&block_rsv->lock); 3549 if (num_bytes == (u64)-1) 3550 num_bytes = block_rsv->size; 3551 block_rsv->size -= num_bytes; 3552 if (block_rsv->reserved >= block_rsv->size) { 3553 num_bytes = block_rsv->reserved - block_rsv->size; 3554 block_rsv->reserved = block_rsv->size; 3555 block_rsv->full = 1; 3556 } else { 3557 num_bytes = 0; 3558 } 3559 spin_unlock(&block_rsv->lock); 3560 3561 if (num_bytes > 0) { 3562 if (dest) { 3563 spin_lock(&dest->lock); 3564 if (!dest->full) { 3565 u64 bytes_to_add; 3566 3567 bytes_to_add = dest->size - dest->reserved; 3568 bytes_to_add = min(num_bytes, bytes_to_add); 3569 dest->reserved += bytes_to_add; 3570 if (dest->reserved >= dest->size) 3571 dest->full = 1; 3572 num_bytes -= bytes_to_add; 3573 } 3574 spin_unlock(&dest->lock); 3575 } 3576 if (num_bytes) { 3577 spin_lock(&space_info->lock); 3578 space_info->bytes_reserved -= num_bytes; 3579 space_info->reservation_progress++; 3580 spin_unlock(&space_info->lock); 3581 } 3582 } 3583 } 3584 3585 static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, 3586 struct btrfs_block_rsv *dst, u64 num_bytes) 3587 { 3588 int ret; 3589 3590 ret = block_rsv_use_bytes(src, num_bytes); 3591 if (ret) 3592 return ret; 3593 3594 block_rsv_add_bytes(dst, num_bytes, 1); 3595 return 0; 3596 } 3597 3598 void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) 3599 { 3600 memset(rsv, 0, sizeof(*rsv)); 3601 spin_lock_init(&rsv->lock); 3602 atomic_set(&rsv->usage, 1); 3603 rsv->priority = 6; 3604 INIT_LIST_HEAD(&rsv->list); 3605 } 3606 3607 struct 
btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) 3608 { 3609 struct btrfs_block_rsv *block_rsv; 3610 struct btrfs_fs_info *fs_info = root->fs_info; 3611 3612 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); 3613 if (!block_rsv) 3614 return NULL; 3615 3616 btrfs_init_block_rsv(block_rsv); 3617 block_rsv->space_info = __find_space_info(fs_info, 3618 BTRFS_BLOCK_GROUP_METADATA); 3619 return block_rsv; 3620 } 3621 3622 void btrfs_free_block_rsv(struct btrfs_root *root, 3623 struct btrfs_block_rsv *rsv) 3624 { 3625 if (rsv && atomic_dec_and_test(&rsv->usage)) { 3626 btrfs_block_rsv_release(root, rsv, (u64)-1); 3627 if (!rsv->durable) 3628 kfree(rsv); 3629 } 3630 } 3631 3632 /* 3633 * make the block_rsv struct be able to capture freed space. 3634 * the captured space will re-add to the the block_rsv struct 3635 * after transaction commit 3636 */ 3637 void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, 3638 struct btrfs_block_rsv *block_rsv) 3639 { 3640 block_rsv->durable = 1; 3641 mutex_lock(&fs_info->durable_block_rsv_mutex); 3642 list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); 3643 mutex_unlock(&fs_info->durable_block_rsv_mutex); 3644 } 3645 3646 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 3647 struct btrfs_root *root, 3648 struct btrfs_block_rsv *block_rsv, 3649 u64 num_bytes) 3650 { 3651 int ret; 3652 3653 if (num_bytes == 0) 3654 return 0; 3655 3656 ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); 3657 if (!ret) { 3658 block_rsv_add_bytes(block_rsv, num_bytes, 1); 3659 return 0; 3660 } 3661 3662 return ret; 3663 } 3664 3665 int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, 3666 struct btrfs_root *root, 3667 struct btrfs_block_rsv *block_rsv, 3668 u64 min_reserved, int min_factor) 3669 { 3670 u64 num_bytes = 0; 3671 int commit_trans = 0; 3672 int ret = -ENOSPC; 3673 3674 if (!block_rsv) 3675 return 0; 3676 3677 spin_lock(&block_rsv->lock); 3678 if (min_factor > 0) 3679 num_bytes = div_factor(block_rsv->size, min_factor); 3680 if (min_reserved > num_bytes) 3681 num_bytes = min_reserved; 3682 3683 if (block_rsv->reserved >= num_bytes) { 3684 ret = 0; 3685 } else { 3686 num_bytes -= block_rsv->reserved; 3687 if (block_rsv->durable && 3688 block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) 3689 commit_trans = 1; 3690 } 3691 spin_unlock(&block_rsv->lock); 3692 if (!ret) 3693 return 0; 3694 3695 if (block_rsv->refill_used) { 3696 ret = reserve_metadata_bytes(trans, root, block_rsv, 3697 num_bytes, 0); 3698 if (!ret) { 3699 block_rsv_add_bytes(block_rsv, num_bytes, 0); 3700 return 0; 3701 } 3702 } 3703 3704 if (commit_trans) { 3705 if (trans) 3706 return -EAGAIN; 3707 3708 trans = btrfs_join_transaction(root); 3709 BUG_ON(IS_ERR(trans)); 3710 ret = btrfs_commit_transaction(trans, root); 3711 return 0; 3712 } 3713 3714 return -ENOSPC; 3715 } 3716 3717 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, 3718 struct btrfs_block_rsv *dst_rsv, 3719 u64 num_bytes) 3720 { 3721 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3722 } 3723 3724 void btrfs_block_rsv_release(struct btrfs_root *root, 3725 struct btrfs_block_rsv *block_rsv, 3726 u64 num_bytes) 3727 { 3728 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 3729 if (global_rsv->full || global_rsv == block_rsv || 3730 block_rsv->space_info != global_rsv->space_info) 3731 global_rsv = NULL; 3732 block_rsv_release_bytes(block_rsv, global_rsv, num_bytes); 3733 } 3734 3735 /* 3736 * helper to calculate size of global 
block reservation. 3737 * the desired value is sum of space used by extent tree, 3738 * checksum tree and root tree 3739 */ 3740 static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) 3741 { 3742 struct btrfs_space_info *sinfo; 3743 u64 num_bytes; 3744 u64 meta_used; 3745 u64 data_used; 3746 int csum_size = btrfs_super_csum_size(&fs_info->super_copy); 3747 3748 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); 3749 spin_lock(&sinfo->lock); 3750 data_used = sinfo->bytes_used; 3751 spin_unlock(&sinfo->lock); 3752 3753 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3754 spin_lock(&sinfo->lock); 3755 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) 3756 data_used = 0; 3757 meta_used = sinfo->bytes_used; 3758 spin_unlock(&sinfo->lock); 3759 3760 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * 3761 csum_size * 2; 3762 num_bytes += div64_u64(data_used + meta_used, 50); 3763 3764 if (num_bytes * 3 > meta_used) 3765 num_bytes = div64_u64(meta_used, 3); 3766 3767 return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); 3768 } 3769 3770 static void update_global_block_rsv(struct btrfs_fs_info *fs_info) 3771 { 3772 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; 3773 struct btrfs_space_info *sinfo = block_rsv->space_info; 3774 u64 num_bytes; 3775 3776 num_bytes = calc_global_metadata_size(fs_info); 3777 3778 spin_lock(&block_rsv->lock); 3779 spin_lock(&sinfo->lock); 3780 3781 block_rsv->size = num_bytes; 3782 3783 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 3784 sinfo->bytes_reserved + sinfo->bytes_readonly + 3785 sinfo->bytes_may_use; 3786 3787 if (sinfo->total_bytes > num_bytes) { 3788 num_bytes = sinfo->total_bytes - num_bytes; 3789 block_rsv->reserved += num_bytes; 3790 sinfo->bytes_reserved += num_bytes; 3791 } 3792 3793 if (block_rsv->reserved >= block_rsv->size) { 3794 num_bytes = block_rsv->reserved - block_rsv->size; 3795 sinfo->bytes_reserved -= num_bytes; 3796 sinfo->reservation_progress++; 3797 block_rsv->reserved = block_rsv->size; 3798 block_rsv->full = 1; 3799 } 3800 3801 spin_unlock(&sinfo->lock); 3802 spin_unlock(&block_rsv->lock); 3803 } 3804 3805 static void init_global_block_rsv(struct btrfs_fs_info *fs_info) 3806 { 3807 struct btrfs_space_info *space_info; 3808 3809 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); 3810 fs_info->chunk_block_rsv.space_info = space_info; 3811 fs_info->chunk_block_rsv.priority = 10; 3812 3813 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3814 fs_info->global_block_rsv.space_info = space_info; 3815 fs_info->global_block_rsv.priority = 10; 3816 fs_info->global_block_rsv.refill_used = 1; 3817 fs_info->delalloc_block_rsv.space_info = space_info; 3818 fs_info->trans_block_rsv.space_info = space_info; 3819 fs_info->empty_block_rsv.space_info = space_info; 3820 fs_info->empty_block_rsv.priority = 10; 3821 3822 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; 3823 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; 3824 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; 3825 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; 3826 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; 3827 3828 btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); 3829 3830 btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); 3831 3832 update_global_block_rsv(fs_info); 3833 } 3834 3835 static void release_global_block_rsv(struct btrfs_fs_info *fs_info) 3836 { 3837 
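	/*
	 * give everything in the global reserve back to its space_info and
	 * check that the other standing reserves have already been drained
	 */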
block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1); 3838 WARN_ON(fs_info->delalloc_block_rsv.size > 0); 3839 WARN_ON(fs_info->delalloc_block_rsv.reserved > 0); 3840 WARN_ON(fs_info->trans_block_rsv.size > 0); 3841 WARN_ON(fs_info->trans_block_rsv.reserved > 0); 3842 WARN_ON(fs_info->chunk_block_rsv.size > 0); 3843 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3844 } 3845 3846 int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, 3847 struct btrfs_root *root, 3848 struct btrfs_block_rsv *rsv) 3849 { 3850 struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; 3851 u64 num_bytes; 3852 int ret; 3853 3854 /* 3855 * Truncate should be freeing data, but give us 2 items just in case it 3856 * needs to use some space. We may want to be smarter about this in the 3857 * future. 3858 */ 3859 num_bytes = btrfs_calc_trans_metadata_size(root, 2); 3860 3861 /* We already have enough bytes, just return */ 3862 if (rsv->reserved >= num_bytes) 3863 return 0; 3864 3865 num_bytes -= rsv->reserved; 3866 3867 /* 3868 * You should have reserved enough space before hand to do this, so this 3869 * should not fail. 3870 */ 3871 ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); 3872 BUG_ON(ret); 3873 3874 return 0; 3875 } 3876 3877 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3878 struct btrfs_root *root, 3879 int num_items) 3880 { 3881 u64 num_bytes; 3882 int ret; 3883 3884 if (num_items == 0 || root->fs_info->chunk_root == root) 3885 return 0; 3886 3887 num_bytes = btrfs_calc_trans_metadata_size(root, num_items); 3888 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3889 num_bytes); 3890 if (!ret) { 3891 trans->bytes_reserved += num_bytes; 3892 trans->block_rsv = &root->fs_info->trans_block_rsv; 3893 } 3894 return ret; 3895 } 3896 3897 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 3898 struct btrfs_root *root) 3899 { 3900 if (!trans->bytes_reserved) 3901 return; 3902 3903 BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); 3904 btrfs_block_rsv_release(root, trans->block_rsv, 3905 trans->bytes_reserved); 3906 trans->bytes_reserved = 0; 3907 } 3908 3909 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, 3910 struct inode *inode) 3911 { 3912 struct btrfs_root *root = BTRFS_I(inode)->root; 3913 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); 3914 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; 3915 3916 /* 3917 * We need to hold space in order to delete our orphan item once we've 3918 * added it, so this takes the reservation so we can release it later 3919 * when we are truly done with the orphan item. 3920 */ 3921 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 3922 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3923 } 3924 3925 void btrfs_orphan_release_metadata(struct inode *inode) 3926 { 3927 struct btrfs_root *root = BTRFS_I(inode)->root; 3928 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 3929 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3930 } 3931 3932 int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, 3933 struct btrfs_pending_snapshot *pending) 3934 { 3935 struct btrfs_root *root = pending->root; 3936 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); 3937 struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; 3938 /* 3939 * two for root back/forward refs, two for directory entries 3940 * and one for root of the snapshot. 
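	 * hence the five items passed to btrfs_calc_trans_metadata_size()
	 * below.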
3941 */ 3942 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); 3943 dst_rsv->space_info = src_rsv->space_info; 3944 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3945 } 3946 3947 static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) 3948 { 3949 return num_bytes >>= 3; 3950 } 3951 3952 int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) 3953 { 3954 struct btrfs_root *root = BTRFS_I(inode)->root; 3955 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 3956 u64 to_reserve; 3957 int nr_extents; 3958 int reserved_extents; 3959 int ret; 3960 3961 if (btrfs_transaction_in_commit(root->fs_info)) 3962 schedule_timeout(1); 3963 3964 num_bytes = ALIGN(num_bytes, root->sectorsize); 3965 3966 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; 3967 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); 3968 3969 if (nr_extents > reserved_extents) { 3970 nr_extents -= reserved_extents; 3971 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); 3972 } else { 3973 nr_extents = 0; 3974 to_reserve = 0; 3975 } 3976 3977 to_reserve += calc_csum_metadata_size(inode, num_bytes); 3978 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 3979 if (ret) 3980 return ret; 3981 3982 atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); 3983 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 3984 3985 block_rsv_add_bytes(block_rsv, to_reserve, 1); 3986 3987 if (block_rsv->size > 512 * 1024 * 1024) 3988 shrink_delalloc(NULL, root, to_reserve, 0); 3989 3990 return 0; 3991 } 3992 3993 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) 3994 { 3995 struct btrfs_root *root = BTRFS_I(inode)->root; 3996 u64 to_free; 3997 int nr_extents; 3998 int reserved_extents; 3999 4000 num_bytes = ALIGN(num_bytes, root->sectorsize); 4001 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 4002 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); 4003 4004 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); 4005 do { 4006 int old, new; 4007 4008 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); 4009 if (nr_extents >= reserved_extents) { 4010 nr_extents = 0; 4011 break; 4012 } 4013 old = reserved_extents; 4014 nr_extents = reserved_extents - nr_extents; 4015 new = reserved_extents - nr_extents; 4016 old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, 4017 reserved_extents, new); 4018 if (likely(old == reserved_extents)) 4019 break; 4020 reserved_extents = old; 4021 } while (1); 4022 4023 to_free = calc_csum_metadata_size(inode, num_bytes); 4024 if (nr_extents > 0) 4025 to_free += btrfs_calc_trans_metadata_size(root, nr_extents); 4026 4027 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 4028 to_free); 4029 } 4030 4031 int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) 4032 { 4033 int ret; 4034 4035 ret = btrfs_check_data_free_space(inode, num_bytes); 4036 if (ret) 4037 return ret; 4038 4039 ret = btrfs_delalloc_reserve_metadata(inode, num_bytes); 4040 if (ret) { 4041 btrfs_free_reserved_data_space(inode, num_bytes); 4042 return ret; 4043 } 4044 4045 return 0; 4046 } 4047 4048 void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) 4049 { 4050 btrfs_delalloc_release_metadata(inode, num_bytes); 4051 btrfs_free_reserved_data_space(inode, num_bytes); 4052 } 4053 4054 static int update_block_group(struct btrfs_trans_handle *trans, 4055 struct btrfs_root *root, 4056 u64 bytenr, u64 num_bytes, int alloc) 
4057 { 4058 struct btrfs_block_group_cache *cache = NULL; 4059 struct btrfs_fs_info *info = root->fs_info; 4060 u64 total = num_bytes; 4061 u64 old_val; 4062 u64 byte_in_group; 4063 int factor; 4064 4065 /* block accounting for super block */ 4066 spin_lock(&info->delalloc_lock); 4067 old_val = btrfs_super_bytes_used(&info->super_copy); 4068 if (alloc) 4069 old_val += num_bytes; 4070 else 4071 old_val -= num_bytes; 4072 btrfs_set_super_bytes_used(&info->super_copy, old_val); 4073 spin_unlock(&info->delalloc_lock); 4074 4075 while (total) { 4076 cache = btrfs_lookup_block_group(info, bytenr); 4077 if (!cache) 4078 return -1; 4079 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | 4080 BTRFS_BLOCK_GROUP_RAID1 | 4081 BTRFS_BLOCK_GROUP_RAID10)) 4082 factor = 2; 4083 else 4084 factor = 1; 4085 /* 4086 * If this block group has free space cache written out, we 4087 * need to make sure to load it if we are removing space. This 4088 * is because we need the unpinning stage to actually add the 4089 * space back to the block group, otherwise we will leak space. 4090 */ 4091 if (!alloc && cache->cached == BTRFS_CACHE_NO) 4092 cache_block_group(cache, trans, NULL, 1); 4093 4094 byte_in_group = bytenr - cache->key.objectid; 4095 WARN_ON(byte_in_group > cache->key.offset); 4096 4097 spin_lock(&cache->space_info->lock); 4098 spin_lock(&cache->lock); 4099 4100 if (btrfs_super_cache_generation(&info->super_copy) != 0 && 4101 cache->disk_cache_state < BTRFS_DC_CLEAR) 4102 cache->disk_cache_state = BTRFS_DC_CLEAR; 4103 4104 cache->dirty = 1; 4105 old_val = btrfs_block_group_used(&cache->item); 4106 num_bytes = min(total, cache->key.offset - byte_in_group); 4107 if (alloc) { 4108 old_val += num_bytes; 4109 btrfs_set_block_group_used(&cache->item, old_val); 4110 cache->reserved -= num_bytes; 4111 cache->space_info->bytes_reserved -= num_bytes; 4112 cache->space_info->reservation_progress++; 4113 cache->space_info->bytes_used += num_bytes; 4114 cache->space_info->disk_used += num_bytes * factor; 4115 spin_unlock(&cache->lock); 4116 spin_unlock(&cache->space_info->lock); 4117 } else { 4118 old_val -= num_bytes; 4119 btrfs_set_block_group_used(&cache->item, old_val); 4120 cache->pinned += num_bytes; 4121 cache->space_info->bytes_pinned += num_bytes; 4122 cache->space_info->bytes_used -= num_bytes; 4123 cache->space_info->disk_used -= num_bytes * factor; 4124 spin_unlock(&cache->lock); 4125 spin_unlock(&cache->space_info->lock); 4126 4127 set_extent_dirty(info->pinned_extents, 4128 bytenr, bytenr + num_bytes - 1, 4129 GFP_NOFS | __GFP_NOFAIL); 4130 } 4131 btrfs_put_block_group(cache); 4132 total -= num_bytes; 4133 bytenr += num_bytes; 4134 } 4135 return 0; 4136 } 4137 4138 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 4139 { 4140 struct btrfs_block_group_cache *cache; 4141 u64 bytenr; 4142 4143 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 4144 if (!cache) 4145 return 0; 4146 4147 bytenr = cache->key.objectid; 4148 btrfs_put_block_group(cache); 4149 4150 return bytenr; 4151 } 4152 4153 static int pin_down_extent(struct btrfs_root *root, 4154 struct btrfs_block_group_cache *cache, 4155 u64 bytenr, u64 num_bytes, int reserved) 4156 { 4157 spin_lock(&cache->space_info->lock); 4158 spin_lock(&cache->lock); 4159 cache->pinned += num_bytes; 4160 cache->space_info->bytes_pinned += num_bytes; 4161 if (reserved) { 4162 cache->reserved -= num_bytes; 4163 cache->space_info->bytes_reserved -= num_bytes; 4164 cache->space_info->reservation_progress++; 4165 } 4166 
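/*
 * the bytes above either move from the reserved counters to the pinned
 * counters (when 'reserved' is set) or are accounted as newly pinned
 * space; the range is then recorded in pinned_extents below so it is
 * only returned to the free space cache once the transaction commits.
 */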
spin_unlock(&cache->lock); 4167 spin_unlock(&cache->space_info->lock); 4168 4169 set_extent_dirty(root->fs_info->pinned_extents, bytenr, 4170 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); 4171 return 0; 4172 } 4173 4174 /* 4175 * this function must be called within transaction 4176 */ 4177 int btrfs_pin_extent(struct btrfs_root *root, 4178 u64 bytenr, u64 num_bytes, int reserved) 4179 { 4180 struct btrfs_block_group_cache *cache; 4181 4182 cache = btrfs_lookup_block_group(root->fs_info, bytenr); 4183 BUG_ON(!cache); 4184 4185 pin_down_extent(root, cache, bytenr, num_bytes, reserved); 4186 4187 btrfs_put_block_group(cache); 4188 return 0; 4189 } 4190 4191 /* 4192 * update size of reserved extents. this function may return -EAGAIN 4193 * if 'reserve' is true or 'sinfo' is false. 4194 */ 4195 int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 4196 u64 num_bytes, int reserve, int sinfo) 4197 { 4198 int ret = 0; 4199 if (sinfo) { 4200 struct btrfs_space_info *space_info = cache->space_info; 4201 spin_lock(&space_info->lock); 4202 spin_lock(&cache->lock); 4203 if (reserve) { 4204 if (cache->ro) { 4205 ret = -EAGAIN; 4206 } else { 4207 cache->reserved += num_bytes; 4208 space_info->bytes_reserved += num_bytes; 4209 } 4210 } else { 4211 if (cache->ro) 4212 space_info->bytes_readonly += num_bytes; 4213 cache->reserved -= num_bytes; 4214 space_info->bytes_reserved -= num_bytes; 4215 space_info->reservation_progress++; 4216 } 4217 spin_unlock(&cache->lock); 4218 spin_unlock(&space_info->lock); 4219 } else { 4220 spin_lock(&cache->lock); 4221 if (cache->ro) { 4222 ret = -EAGAIN; 4223 } else { 4224 if (reserve) 4225 cache->reserved += num_bytes; 4226 else 4227 cache->reserved -= num_bytes; 4228 } 4229 spin_unlock(&cache->lock); 4230 } 4231 return ret; 4232 } 4233 4234 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 4235 struct btrfs_root *root) 4236 { 4237 struct btrfs_fs_info *fs_info = root->fs_info; 4238 struct btrfs_caching_control *next; 4239 struct btrfs_caching_control *caching_ctl; 4240 struct btrfs_block_group_cache *cache; 4241 4242 down_write(&fs_info->extent_commit_sem); 4243 4244 list_for_each_entry_safe(caching_ctl, next, 4245 &fs_info->caching_block_groups, list) { 4246 cache = caching_ctl->block_group; 4247 if (block_group_cache_done(cache)) { 4248 cache->last_byte_to_unpin = (u64)-1; 4249 list_del_init(&caching_ctl->list); 4250 put_caching_control(caching_ctl); 4251 } else { 4252 cache->last_byte_to_unpin = caching_ctl->progress; 4253 } 4254 } 4255 4256 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 4257 fs_info->pinned_extents = &fs_info->freed_extents[1]; 4258 else 4259 fs_info->pinned_extents = &fs_info->freed_extents[0]; 4260 4261 up_write(&fs_info->extent_commit_sem); 4262 4263 update_global_block_rsv(fs_info); 4264 return 0; 4265 } 4266 4267 static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 4268 { 4269 struct btrfs_fs_info *fs_info = root->fs_info; 4270 struct btrfs_block_group_cache *cache = NULL; 4271 u64 len; 4272 4273 while (start <= end) { 4274 if (!cache || 4275 start >= cache->key.objectid + cache->key.offset) { 4276 if (cache) 4277 btrfs_put_block_group(cache); 4278 cache = btrfs_lookup_block_group(fs_info, start); 4279 BUG_ON(!cache); 4280 } 4281 4282 len = cache->key.objectid + cache->key.offset - start; 4283 len = min(len, end + 1 - start); 4284 4285 if (start < cache->last_byte_to_unpin) { 4286 len = min(len, cache->last_byte_to_unpin - start); 4287 btrfs_add_free_space(cache, start, len); 
4288 } 4289 4290 start += len; 4291 4292 spin_lock(&cache->space_info->lock); 4293 spin_lock(&cache->lock); 4294 cache->pinned -= len; 4295 cache->space_info->bytes_pinned -= len; 4296 if (cache->ro) { 4297 cache->space_info->bytes_readonly += len; 4298 } else if (cache->reserved_pinned > 0) { 4299 len = min(len, cache->reserved_pinned); 4300 cache->reserved_pinned -= len; 4301 cache->space_info->bytes_reserved += len; 4302 } 4303 spin_unlock(&cache->lock); 4304 spin_unlock(&cache->space_info->lock); 4305 } 4306 4307 if (cache) 4308 btrfs_put_block_group(cache); 4309 return 0; 4310 } 4311 4312 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 4313 struct btrfs_root *root) 4314 { 4315 struct btrfs_fs_info *fs_info = root->fs_info; 4316 struct extent_io_tree *unpin; 4317 struct btrfs_block_rsv *block_rsv; 4318 struct btrfs_block_rsv *next_rsv; 4319 u64 start; 4320 u64 end; 4321 int idx; 4322 int ret; 4323 4324 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 4325 unpin = &fs_info->freed_extents[1]; 4326 else 4327 unpin = &fs_info->freed_extents[0]; 4328 4329 while (1) { 4330 ret = find_first_extent_bit(unpin, 0, &start, &end, 4331 EXTENT_DIRTY); 4332 if (ret) 4333 break; 4334 4335 if (btrfs_test_opt(root, DISCARD)) 4336 ret = btrfs_discard_extent(root, start, 4337 end + 1 - start, NULL); 4338 4339 clear_extent_dirty(unpin, start, end, GFP_NOFS); 4340 unpin_extent_range(root, start, end); 4341 cond_resched(); 4342 } 4343 4344 mutex_lock(&fs_info->durable_block_rsv_mutex); 4345 list_for_each_entry_safe(block_rsv, next_rsv, 4346 &fs_info->durable_block_rsv_list, list) { 4347 4348 idx = trans->transid & 0x1; 4349 if (block_rsv->freed[idx] > 0) { 4350 block_rsv_add_bytes(block_rsv, 4351 block_rsv->freed[idx], 0); 4352 block_rsv->freed[idx] = 0; 4353 } 4354 if (atomic_read(&block_rsv->usage) == 0) { 4355 btrfs_block_rsv_release(root, block_rsv, (u64)-1); 4356 4357 if (block_rsv->freed[0] == 0 && 4358 block_rsv->freed[1] == 0) { 4359 list_del_init(&block_rsv->list); 4360 kfree(block_rsv); 4361 } 4362 } else { 4363 btrfs_block_rsv_release(root, block_rsv, 0); 4364 } 4365 } 4366 mutex_unlock(&fs_info->durable_block_rsv_mutex); 4367 4368 return 0; 4369 } 4370 4371 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 4372 struct btrfs_root *root, 4373 u64 bytenr, u64 num_bytes, u64 parent, 4374 u64 root_objectid, u64 owner_objectid, 4375 u64 owner_offset, int refs_to_drop, 4376 struct btrfs_delayed_extent_op *extent_op) 4377 { 4378 struct btrfs_key key; 4379 struct btrfs_path *path; 4380 struct btrfs_fs_info *info = root->fs_info; 4381 struct btrfs_root *extent_root = info->extent_root; 4382 struct extent_buffer *leaf; 4383 struct btrfs_extent_item *ei; 4384 struct btrfs_extent_inline_ref *iref; 4385 int ret; 4386 int is_data; 4387 int extent_slot = 0; 4388 int found_extent = 0; 4389 int num_to_del = 1; 4390 u32 item_size; 4391 u64 refs; 4392 4393 path = btrfs_alloc_path(); 4394 if (!path) 4395 return -ENOMEM; 4396 4397 path->reada = 1; 4398 path->leave_spinning = 1; 4399 4400 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 4401 BUG_ON(!is_data && refs_to_drop != 1); 4402 4403 ret = lookup_extent_backref(trans, extent_root, path, &iref, 4404 bytenr, num_bytes, parent, 4405 root_objectid, owner_objectid, 4406 owner_offset); 4407 if (ret == 0) { 4408 extent_slot = path->slots[0]; 4409 while (extent_slot >= 0) { 4410 btrfs_item_key_to_cpu(path->nodes[0], &key, 4411 extent_slot); 4412 if (key.objectid != bytenr) 4413 break; 4414 if (key.type == 
BTRFS_EXTENT_ITEM_KEY && 4415 key.offset == num_bytes) { 4416 found_extent = 1; 4417 break; 4418 } 4419 if (path->slots[0] - extent_slot > 5) 4420 break; 4421 extent_slot--; 4422 } 4423 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 4424 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); 4425 if (found_extent && item_size < sizeof(*ei)) 4426 found_extent = 0; 4427 #endif 4428 if (!found_extent) { 4429 BUG_ON(iref); 4430 ret = remove_extent_backref(trans, extent_root, path, 4431 NULL, refs_to_drop, 4432 is_data); 4433 BUG_ON(ret); 4434 btrfs_release_path(path); 4435 path->leave_spinning = 1; 4436 4437 key.objectid = bytenr; 4438 key.type = BTRFS_EXTENT_ITEM_KEY; 4439 key.offset = num_bytes; 4440 4441 ret = btrfs_search_slot(trans, extent_root, 4442 &key, path, -1, 1); 4443 if (ret) { 4444 printk(KERN_ERR "umm, got %d back from search" 4445 ", was looking for %llu\n", ret, 4446 (unsigned long long)bytenr); 4447 btrfs_print_leaf(extent_root, path->nodes[0]); 4448 } 4449 BUG_ON(ret); 4450 extent_slot = path->slots[0]; 4451 } 4452 } else { 4453 btrfs_print_leaf(extent_root, path->nodes[0]); 4454 WARN_ON(1); 4455 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 4456 "parent %llu root %llu owner %llu offset %llu\n", 4457 (unsigned long long)bytenr, 4458 (unsigned long long)parent, 4459 (unsigned long long)root_objectid, 4460 (unsigned long long)owner_objectid, 4461 (unsigned long long)owner_offset); 4462 } 4463 4464 leaf = path->nodes[0]; 4465 item_size = btrfs_item_size_nr(leaf, extent_slot); 4466 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 4467 if (item_size < sizeof(*ei)) { 4468 BUG_ON(found_extent || extent_slot != path->slots[0]); 4469 ret = convert_extent_item_v0(trans, extent_root, path, 4470 owner_objectid, 0); 4471 BUG_ON(ret < 0); 4472 4473 btrfs_release_path(path); 4474 path->leave_spinning = 1; 4475 4476 key.objectid = bytenr; 4477 key.type = BTRFS_EXTENT_ITEM_KEY; 4478 key.offset = num_bytes; 4479 4480 ret = btrfs_search_slot(trans, extent_root, &key, path, 4481 -1, 1); 4482 if (ret) { 4483 printk(KERN_ERR "umm, got %d back from search" 4484 ", was looking for %llu\n", ret, 4485 (unsigned long long)bytenr); 4486 btrfs_print_leaf(extent_root, path->nodes[0]); 4487 } 4488 BUG_ON(ret); 4489 extent_slot = path->slots[0]; 4490 leaf = path->nodes[0]; 4491 item_size = btrfs_item_size_nr(leaf, extent_slot); 4492 } 4493 #endif 4494 BUG_ON(item_size < sizeof(*ei)); 4495 ei = btrfs_item_ptr(leaf, extent_slot, 4496 struct btrfs_extent_item); 4497 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 4498 struct btrfs_tree_block_info *bi; 4499 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); 4500 bi = (struct btrfs_tree_block_info *)(ei + 1); 4501 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); 4502 } 4503 4504 refs = btrfs_extent_refs(leaf, ei); 4505 BUG_ON(refs < refs_to_drop); 4506 refs -= refs_to_drop; 4507 4508 if (refs > 0) { 4509 if (extent_op) 4510 __run_delayed_extent_op(extent_op, leaf, ei); 4511 /* 4512 * In the case of inline back ref, reference count will 4513 * be updated by remove_extent_backref 4514 */ 4515 if (iref) { 4516 BUG_ON(!found_extent); 4517 } else { 4518 btrfs_set_extent_refs(leaf, ei, refs); 4519 btrfs_mark_buffer_dirty(leaf); 4520 } 4521 if (found_extent) { 4522 ret = remove_extent_backref(trans, extent_root, path, 4523 iref, refs_to_drop, 4524 is_data); 4525 BUG_ON(ret); 4526 } 4527 } else { 4528 if (found_extent) { 4529 BUG_ON(is_data && refs_to_drop != 4530 extent_data_ref_count(root, path, iref)); 4531 if (iref) { 4532 BUG_ON(path->slots[0] != extent_slot); 
} else {
4534 BUG_ON(path->slots[0] != extent_slot + 1);
4535 path->slots[0] = extent_slot;
4536 num_to_del = 2;
4537 }
4538 }
4539
4540 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
4541 num_to_del);
4542 BUG_ON(ret);
4543 btrfs_release_path(path);
4544
4545 if (is_data) {
4546 ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
4547 BUG_ON(ret);
4548 } else {
4549 invalidate_mapping_pages(info->btree_inode->i_mapping,
4550 bytenr >> PAGE_CACHE_SHIFT,
4551 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
4552 }
4553
4554 ret = update_block_group(trans, root, bytenr, num_bytes, 0);
4555 BUG_ON(ret);
4556 }
4557 btrfs_free_path(path);
4558 return ret;
4559 }
4560
4561 /*
4562 * when we free a block, it is possible (and likely) that we free the last
4563 * delayed ref for that extent as well. This searches the delayed ref tree for
4564 * a given extent, and if there are no other delayed refs to be processed, it
4565 * removes it from the tree.
4566 */
4567 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
4568 struct btrfs_root *root, u64 bytenr)
4569 {
4570 struct btrfs_delayed_ref_head *head;
4571 struct btrfs_delayed_ref_root *delayed_refs;
4572 struct btrfs_delayed_ref_node *ref;
4573 struct rb_node *node;
4574 int ret = 0;
4575
4576 delayed_refs = &trans->transaction->delayed_refs;
4577 spin_lock(&delayed_refs->lock);
4578 head = btrfs_find_delayed_ref_head(trans, bytenr);
4579 if (!head)
4580 goto out;
4581
4582 node = rb_prev(&head->node.rb_node);
4583 if (!node)
4584 goto out;
4585
4586 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
4587
4588 /* there are still entries for this ref, we can't drop it */
4589 if (ref->bytenr == bytenr)
4590 goto out;
4591
4592 if (head->extent_op) {
4593 if (!head->must_insert_reserved)
4594 goto out;
4595 kfree(head->extent_op);
4596 head->extent_op = NULL;
4597 }
4598
4599 /*
4600 * waiting for the lock here would deadlock. If someone else has it
4601 * locked they are already in the process of dropping it anyway
4602 */
4603 if (!mutex_trylock(&head->mutex))
4604 goto out;
4605
4606 /*
4607 * at this point we have a head with no other entries. Go
4608 * ahead and process it.
4609 */
4610 head->node.in_tree = 0;
4611 rb_erase(&head->node.rb_node, &delayed_refs->root);
4612
4613 delayed_refs->num_entries--;
4614
4615 /*
4616 * we don't take a ref on the node because we're removing it from the
4617 * tree, so we just steal the ref the tree was holding.
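 *
 * a return value of 1 further down means the head we removed still had
 * must_insert_reserved set, i.e. the extent item for this block was never
 * inserted into the extent tree; btrfs_free_tree_block() uses that to
 * decide whether the block can skip the pinning path and have its space
 * handed straight back.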
4618 */ 4619 delayed_refs->num_heads--; 4620 if (list_empty(&head->cluster)) 4621 delayed_refs->num_heads_ready--; 4622 4623 list_del_init(&head->cluster); 4624 spin_unlock(&delayed_refs->lock); 4625 4626 BUG_ON(head->extent_op); 4627 if (head->must_insert_reserved) 4628 ret = 1; 4629 4630 mutex_unlock(&head->mutex); 4631 btrfs_put_delayed_ref(&head->node); 4632 return ret; 4633 out: 4634 spin_unlock(&delayed_refs->lock); 4635 return 0; 4636 } 4637 4638 void btrfs_free_tree_block(struct btrfs_trans_handle *trans, 4639 struct btrfs_root *root, 4640 struct extent_buffer *buf, 4641 u64 parent, int last_ref) 4642 { 4643 struct btrfs_block_rsv *block_rsv; 4644 struct btrfs_block_group_cache *cache = NULL; 4645 int ret; 4646 4647 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 4648 ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, 4649 parent, root->root_key.objectid, 4650 btrfs_header_level(buf), 4651 BTRFS_DROP_DELAYED_REF, NULL); 4652 BUG_ON(ret); 4653 } 4654 4655 if (!last_ref) 4656 return; 4657 4658 block_rsv = get_block_rsv(trans, root); 4659 cache = btrfs_lookup_block_group(root->fs_info, buf->start); 4660 if (block_rsv->space_info != cache->space_info) 4661 goto out; 4662 4663 if (btrfs_header_generation(buf) == trans->transid) { 4664 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 4665 ret = check_ref_cleanup(trans, root, buf->start); 4666 if (!ret) 4667 goto pin; 4668 } 4669 4670 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 4671 pin_down_extent(root, cache, buf->start, buf->len, 1); 4672 goto pin; 4673 } 4674 4675 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 4676 4677 btrfs_add_free_space(cache, buf->start, buf->len); 4678 ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); 4679 if (ret == -EAGAIN) { 4680 /* block group became read-only */ 4681 btrfs_update_reserved_bytes(cache, buf->len, 0, 1); 4682 goto out; 4683 } 4684 4685 ret = 1; 4686 spin_lock(&block_rsv->lock); 4687 if (block_rsv->reserved < block_rsv->size) { 4688 block_rsv->reserved += buf->len; 4689 ret = 0; 4690 } 4691 spin_unlock(&block_rsv->lock); 4692 4693 if (ret) { 4694 spin_lock(&cache->space_info->lock); 4695 cache->space_info->bytes_reserved -= buf->len; 4696 cache->space_info->reservation_progress++; 4697 spin_unlock(&cache->space_info->lock); 4698 } 4699 goto out; 4700 } 4701 pin: 4702 if (block_rsv->durable && !cache->ro) { 4703 ret = 0; 4704 spin_lock(&cache->lock); 4705 if (!cache->ro) { 4706 cache->reserved_pinned += buf->len; 4707 ret = 1; 4708 } 4709 spin_unlock(&cache->lock); 4710 4711 if (ret) { 4712 spin_lock(&block_rsv->lock); 4713 block_rsv->freed[trans->transid & 0x1] += buf->len; 4714 spin_unlock(&block_rsv->lock); 4715 } 4716 } 4717 out: 4718 /* 4719 * Deleting the buffer, clear the corrupt flag since it doesn't matter 4720 * anymore. 4721 */ 4722 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); 4723 btrfs_put_block_group(cache); 4724 } 4725 4726 int btrfs_free_extent(struct btrfs_trans_handle *trans, 4727 struct btrfs_root *root, 4728 u64 bytenr, u64 num_bytes, u64 parent, 4729 u64 root_objectid, u64 owner, u64 offset) 4730 { 4731 int ret; 4732 4733 /* 4734 * tree log blocks never actually go into the extent allocation 4735 * tree, just update pinning info and exit early. 
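 *
 * everything else is queued as a delayed ref: tree blocks (owner below
 * BTRFS_FIRST_FREE_OBJECTID) go through btrfs_add_delayed_tree_ref() and
 * file data goes through btrfs_add_delayed_data_ref(); the extent tree
 * itself is only updated later when the delayed refs are run.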
4736 */ 4737 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 4738 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 4739 /* unlocks the pinned mutex */ 4740 btrfs_pin_extent(root, bytenr, num_bytes, 1); 4741 ret = 0; 4742 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 4743 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 4744 parent, root_objectid, (int)owner, 4745 BTRFS_DROP_DELAYED_REF, NULL); 4746 BUG_ON(ret); 4747 } else { 4748 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 4749 parent, root_objectid, owner, 4750 offset, BTRFS_DROP_DELAYED_REF, NULL); 4751 BUG_ON(ret); 4752 } 4753 return ret; 4754 } 4755 4756 static u64 stripe_align(struct btrfs_root *root, u64 val) 4757 { 4758 u64 mask = ((u64)root->stripesize - 1); 4759 u64 ret = (val + mask) & ~mask; 4760 return ret; 4761 } 4762 4763 /* 4764 * when we wait for progress in the block group caching, its because 4765 * our allocation attempt failed at least once. So, we must sleep 4766 * and let some progress happen before we try again. 4767 * 4768 * This function will sleep at least once waiting for new free space to 4769 * show up, and then it will check the block group free space numbers 4770 * for our min num_bytes. Another option is to have it go ahead 4771 * and look in the rbtree for a free extent of a given size, but this 4772 * is a good start. 4773 */ 4774 static noinline int 4775 wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 4776 u64 num_bytes) 4777 { 4778 struct btrfs_caching_control *caching_ctl; 4779 DEFINE_WAIT(wait); 4780 4781 caching_ctl = get_caching_control(cache); 4782 if (!caching_ctl) 4783 return 0; 4784 4785 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 4786 (cache->free_space_ctl->free_space >= num_bytes)); 4787 4788 put_caching_control(caching_ctl); 4789 return 0; 4790 } 4791 4792 static noinline int 4793 wait_block_group_cache_done(struct btrfs_block_group_cache *cache) 4794 { 4795 struct btrfs_caching_control *caching_ctl; 4796 DEFINE_WAIT(wait); 4797 4798 caching_ctl = get_caching_control(cache); 4799 if (!caching_ctl) 4800 return 0; 4801 4802 wait_event(caching_ctl->wait, block_group_cache_done(cache)); 4803 4804 put_caching_control(caching_ctl); 4805 return 0; 4806 } 4807 4808 static int get_block_group_index(struct btrfs_block_group_cache *cache) 4809 { 4810 int index; 4811 if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) 4812 index = 0; 4813 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) 4814 index = 1; 4815 else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) 4816 index = 2; 4817 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) 4818 index = 3; 4819 else 4820 index = 4; 4821 return index; 4822 } 4823 4824 enum btrfs_loop_type { 4825 LOOP_FIND_IDEAL = 0, 4826 LOOP_CACHING_NOWAIT = 1, 4827 LOOP_CACHING_WAIT = 2, 4828 LOOP_ALLOC_CHUNK = 3, 4829 LOOP_NO_EMPTY_SIZE = 4, 4830 }; 4831 4832 /* 4833 * walks the btree of allocated extents and find a hole of a given size. 4834 * The key ins is changed to record the hole: 4835 * ins->objectid == block start 4836 * ins->flags = BTRFS_EXTENT_ITEM_KEY 4837 * ins->offset == number of blocks 4838 * Any available blocks before search_start are skipped. 
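 *
 * the search is staged by btrfs_loop_type: LOOP_FIND_IDEAL and
 * LOOP_CACHING_NOWAIT only consider block groups that are (partially)
 * cached, LOOP_CACHING_WAIT waits for caching to make progress,
 * LOOP_ALLOC_CHUNK tries to allocate a fresh chunk, and
 * LOOP_NO_EMPTY_SIZE retries with empty_size and empty_cluster forced
 * to zero before we finally give up with -ENOSPC.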
4839 */ 4840 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 4841 struct btrfs_root *orig_root, 4842 u64 num_bytes, u64 empty_size, 4843 u64 search_start, u64 search_end, 4844 u64 hint_byte, struct btrfs_key *ins, 4845 u64 data) 4846 { 4847 int ret = 0; 4848 struct btrfs_root *root = orig_root->fs_info->extent_root; 4849 struct btrfs_free_cluster *last_ptr = NULL; 4850 struct btrfs_block_group_cache *block_group = NULL; 4851 int empty_cluster = 2 * 1024 * 1024; 4852 int allowed_chunk_alloc = 0; 4853 int done_chunk_alloc = 0; 4854 struct btrfs_space_info *space_info; 4855 int last_ptr_loop = 0; 4856 int loop = 0; 4857 int index = 0; 4858 bool found_uncached_bg = false; 4859 bool failed_cluster_refill = false; 4860 bool failed_alloc = false; 4861 bool use_cluster = true; 4862 u64 ideal_cache_percent = 0; 4863 u64 ideal_cache_offset = 0; 4864 4865 WARN_ON(num_bytes < root->sectorsize); 4866 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 4867 ins->objectid = 0; 4868 ins->offset = 0; 4869 4870 space_info = __find_space_info(root->fs_info, data); 4871 if (!space_info) { 4872 printk(KERN_ERR "No space info for %llu\n", data); 4873 return -ENOSPC; 4874 } 4875 4876 /* 4877 * If the space info is for both data and metadata it means we have a 4878 * small filesystem and we can't use the clustering stuff. 4879 */ 4880 if (btrfs_mixed_space_info(space_info)) 4881 use_cluster = false; 4882 4883 if (orig_root->ref_cows || empty_size) 4884 allowed_chunk_alloc = 1; 4885 4886 if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { 4887 last_ptr = &root->fs_info->meta_alloc_cluster; 4888 if (!btrfs_test_opt(root, SSD)) 4889 empty_cluster = 64 * 1024; 4890 } 4891 4892 if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster && 4893 btrfs_test_opt(root, SSD)) { 4894 last_ptr = &root->fs_info->data_alloc_cluster; 4895 } 4896 4897 if (last_ptr) { 4898 spin_lock(&last_ptr->lock); 4899 if (last_ptr->block_group) 4900 hint_byte = last_ptr->window_start; 4901 spin_unlock(&last_ptr->lock); 4902 } 4903 4904 search_start = max(search_start, first_logical_byte(root, 0)); 4905 search_start = max(search_start, hint_byte); 4906 4907 if (!last_ptr) 4908 empty_cluster = 0; 4909 4910 if (search_start == hint_byte) { 4911 ideal_cache: 4912 block_group = btrfs_lookup_block_group(root->fs_info, 4913 search_start); 4914 /* 4915 * we don't want to use the block group if it doesn't match our 4916 * allocation bits, or if its not cached. 4917 * 4918 * However if we are re-searching with an ideal block group 4919 * picked out then we don't care that the block group is cached. 
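 *
 * the "ideal" block group is the candidate recorded further down with the
 * highest computed free percentage; roughly, a 1GB block group with 700MB
 * used works out to free_percent = 100 - 68 = 32 and beats any candidate
 * recorded so far with a smaller value.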
4920 */ 4921 if (block_group && block_group_bits(block_group, data) && 4922 (block_group->cached != BTRFS_CACHE_NO || 4923 search_start == ideal_cache_offset)) { 4924 down_read(&space_info->groups_sem); 4925 if (list_empty(&block_group->list) || 4926 block_group->ro) { 4927 /* 4928 * someone is removing this block group, 4929 * we can't jump into the have_block_group 4930 * target because our list pointers are not 4931 * valid 4932 */ 4933 btrfs_put_block_group(block_group); 4934 up_read(&space_info->groups_sem); 4935 } else { 4936 index = get_block_group_index(block_group); 4937 goto have_block_group; 4938 } 4939 } else if (block_group) { 4940 btrfs_put_block_group(block_group); 4941 } 4942 } 4943 search: 4944 down_read(&space_info->groups_sem); 4945 list_for_each_entry(block_group, &space_info->block_groups[index], 4946 list) { 4947 u64 offset; 4948 int cached; 4949 4950 btrfs_get_block_group(block_group); 4951 search_start = block_group->key.objectid; 4952 4953 /* 4954 * this can happen if we end up cycling through all the 4955 * raid types, but we want to make sure we only allocate 4956 * for the proper type. 4957 */ 4958 if (!block_group_bits(block_group, data)) { 4959 u64 extra = BTRFS_BLOCK_GROUP_DUP | 4960 BTRFS_BLOCK_GROUP_RAID1 | 4961 BTRFS_BLOCK_GROUP_RAID10; 4962 4963 /* 4964 * if they asked for extra copies and this block group 4965 * doesn't provide them, bail. This does allow us to 4966 * fill raid0 from raid1. 4967 */ 4968 if ((data & extra) && !(block_group->flags & extra)) 4969 goto loop; 4970 } 4971 4972 have_block_group: 4973 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4974 u64 free_percent; 4975 4976 ret = cache_block_group(block_group, trans, 4977 orig_root, 1); 4978 if (block_group->cached == BTRFS_CACHE_FINISHED) 4979 goto have_block_group; 4980 4981 free_percent = btrfs_block_group_used(&block_group->item); 4982 free_percent *= 100; 4983 free_percent = div64_u64(free_percent, 4984 block_group->key.offset); 4985 free_percent = 100 - free_percent; 4986 if (free_percent > ideal_cache_percent && 4987 likely(!block_group->ro)) { 4988 ideal_cache_offset = block_group->key.objectid; 4989 ideal_cache_percent = free_percent; 4990 } 4991 4992 /* 4993 * We only want to start kthread caching if we are at 4994 * the point where we will wait for caching to make 4995 * progress, or if our ideal search is over and we've 4996 * found somebody to start caching. 4997 */ 4998 if (loop > LOOP_CACHING_NOWAIT || 4999 (loop > LOOP_FIND_IDEAL && 5000 atomic_read(&space_info->caching_threads) < 2)) { 5001 ret = cache_block_group(block_group, trans, 5002 orig_root, 0); 5003 BUG_ON(ret); 5004 } 5005 found_uncached_bg = true; 5006 5007 /* 5008 * If loop is set for cached only, try the next block 5009 * group. 
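 *
 * (LOOP_FIND_IDEAL is that cached-only pass: uncached groups are only
 * recorded as caching candidates above and get revisited on a later
 * pass through the loop.)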
5010 */ 5011 if (loop == LOOP_FIND_IDEAL) 5012 goto loop; 5013 } 5014 5015 cached = block_group_cache_done(block_group); 5016 if (unlikely(!cached)) 5017 found_uncached_bg = true; 5018 5019 if (unlikely(block_group->ro)) 5020 goto loop; 5021 5022 spin_lock(&block_group->free_space_ctl->tree_lock); 5023 if (cached && 5024 block_group->free_space_ctl->free_space < 5025 num_bytes + empty_size) { 5026 spin_unlock(&block_group->free_space_ctl->tree_lock); 5027 goto loop; 5028 } 5029 spin_unlock(&block_group->free_space_ctl->tree_lock); 5030 5031 /* 5032 * Ok we want to try and use the cluster allocator, so lets look 5033 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 5034 * have tried the cluster allocator plenty of times at this 5035 * point and not have found anything, so we are likely way too 5036 * fragmented for the clustering stuff to find anything, so lets 5037 * just skip it and let the allocator find whatever block it can 5038 * find 5039 */ 5040 if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { 5041 /* 5042 * the refill lock keeps out other 5043 * people trying to start a new cluster 5044 */ 5045 spin_lock(&last_ptr->refill_lock); 5046 if (last_ptr->block_group && 5047 (last_ptr->block_group->ro || 5048 !block_group_bits(last_ptr->block_group, data))) { 5049 offset = 0; 5050 goto refill_cluster; 5051 } 5052 5053 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 5054 num_bytes, search_start); 5055 if (offset) { 5056 /* we have a block, we're done */ 5057 spin_unlock(&last_ptr->refill_lock); 5058 goto checks; 5059 } 5060 5061 spin_lock(&last_ptr->lock); 5062 /* 5063 * whoops, this cluster doesn't actually point to 5064 * this block group. Get a ref on the block 5065 * group is does point to and try again 5066 */ 5067 if (!last_ptr_loop && last_ptr->block_group && 5068 last_ptr->block_group != block_group) { 5069 5070 btrfs_put_block_group(block_group); 5071 block_group = last_ptr->block_group; 5072 btrfs_get_block_group(block_group); 5073 spin_unlock(&last_ptr->lock); 5074 spin_unlock(&last_ptr->refill_lock); 5075 5076 last_ptr_loop = 1; 5077 search_start = block_group->key.objectid; 5078 /* 5079 * we know this block group is properly 5080 * in the list because 5081 * btrfs_remove_block_group, drops the 5082 * cluster before it removes the block 5083 * group from the list 5084 */ 5085 goto have_block_group; 5086 } 5087 spin_unlock(&last_ptr->lock); 5088 refill_cluster: 5089 /* 5090 * this cluster didn't work out, free it and 5091 * start over 5092 */ 5093 btrfs_return_cluster_to_free_space(NULL, last_ptr); 5094 5095 last_ptr_loop = 0; 5096 5097 /* allocate a cluster in this block group */ 5098 ret = btrfs_find_space_cluster(trans, root, 5099 block_group, last_ptr, 5100 offset, num_bytes, 5101 empty_cluster + empty_size); 5102 if (ret == 0) { 5103 /* 5104 * now pull our allocation out of this 5105 * cluster 5106 */ 5107 offset = btrfs_alloc_from_cluster(block_group, 5108 last_ptr, num_bytes, 5109 search_start); 5110 if (offset) { 5111 /* we found one, proceed */ 5112 spin_unlock(&last_ptr->refill_lock); 5113 goto checks; 5114 } 5115 } else if (!cached && loop > LOOP_CACHING_NOWAIT 5116 && !failed_cluster_refill) { 5117 spin_unlock(&last_ptr->refill_lock); 5118 5119 failed_cluster_refill = true; 5120 wait_block_group_cache_progress(block_group, 5121 num_bytes + empty_cluster + empty_size); 5122 goto have_block_group; 5123 } 5124 5125 /* 5126 * at this point we either didn't find a cluster 5127 * or we weren't able to allocate a block from our 5128 * cluster. 
Free the cluster we've been trying 5129 * to use, and go to the next block group 5130 */ 5131 btrfs_return_cluster_to_free_space(NULL, last_ptr); 5132 spin_unlock(&last_ptr->refill_lock); 5133 goto loop; 5134 } 5135 5136 offset = btrfs_find_space_for_alloc(block_group, search_start, 5137 num_bytes, empty_size); 5138 /* 5139 * If we didn't find a chunk, and we haven't failed on this 5140 * block group before, and this block group is in the middle of 5141 * caching and we are ok with waiting, then go ahead and wait 5142 * for progress to be made, and set failed_alloc to true. 5143 * 5144 * If failed_alloc is true then we've already waited on this 5145 * block group once and should move on to the next block group. 5146 */ 5147 if (!offset && !failed_alloc && !cached && 5148 loop > LOOP_CACHING_NOWAIT) { 5149 wait_block_group_cache_progress(block_group, 5150 num_bytes + empty_size); 5151 failed_alloc = true; 5152 goto have_block_group; 5153 } else if (!offset) { 5154 goto loop; 5155 } 5156 checks: 5157 search_start = stripe_align(root, offset); 5158 /* move on to the next group */ 5159 if (search_start + num_bytes >= search_end) { 5160 btrfs_add_free_space(block_group, offset, num_bytes); 5161 goto loop; 5162 } 5163 5164 /* move on to the next group */ 5165 if (search_start + num_bytes > 5166 block_group->key.objectid + block_group->key.offset) { 5167 btrfs_add_free_space(block_group, offset, num_bytes); 5168 goto loop; 5169 } 5170 5171 ins->objectid = search_start; 5172 ins->offset = num_bytes; 5173 5174 if (offset < search_start) 5175 btrfs_add_free_space(block_group, offset, 5176 search_start - offset); 5177 BUG_ON(offset > search_start); 5178 5179 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, 5180 (data & BTRFS_BLOCK_GROUP_DATA)); 5181 if (ret == -EAGAIN) { 5182 btrfs_add_free_space(block_group, offset, num_bytes); 5183 goto loop; 5184 } 5185 5186 /* we are all good, lets return */ 5187 ins->objectid = search_start; 5188 ins->offset = num_bytes; 5189 5190 if (offset < search_start) 5191 btrfs_add_free_space(block_group, offset, 5192 search_start - offset); 5193 BUG_ON(offset > search_start); 5194 btrfs_put_block_group(block_group); 5195 break; 5196 loop: 5197 failed_cluster_refill = false; 5198 failed_alloc = false; 5199 BUG_ON(index != get_block_group_index(block_group)); 5200 btrfs_put_block_group(block_group); 5201 } 5202 up_read(&space_info->groups_sem); 5203 5204 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) 5205 goto search; 5206 5207 /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for 5208 * for them to make caching progress. Also 5209 * determine the best possible bg to cache 5210 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking 5211 * caching kthreads as we move along 5212 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 5213 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again 5214 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 5215 * again 5216 */ 5217 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) { 5218 index = 0; 5219 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 5220 found_uncached_bg = false; 5221 loop++; 5222 if (!ideal_cache_percent && 5223 atomic_read(&space_info->caching_threads)) 5224 goto search; 5225 5226 /* 5227 * 1 of the following 2 things have happened so far 5228 * 5229 * 1) We found an ideal block group for caching that 5230 * is mostly full and will cache quickly, so we might 5231 * as well wait for it. 
5232 * 5233 * 2) We searched for cached only and we didn't find 5234 * anything, and we didn't start any caching kthreads 5235 * either, so chances are we will loop through and 5236 * start a couple caching kthreads, and then come back 5237 * around and just wait for them. This will be slower 5238 * because we will have 2 caching kthreads reading at 5239 * the same time when we could have just started one 5240 * and waited for it to get far enough to give us an 5241 * allocation, so go ahead and go to the wait caching 5242 * loop. 5243 */ 5244 loop = LOOP_CACHING_WAIT; 5245 search_start = ideal_cache_offset; 5246 ideal_cache_percent = 0; 5247 goto ideal_cache; 5248 } else if (loop == LOOP_FIND_IDEAL) { 5249 /* 5250 * Didn't find a uncached bg, wait on anything we find 5251 * next. 5252 */ 5253 loop = LOOP_CACHING_WAIT; 5254 goto search; 5255 } 5256 5257 loop++; 5258 5259 if (loop == LOOP_ALLOC_CHUNK) { 5260 if (allowed_chunk_alloc) { 5261 ret = do_chunk_alloc(trans, root, num_bytes + 5262 2 * 1024 * 1024, data, 5263 CHUNK_ALLOC_LIMITED); 5264 allowed_chunk_alloc = 0; 5265 if (ret == 1) 5266 done_chunk_alloc = 1; 5267 } else if (!done_chunk_alloc && 5268 space_info->force_alloc == 5269 CHUNK_ALLOC_NO_FORCE) { 5270 space_info->force_alloc = CHUNK_ALLOC_LIMITED; 5271 } 5272 5273 /* 5274 * We didn't allocate a chunk, go ahead and drop the 5275 * empty size and loop again. 5276 */ 5277 if (!done_chunk_alloc) 5278 loop = LOOP_NO_EMPTY_SIZE; 5279 } 5280 5281 if (loop == LOOP_NO_EMPTY_SIZE) { 5282 empty_size = 0; 5283 empty_cluster = 0; 5284 } 5285 5286 goto search; 5287 } else if (!ins->objectid) { 5288 ret = -ENOSPC; 5289 } else if (ins->objectid) { 5290 ret = 0; 5291 } 5292 5293 return ret; 5294 } 5295 5296 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 5297 int dump_block_groups) 5298 { 5299 struct btrfs_block_group_cache *cache; 5300 int index = 0; 5301 5302 spin_lock(&info->lock); 5303 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 5304 (unsigned long long)(info->total_bytes - info->bytes_used - 5305 info->bytes_pinned - info->bytes_reserved - 5306 info->bytes_readonly), 5307 (info->full) ? 
"" : "not "); 5308 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " 5309 "reserved=%llu, may_use=%llu, readonly=%llu\n", 5310 (unsigned long long)info->total_bytes, 5311 (unsigned long long)info->bytes_used, 5312 (unsigned long long)info->bytes_pinned, 5313 (unsigned long long)info->bytes_reserved, 5314 (unsigned long long)info->bytes_may_use, 5315 (unsigned long long)info->bytes_readonly); 5316 spin_unlock(&info->lock); 5317 5318 if (!dump_block_groups) 5319 return; 5320 5321 down_read(&info->groups_sem); 5322 again: 5323 list_for_each_entry(cache, &info->block_groups[index], list) { 5324 spin_lock(&cache->lock); 5325 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 5326 "%llu pinned %llu reserved\n", 5327 (unsigned long long)cache->key.objectid, 5328 (unsigned long long)cache->key.offset, 5329 (unsigned long long)btrfs_block_group_used(&cache->item), 5330 (unsigned long long)cache->pinned, 5331 (unsigned long long)cache->reserved); 5332 btrfs_dump_free_space(cache, bytes); 5333 spin_unlock(&cache->lock); 5334 } 5335 if (++index < BTRFS_NR_RAID_TYPES) 5336 goto again; 5337 up_read(&info->groups_sem); 5338 } 5339 5340 int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 5341 struct btrfs_root *root, 5342 u64 num_bytes, u64 min_alloc_size, 5343 u64 empty_size, u64 hint_byte, 5344 u64 search_end, struct btrfs_key *ins, 5345 u64 data) 5346 { 5347 int ret; 5348 u64 search_start = 0; 5349 5350 data = btrfs_get_alloc_profile(root, data); 5351 again: 5352 /* 5353 * the only place that sets empty_size is btrfs_realloc_node, which 5354 * is not called recursively on allocations 5355 */ 5356 if (empty_size || root->ref_cows) 5357 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 5358 num_bytes + 2 * 1024 * 1024, data, 5359 CHUNK_ALLOC_NO_FORCE); 5360 5361 WARN_ON(num_bytes < root->sectorsize); 5362 ret = find_free_extent(trans, root, num_bytes, empty_size, 5363 search_start, search_end, hint_byte, 5364 ins, data); 5365 5366 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 5367 num_bytes = num_bytes >> 1; 5368 num_bytes = num_bytes & ~(root->sectorsize - 1); 5369 num_bytes = max(num_bytes, min_alloc_size); 5370 do_chunk_alloc(trans, root->fs_info->extent_root, 5371 num_bytes, data, CHUNK_ALLOC_FORCE); 5372 goto again; 5373 } 5374 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { 5375 struct btrfs_space_info *sinfo; 5376 5377 sinfo = __find_space_info(root->fs_info, data); 5378 printk(KERN_ERR "btrfs allocation failed flags %llu, " 5379 "wanted %llu\n", (unsigned long long)data, 5380 (unsigned long long)num_bytes); 5381 dump_space_info(sinfo, num_bytes, 1); 5382 } 5383 5384 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); 5385 5386 return ret; 5387 } 5388 5389 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 5390 { 5391 struct btrfs_block_group_cache *cache; 5392 int ret = 0; 5393 5394 cache = btrfs_lookup_block_group(root->fs_info, start); 5395 if (!cache) { 5396 printk(KERN_ERR "Unable to find block group for %llu\n", 5397 (unsigned long long)start); 5398 return -ENOSPC; 5399 } 5400 5401 if (btrfs_test_opt(root, DISCARD)) 5402 ret = btrfs_discard_extent(root, start, len, NULL); 5403 5404 btrfs_add_free_space(cache, start, len); 5405 btrfs_update_reserved_bytes(cache, len, 0, 1); 5406 btrfs_put_block_group(cache); 5407 5408 trace_btrfs_reserved_extent_free(root, start, len); 5409 5410 return ret; 5411 } 5412 5413 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 5414 struct 
btrfs_root *root, 5415 u64 parent, u64 root_objectid, 5416 u64 flags, u64 owner, u64 offset, 5417 struct btrfs_key *ins, int ref_mod) 5418 { 5419 int ret; 5420 struct btrfs_fs_info *fs_info = root->fs_info; 5421 struct btrfs_extent_item *extent_item; 5422 struct btrfs_extent_inline_ref *iref; 5423 struct btrfs_path *path; 5424 struct extent_buffer *leaf; 5425 int type; 5426 u32 size; 5427 5428 if (parent > 0) 5429 type = BTRFS_SHARED_DATA_REF_KEY; 5430 else 5431 type = BTRFS_EXTENT_DATA_REF_KEY; 5432 5433 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 5434 5435 path = btrfs_alloc_path(); 5436 if (!path) 5437 return -ENOMEM; 5438 5439 path->leave_spinning = 1; 5440 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5441 ins, size); 5442 BUG_ON(ret); 5443 5444 leaf = path->nodes[0]; 5445 extent_item = btrfs_item_ptr(leaf, path->slots[0], 5446 struct btrfs_extent_item); 5447 btrfs_set_extent_refs(leaf, extent_item, ref_mod); 5448 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 5449 btrfs_set_extent_flags(leaf, extent_item, 5450 flags | BTRFS_EXTENT_FLAG_DATA); 5451 5452 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 5453 btrfs_set_extent_inline_ref_type(leaf, iref, type); 5454 if (parent > 0) { 5455 struct btrfs_shared_data_ref *ref; 5456 ref = (struct btrfs_shared_data_ref *)(iref + 1); 5457 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 5458 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod); 5459 } else { 5460 struct btrfs_extent_data_ref *ref; 5461 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 5462 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid); 5463 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 5464 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 5465 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); 5466 } 5467 5468 btrfs_mark_buffer_dirty(path->nodes[0]); 5469 btrfs_free_path(path); 5470 5471 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 5472 if (ret) { 5473 printk(KERN_ERR "btrfs update block group failed for %llu " 5474 "%llu\n", (unsigned long long)ins->objectid, 5475 (unsigned long long)ins->offset); 5476 BUG(); 5477 } 5478 return ret; 5479 } 5480 5481 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 5482 struct btrfs_root *root, 5483 u64 parent, u64 root_objectid, 5484 u64 flags, struct btrfs_disk_key *key, 5485 int level, struct btrfs_key *ins) 5486 { 5487 int ret; 5488 struct btrfs_fs_info *fs_info = root->fs_info; 5489 struct btrfs_extent_item *extent_item; 5490 struct btrfs_tree_block_info *block_info; 5491 struct btrfs_extent_inline_ref *iref; 5492 struct btrfs_path *path; 5493 struct extent_buffer *leaf; 5494 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 5495 5496 path = btrfs_alloc_path(); 5497 BUG_ON(!path); 5498 5499 path->leave_spinning = 1; 5500 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5501 ins, size); 5502 BUG_ON(ret); 5503 5504 leaf = path->nodes[0]; 5505 extent_item = btrfs_item_ptr(leaf, path->slots[0], 5506 struct btrfs_extent_item); 5507 btrfs_set_extent_refs(leaf, extent_item, 1); 5508 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 5509 btrfs_set_extent_flags(leaf, extent_item, 5510 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 5511 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 5512 5513 btrfs_set_tree_block_key(leaf, block_info, key); 5514 btrfs_set_tree_block_level(leaf, block_info, level); 5515 5516 iref = (struct 
btrfs_extent_inline_ref *)(block_info + 1); 5517 if (parent > 0) { 5518 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 5519 btrfs_set_extent_inline_ref_type(leaf, iref, 5520 BTRFS_SHARED_BLOCK_REF_KEY); 5521 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 5522 } else { 5523 btrfs_set_extent_inline_ref_type(leaf, iref, 5524 BTRFS_TREE_BLOCK_REF_KEY); 5525 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 5526 } 5527 5528 btrfs_mark_buffer_dirty(leaf); 5529 btrfs_free_path(path); 5530 5531 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 5532 if (ret) { 5533 printk(KERN_ERR "btrfs update block group failed for %llu " 5534 "%llu\n", (unsigned long long)ins->objectid, 5535 (unsigned long long)ins->offset); 5536 BUG(); 5537 } 5538 return ret; 5539 } 5540 5541 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 5542 struct btrfs_root *root, 5543 u64 root_objectid, u64 owner, 5544 u64 offset, struct btrfs_key *ins) 5545 { 5546 int ret; 5547 5548 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); 5549 5550 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, 5551 0, root_objectid, owner, offset, 5552 BTRFS_ADD_DELAYED_EXTENT, NULL); 5553 return ret; 5554 } 5555 5556 /* 5557 * this is used by the tree logging recovery code. It records that 5558 * an extent has been allocated and makes sure to clear the free 5559 * space cache bits as well 5560 */ 5561 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 5562 struct btrfs_root *root, 5563 u64 root_objectid, u64 owner, u64 offset, 5564 struct btrfs_key *ins) 5565 { 5566 int ret; 5567 struct btrfs_block_group_cache *block_group; 5568 struct btrfs_caching_control *caching_ctl; 5569 u64 start = ins->objectid; 5570 u64 num_bytes = ins->offset; 5571 5572 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 5573 cache_block_group(block_group, trans, NULL, 0); 5574 caching_ctl = get_caching_control(block_group); 5575 5576 if (!caching_ctl) { 5577 BUG_ON(!block_group_cache_done(block_group)); 5578 ret = btrfs_remove_free_space(block_group, start, num_bytes); 5579 BUG_ON(ret); 5580 } else { 5581 mutex_lock(&caching_ctl->mutex); 5582 5583 if (start >= caching_ctl->progress) { 5584 ret = add_excluded_extent(root, start, num_bytes); 5585 BUG_ON(ret); 5586 } else if (start + num_bytes <= caching_ctl->progress) { 5587 ret = btrfs_remove_free_space(block_group, 5588 start, num_bytes); 5589 BUG_ON(ret); 5590 } else { 5591 num_bytes = caching_ctl->progress - start; 5592 ret = btrfs_remove_free_space(block_group, 5593 start, num_bytes); 5594 BUG_ON(ret); 5595 5596 start = caching_ctl->progress; 5597 num_bytes = ins->objectid + ins->offset - 5598 caching_ctl->progress; 5599 ret = add_excluded_extent(root, start, num_bytes); 5600 BUG_ON(ret); 5601 } 5602 5603 mutex_unlock(&caching_ctl->mutex); 5604 put_caching_control(caching_ctl); 5605 } 5606 5607 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); 5608 BUG_ON(ret); 5609 btrfs_put_block_group(block_group); 5610 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 5611 0, owner, offset, ins, 1); 5612 return ret; 5613 } 5614 5615 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 5616 struct btrfs_root *root, 5617 u64 bytenr, u32 blocksize, 5618 int level) 5619 { 5620 struct extent_buffer *buf; 5621 5622 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 5623 if (!buf) 5624 return ERR_PTR(-ENOMEM); 5625 btrfs_set_header_generation(buf, 
trans->transid);
5626 btrfs_set_buffer_lockdep_class(buf, level);
5627 btrfs_tree_lock(buf);
5628 clean_tree_block(trans, root, buf);
5629
5630 btrfs_set_lock_blocking(buf);
5631 btrfs_set_buffer_uptodate(buf);
5632
5633 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
5634 /*
5635 * we allow two log transactions at a time, and use a different
5636 * EXTENT bit to differentiate dirty pages.
5637 */
5638 if (root->log_transid % 2 == 0)
5639 set_extent_dirty(&root->dirty_log_pages, buf->start,
5640 buf->start + buf->len - 1, GFP_NOFS);
5641 else
5642 set_extent_new(&root->dirty_log_pages, buf->start,
5643 buf->start + buf->len - 1, GFP_NOFS);
5644 } else {
5645 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
5646 buf->start + buf->len - 1, GFP_NOFS);
5647 }
5648 trans->blocks_used++;
5649 /* this returns a buffer locked for blocking */
5650 return buf;
5651 }
5652
5653 static struct btrfs_block_rsv *
5654 use_block_rsv(struct btrfs_trans_handle *trans,
5655 struct btrfs_root *root, u32 blocksize)
5656 {
5657 struct btrfs_block_rsv *block_rsv;
5658 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
5659 int ret;
5660
5661 block_rsv = get_block_rsv(trans, root);
5662
5663 if (block_rsv->size == 0) {
5664 ret = reserve_metadata_bytes(trans, root, block_rsv,
5665 blocksize, 0);
5666 /*
5667 * If we couldn't reserve metadata bytes try and use some from
5668 * the global reserve.
5669 */
5670 if (ret && block_rsv != global_rsv) {
5671 ret = block_rsv_use_bytes(global_rsv, blocksize);
5672 if (!ret)
5673 return global_rsv;
5674 return ERR_PTR(ret);
5675 } else if (ret) {
5676 return ERR_PTR(ret);
5677 }
5678 return block_rsv;
5679 }
5680
5681 ret = block_rsv_use_bytes(block_rsv, blocksize);
5682 if (!ret)
5683 return block_rsv;
5684 if (ret) {
5685 WARN_ON(1);
5686 ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize,
5687 0);
5688 if (!ret) {
5689 spin_lock(&block_rsv->lock);
5690 block_rsv->size += blocksize;
5691 spin_unlock(&block_rsv->lock);
5692 return block_rsv;
5693 } else if (ret && block_rsv != global_rsv) {
5694 ret = block_rsv_use_bytes(global_rsv, blocksize);
5695 if (!ret)
5696 return global_rsv;
5697 }
5698 }
5699
5700 return ERR_PTR(-ENOSPC);
5701 }
5702
5703 static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize)
5704 {
5705 block_rsv_add_bytes(block_rsv, blocksize, 0);
5706 block_rsv_release_bytes(block_rsv, NULL, 0);
5707 }
5708
5709 /*
5710 * finds a free extent and does all the dirty work required for allocation.
5711 * returns the key for the extent through ins, and a tree buffer for
5712 * the first block of the extent.
5713 *
5714 * returns the tree buffer or an ERR_PTR on failure.
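 *
 * the rough flow below: take metadata space from the appropriate block
 * reserve (use_block_rsv), reserve a physical extent, initialize the new
 * buffer, and, for everything except log trees, queue a delayed tree ref
 * that will insert the extent item once the delayed refs are run.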
5715 */ 5716 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 5717 struct btrfs_root *root, u32 blocksize, 5718 u64 parent, u64 root_objectid, 5719 struct btrfs_disk_key *key, int level, 5720 u64 hint, u64 empty_size) 5721 { 5722 struct btrfs_key ins; 5723 struct btrfs_block_rsv *block_rsv; 5724 struct extent_buffer *buf; 5725 u64 flags = 0; 5726 int ret; 5727 5728 5729 block_rsv = use_block_rsv(trans, root, blocksize); 5730 if (IS_ERR(block_rsv)) 5731 return ERR_CAST(block_rsv); 5732 5733 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, 5734 empty_size, hint, (u64)-1, &ins, 0); 5735 if (ret) { 5736 unuse_block_rsv(block_rsv, blocksize); 5737 return ERR_PTR(ret); 5738 } 5739 5740 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 5741 blocksize, level); 5742 BUG_ON(IS_ERR(buf)); 5743 5744 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 5745 if (parent == 0) 5746 parent = ins.objectid; 5747 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 5748 } else 5749 BUG_ON(parent > 0); 5750 5751 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 5752 struct btrfs_delayed_extent_op *extent_op; 5753 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 5754 BUG_ON(!extent_op); 5755 if (key) 5756 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 5757 else 5758 memset(&extent_op->key, 0, sizeof(extent_op->key)); 5759 extent_op->flags_to_set = flags; 5760 extent_op->update_key = 1; 5761 extent_op->update_flags = 1; 5762 extent_op->is_data = 0; 5763 5764 ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, 5765 ins.offset, parent, root_objectid, 5766 level, BTRFS_ADD_DELAYED_EXTENT, 5767 extent_op); 5768 BUG_ON(ret); 5769 } 5770 return buf; 5771 } 5772 5773 struct walk_control { 5774 u64 refs[BTRFS_MAX_LEVEL]; 5775 u64 flags[BTRFS_MAX_LEVEL]; 5776 struct btrfs_key update_progress; 5777 int stage; 5778 int level; 5779 int shared_level; 5780 int update_ref; 5781 int keep_locks; 5782 int reada_slot; 5783 int reada_count; 5784 }; 5785 5786 #define DROP_REFERENCE 1 5787 #define UPDATE_BACKREF 2 5788 5789 static noinline void reada_walk_down(struct btrfs_trans_handle *trans, 5790 struct btrfs_root *root, 5791 struct walk_control *wc, 5792 struct btrfs_path *path) 5793 { 5794 u64 bytenr; 5795 u64 generation; 5796 u64 refs; 5797 u64 flags; 5798 u32 nritems; 5799 u32 blocksize; 5800 struct btrfs_key key; 5801 struct extent_buffer *eb; 5802 int ret; 5803 int slot; 5804 int nread = 0; 5805 5806 if (path->slots[wc->level] < wc->reada_slot) { 5807 wc->reada_count = wc->reada_count * 2 / 3; 5808 wc->reada_count = max(wc->reada_count, 2); 5809 } else { 5810 wc->reada_count = wc->reada_count * 3 / 2; 5811 wc->reada_count = min_t(int, wc->reada_count, 5812 BTRFS_NODEPTRS_PER_BLOCK(root)); 5813 } 5814 5815 eb = path->nodes[wc->level]; 5816 nritems = btrfs_header_nritems(eb); 5817 blocksize = btrfs_level_size(root, wc->level - 1); 5818 5819 for (slot = path->slots[wc->level]; slot < nritems; slot++) { 5820 if (nread >= wc->reada_count) 5821 break; 5822 5823 cond_resched(); 5824 bytenr = btrfs_node_blockptr(eb, slot); 5825 generation = btrfs_node_ptr_generation(eb, slot); 5826 5827 if (slot == path->slots[wc->level]) 5828 goto reada; 5829 5830 if (wc->stage == UPDATE_BACKREF && 5831 generation <= root->root_key.offset) 5832 continue; 5833 5834 /* We don't lock the tree block, it's OK to be racy here */ 5835 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 5836 &refs, &flags); 5837 BUG_ON(ret); 5838 BUG_ON(refs == 0); 5839 5840 if (wc->stage == DROP_REFERENCE) { 5841 if (refs 
== 1)
				goto reada;

			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				continue;
			btrfs_node_key_to_cpu(eb, &key, slot);
			ret = btrfs_comp_cpu_keys(&key,
						  &wc->update_progress);
			if (ret < 0)
				continue;
		} else {
			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
		}
reada:
		ret = readahead_tree_block(root, bytenr, blocksize,
					   generation);
		if (ret)
			break;
		nread++;
	}
	wc->reada_slot = slot;
}

/*
 * helper to process tree block while walking down the tree.
 *
 * when wc->stage == UPDATE_BACKREF, this function updates
 * back refs for pointers in the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc, int lookup_info)
{
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	int ret;

	if (wc->stage == UPDATE_BACKREF &&
	    btrfs_header_owner(eb) != root->root_key.objectid)
		return 1;

	/*
	 * when reference count of tree block is 1, it won't increase
	 * again. once full backref flag is set, we never clear it.
	 */
	if (lookup_info &&
	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_lookup_extent_info(trans, root,
					       eb->start, eb->len,
					       &wc->refs[level],
					       &wc->flags[level]);
		BUG_ON(ret);
		BUG_ON(wc->refs[level] == 0);
	}

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level] > 1)
			return 1;

		if (path->locks[level] && !wc->keep_locks) {
			btrfs_tree_unlock(eb);
			path->locks[level] = 0;
		}
		return 0;
	}

	/* wc->stage == UPDATE_BACKREF */
	if (!(wc->flags[level] & flag)) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_inc_ref(trans, root, eb, 1);
		BUG_ON(ret);
		ret = btrfs_dec_ref(trans, root, eb, 0);
		BUG_ON(ret);
		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
						  eb->len, flag, 0);
		BUG_ON(ret);
		wc->flags[level] |= flag;
	}

	/*
	 * the block is shared by multiple trees, so it's not good to
	 * keep the tree lock
	 */
	if (path->locks[level] && level > 0) {
		btrfs_tree_unlock(eb);
		path->locks[level] = 0;
	}
	return 0;
}
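/*
 * walk_down_tree() and walk_up_tree() below drive these helpers.
 * Roughly: in the DROP_REFERENCE stage the walk drops our reference on
 * every block that is only referenced by this tree.  When do_walk_down()
 * finds a shared block whose backrefs still need updating, it switches
 * wc->stage to UPDATE_BACKREF for the subtree rooted at that block, and
 * walk_up_proc() switches the stage back to DROP_REFERENCE once the
 * shared level has been processed.
 */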
/*
 * helper to process tree block pointer.
 *
 * when wc->stage == DROP_REFERENCE, this function checks
 * reference count of the block pointed to. if the block
 * is shared and we need update back refs for the subtree
 * rooted at the block, this function changes wc->stage to
 * UPDATE_BACKREF. if the block is shared and there is no
 * need to update back refs, this function drops the reference
 * to the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int do_walk_down(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int *lookup_info)
{
	u64 bytenr;
	u64 generation;
	u64 parent;
	u32 blocksize;
	struct btrfs_key key;
	struct extent_buffer *next;
	int level = wc->level;
	int reada = 0;
	int ret = 0;

	generation = btrfs_node_ptr_generation(path->nodes[level],
					       path->slots[level]);
	/*
	 * if the lower level block was created before the snapshot
	 * was created, we know there is no need to update back refs
	 * for the subtree
	 */
	if (wc->stage == UPDATE_BACKREF &&
	    generation <= root->root_key.offset) {
		*lookup_info = 1;
		return 1;
	}

	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
	blocksize = btrfs_level_size(root, level - 1);

	next = btrfs_find_tree_block(root, bytenr, blocksize);
	if (!next) {
		next = btrfs_find_create_tree_block(root, bytenr, blocksize);
		if (!next)
			return -ENOMEM;
		reada = 1;
	}
	btrfs_tree_lock(next);
	btrfs_set_lock_blocking(next);

	ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
				       &wc->refs[level - 1],
				       &wc->flags[level - 1]);
	BUG_ON(ret);
	BUG_ON(wc->refs[level - 1] == 0);
	*lookup_info = 0;

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level - 1] > 1) {
			if (level == 1 &&
			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				goto skip;

			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				goto skip;

			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
			if (ret < 0)
				goto skip;

			wc->stage = UPDATE_BACKREF;
			wc->shared_level = level - 1;
		}
	} else {
		if (level == 1 &&
		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
			goto skip;
	}

	if (!btrfs_buffer_uptodate(next, generation)) {
		btrfs_tree_unlock(next);
		free_extent_buffer(next);
		next = NULL;
		*lookup_info = 1;
	}

	if (!next) {
		if (reada && level == 1)
			reada_walk_down(trans, root, wc, path);
		next = read_tree_block(root, bytenr, blocksize, generation);
		if (!next)
			return -EIO;
		btrfs_tree_lock(next);
		btrfs_set_lock_blocking(next);
	}

	level--;
	BUG_ON(level != btrfs_header_level(next));
	path->nodes[level] = next;
	path->slots[level] = 0;
	path->locks[level] = 1;
	wc->level = level;
	if (wc->level == 1)
		wc->reada_slot = 0;
	return 0;
skip:
	wc->refs[level - 1] = 0;
	wc->flags[level - 1] = 0;
	if (wc->stage == DROP_REFERENCE) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			parent = path->nodes[level]->start;
		} else {
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(path->nodes[level]));
			parent = 0;
		}

		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
					root->root_key.objectid, level - 1, 0);
		BUG_ON(ret);
	}
	btrfs_tree_unlock(next);
	free_extent_buffer(next);
	*lookup_info = 1;
	return 1;
}

/*
 * helper to process tree block while walking up the tree.
 *
 * when wc->stage == DROP_REFERENCE, this function drops
 * reference count on the block.
6082 * 6083 * when wc->stage == UPDATE_BACKREF, this function changes 6084 * wc->stage back to DROP_REFERENCE if we changed wc->stage 6085 * to UPDATE_BACKREF previously while processing the block. 6086 * 6087 * NOTE: return value 1 means we should stop walking up. 6088 */ 6089 static noinline int walk_up_proc(struct btrfs_trans_handle *trans, 6090 struct btrfs_root *root, 6091 struct btrfs_path *path, 6092 struct walk_control *wc) 6093 { 6094 int ret; 6095 int level = wc->level; 6096 struct extent_buffer *eb = path->nodes[level]; 6097 u64 parent = 0; 6098 6099 if (wc->stage == UPDATE_BACKREF) { 6100 BUG_ON(wc->shared_level < level); 6101 if (level < wc->shared_level) 6102 goto out; 6103 6104 ret = find_next_key(path, level + 1, &wc->update_progress); 6105 if (ret > 0) 6106 wc->update_ref = 0; 6107 6108 wc->stage = DROP_REFERENCE; 6109 wc->shared_level = -1; 6110 path->slots[level] = 0; 6111 6112 /* 6113 * check reference count again if the block isn't locked. 6114 * we should start walking down the tree again if reference 6115 * count is one. 6116 */ 6117 if (!path->locks[level]) { 6118 BUG_ON(level == 0); 6119 btrfs_tree_lock(eb); 6120 btrfs_set_lock_blocking(eb); 6121 path->locks[level] = 1; 6122 6123 ret = btrfs_lookup_extent_info(trans, root, 6124 eb->start, eb->len, 6125 &wc->refs[level], 6126 &wc->flags[level]); 6127 BUG_ON(ret); 6128 BUG_ON(wc->refs[level] == 0); 6129 if (wc->refs[level] == 1) { 6130 btrfs_tree_unlock(eb); 6131 path->locks[level] = 0; 6132 return 1; 6133 } 6134 } 6135 } 6136 6137 /* wc->stage == DROP_REFERENCE */ 6138 BUG_ON(wc->refs[level] > 1 && !path->locks[level]); 6139 6140 if (wc->refs[level] == 1) { 6141 if (level == 0) { 6142 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 6143 ret = btrfs_dec_ref(trans, root, eb, 1); 6144 else 6145 ret = btrfs_dec_ref(trans, root, eb, 0); 6146 BUG_ON(ret); 6147 } 6148 /* make block locked assertion in clean_tree_block happy */ 6149 if (!path->locks[level] && 6150 btrfs_header_generation(eb) == trans->transid) { 6151 btrfs_tree_lock(eb); 6152 btrfs_set_lock_blocking(eb); 6153 path->locks[level] = 1; 6154 } 6155 clean_tree_block(trans, root, eb); 6156 } 6157 6158 if (eb == root->node) { 6159 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 6160 parent = eb->start; 6161 else 6162 BUG_ON(root->root_key.objectid != 6163 btrfs_header_owner(eb)); 6164 } else { 6165 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 6166 parent = path->nodes[level + 1]->start; 6167 else 6168 BUG_ON(root->root_key.objectid != 6169 btrfs_header_owner(path->nodes[level + 1])); 6170 } 6171 6172 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); 6173 out: 6174 wc->refs[level] = 0; 6175 wc->flags[level] = 0; 6176 return 0; 6177 } 6178 6179 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 6180 struct btrfs_root *root, 6181 struct btrfs_path *path, 6182 struct walk_control *wc) 6183 { 6184 int level = wc->level; 6185 int lookup_info = 1; 6186 int ret; 6187 6188 while (level >= 0) { 6189 ret = walk_down_proc(trans, root, path, wc, lookup_info); 6190 if (ret > 0) 6191 break; 6192 6193 if (level == 0) 6194 break; 6195 6196 if (path->slots[level] >= 6197 btrfs_header_nritems(path->nodes[level])) 6198 break; 6199 6200 ret = do_walk_down(trans, root, path, wc, &lookup_info); 6201 if (ret > 0) { 6202 path->slots[level]++; 6203 continue; 6204 } else if (ret < 0) 6205 return ret; 6206 level = wc->level; 6207 } 6208 return 0; 6209 } 6210 6211 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int max_level)
{
	int level = wc->level;
	int ret;

	path->slots[level] = btrfs_header_nritems(path->nodes[level]);
	while (level < max_level && path->nodes[level]) {
		wc->level = level;
		if (path->slots[level] + 1 <
		    btrfs_header_nritems(path->nodes[level])) {
			path->slots[level]++;
			return 0;
		} else {
			ret = walk_up_proc(trans, root, path, wc);
			if (ret > 0)
				return 0;

			if (path->locks[level]) {
				btrfs_tree_unlock(path->nodes[level]);
				path->locks[level] = 0;
			}
			free_extent_buffer(path->nodes[level]);
			path->nodes[level] = NULL;
			level++;
		}
	}
	return 1;
}

/*
 * drop a subvolume tree.
 *
 * this function traverses the tree and frees any blocks that are
 * only referenced by the tree.
 *
 * when a shared tree block is found, this function decreases its
 * reference count by one. if update_ref is true, this function
 * also makes sure backrefs for the shared block and all lower level
 * blocks are properly updated.
 */
int btrfs_drop_snapshot(struct btrfs_root *root,
			struct btrfs_block_rsv *block_rsv, int update_ref)
{
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	struct btrfs_root_item *root_item = &root->root_item;
	struct walk_control *wc;
	struct btrfs_key key;
	int err = 0;
	int ret;
	int level;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	wc = kzalloc(sizeof(*wc), GFP_NOFS);
	BUG_ON(!wc);

	trans = btrfs_start_transaction(tree_root, 0);
	BUG_ON(IS_ERR(trans));

	if (block_rsv)
		trans->block_rsv = block_rsv;

	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		level = btrfs_header_level(root->node);
		path->nodes[level] = btrfs_lock_root_node(root);
		btrfs_set_lock_blocking(path->nodes[level]);
		path->slots[level] = 0;
		path->locks[level] = 1;
		memset(&wc->update_progress, 0,
		       sizeof(wc->update_progress));
	} else {
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		memcpy(&wc->update_progress, &key,
		       sizeof(wc->update_progress));

		level = root_item->drop_level;
		BUG_ON(level == 0);
		path->lowest_level = level;
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		path->lowest_level = 0;
		if (ret < 0) {
			err = ret;
			goto out;
		}
		WARN_ON(ret > 0);

		/*
		 * unlock our path, this is safe because only this
		 * function is allowed to delete this snapshot
		 */
		btrfs_unlock_up_safe(path, 0);

		level = btrfs_header_level(root->node);
		while (1) {
			btrfs_tree_lock(path->nodes[level]);
			btrfs_set_lock_blocking(path->nodes[level]);

			ret = btrfs_lookup_extent_info(trans, root,
						path->nodes[level]->start,
						path->nodes[level]->len,
						&wc->refs[level],
						&wc->flags[level]);
			BUG_ON(ret);
			BUG_ON(wc->refs[level] == 0);

			if (level == root_item->drop_level)
				break;

			btrfs_tree_unlock(path->nodes[level]);
			WARN_ON(wc->refs[level] != 1);
			level--;
		}
	}

	wc->level = level;
	wc->shared_level = -1;
	wc->stage = DROP_REFERENCE;
	wc->update_ref = update_ref;
	wc->keep_locks = 0;
	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);

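	/*
	 * The loop below repeatedly walks down to a leaf and back up,
	 * freeing blocks as it goes.  After each pass the current position
	 * is recorded in root_item->drop_progress/drop_level so that a
	 * partially dropped snapshot can be resumed later, and the
	 * transaction is restarted whenever btrfs_should_end_transaction()
	 * asks for it.
	 */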
while (1) { 6339 ret = walk_down_tree(trans, root, path, wc); 6340 if (ret < 0) { 6341 err = ret; 6342 break; 6343 } 6344 6345 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); 6346 if (ret < 0) { 6347 err = ret; 6348 break; 6349 } 6350 6351 if (ret > 0) { 6352 BUG_ON(wc->stage != DROP_REFERENCE); 6353 break; 6354 } 6355 6356 if (wc->stage == DROP_REFERENCE) { 6357 level = wc->level; 6358 btrfs_node_key(path->nodes[level], 6359 &root_item->drop_progress, 6360 path->slots[level]); 6361 root_item->drop_level = level; 6362 } 6363 6364 BUG_ON(wc->level == 0); 6365 if (btrfs_should_end_transaction(trans, tree_root)) { 6366 ret = btrfs_update_root(trans, tree_root, 6367 &root->root_key, 6368 root_item); 6369 BUG_ON(ret); 6370 6371 btrfs_end_transaction_throttle(trans, tree_root); 6372 trans = btrfs_start_transaction(tree_root, 0); 6373 BUG_ON(IS_ERR(trans)); 6374 if (block_rsv) 6375 trans->block_rsv = block_rsv; 6376 } 6377 } 6378 btrfs_release_path(path); 6379 BUG_ON(err); 6380 6381 ret = btrfs_del_root(trans, tree_root, &root->root_key); 6382 BUG_ON(ret); 6383 6384 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 6385 ret = btrfs_find_last_root(tree_root, root->root_key.objectid, 6386 NULL, NULL); 6387 BUG_ON(ret < 0); 6388 if (ret > 0) { 6389 /* if we fail to delete the orphan item this time 6390 * around, it'll get picked up the next time. 6391 * 6392 * The most common failure here is just -ENOENT. 6393 */ 6394 btrfs_del_orphan_item(trans, tree_root, 6395 root->root_key.objectid); 6396 } 6397 } 6398 6399 if (root->in_radix) { 6400 btrfs_free_fs_root(tree_root->fs_info, root); 6401 } else { 6402 free_extent_buffer(root->node); 6403 free_extent_buffer(root->commit_root); 6404 kfree(root); 6405 } 6406 out: 6407 btrfs_end_transaction_throttle(trans, tree_root); 6408 kfree(wc); 6409 btrfs_free_path(path); 6410 return err; 6411 } 6412 6413 /* 6414 * drop subtree rooted at tree block 'node'. 
6415 * 6416 * NOTE: this function will unlock and release tree block 'node' 6417 */ 6418 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 6419 struct btrfs_root *root, 6420 struct extent_buffer *node, 6421 struct extent_buffer *parent) 6422 { 6423 struct btrfs_path *path; 6424 struct walk_control *wc; 6425 int level; 6426 int parent_level; 6427 int ret = 0; 6428 int wret; 6429 6430 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 6431 6432 path = btrfs_alloc_path(); 6433 if (!path) 6434 return -ENOMEM; 6435 6436 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6437 if (!wc) { 6438 btrfs_free_path(path); 6439 return -ENOMEM; 6440 } 6441 6442 btrfs_assert_tree_locked(parent); 6443 parent_level = btrfs_header_level(parent); 6444 extent_buffer_get(parent); 6445 path->nodes[parent_level] = parent; 6446 path->slots[parent_level] = btrfs_header_nritems(parent); 6447 6448 btrfs_assert_tree_locked(node); 6449 level = btrfs_header_level(node); 6450 path->nodes[level] = node; 6451 path->slots[level] = 0; 6452 path->locks[level] = 1; 6453 6454 wc->refs[parent_level] = 1; 6455 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; 6456 wc->level = level; 6457 wc->shared_level = -1; 6458 wc->stage = DROP_REFERENCE; 6459 wc->update_ref = 0; 6460 wc->keep_locks = 1; 6461 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 6462 6463 while (1) { 6464 wret = walk_down_tree(trans, root, path, wc); 6465 if (wret < 0) { 6466 ret = wret; 6467 break; 6468 } 6469 6470 wret = walk_up_tree(trans, root, path, wc, parent_level); 6471 if (wret < 0) 6472 ret = wret; 6473 if (wret != 0) 6474 break; 6475 } 6476 6477 kfree(wc); 6478 btrfs_free_path(path); 6479 return ret; 6480 } 6481 6482 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6483 { 6484 u64 num_devices; 6485 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 6486 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 6487 6488 /* 6489 * we add in the count of missing devices because we want 6490 * to make sure that any RAID levels on a degraded FS 6491 * continue to be honored. 
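 *
 * A summary of the conversions done below: with num_devices == 1,
 * RAID1/RAID10 profiles are degraded to DUP and RAID0 to a
 * single-device profile; with more than one device, DUP becomes
 * RAID1 and single-device chunks become RAID0.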
 */
	num_devices = root->fs_info->fs_devices->rw_devices +
		root->fs_info->fs_devices->missing_devices;

	if (num_devices == 1) {
		stripped |= BTRFS_BLOCK_GROUP_DUP;
		stripped = flags & ~stripped;

		/* turn raid0 into single device chunks */
		if (flags & BTRFS_BLOCK_GROUP_RAID0)
			return stripped;

		/* turn mirroring into duplication */
		if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
			     BTRFS_BLOCK_GROUP_RAID10))
			return stripped | BTRFS_BLOCK_GROUP_DUP;
		return flags;
	} else {
		/* they already had raid on here, just return */
		if (flags & stripped)
			return flags;

		stripped |= BTRFS_BLOCK_GROUP_DUP;
		stripped = flags & ~stripped;

		/* switch duplicated blocks with raid1 */
		if (flags & BTRFS_BLOCK_GROUP_DUP)
			return stripped | BTRFS_BLOCK_GROUP_RAID1;

		/* turn single device chunks into raid0 */
		return stripped | BTRFS_BLOCK_GROUP_RAID0;
	}
	return flags;
}

static int set_block_group_ro(struct btrfs_block_group_cache *cache)
{
	struct btrfs_space_info *sinfo = cache->space_info;
	u64 num_bytes;
	int ret = -ENOSPC;

	if (cache->ro)
		return 0;

	spin_lock(&sinfo->lock);
	spin_lock(&cache->lock);
	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
		    cache->bytes_super - btrfs_block_group_used(&cache->item);

	if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
	    sinfo->bytes_may_use + sinfo->bytes_readonly +
	    cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
		sinfo->bytes_readonly += num_bytes;
		sinfo->bytes_reserved += cache->reserved_pinned;
		cache->reserved_pinned = 0;
		cache->ro = 1;
		ret = 0;
	}

	spin_unlock(&cache->lock);
	spin_unlock(&sinfo->lock);
	return ret;
}

int btrfs_set_block_group_ro(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache)

{
	struct btrfs_trans_handle *trans;
	u64 alloc_flags;
	int ret;

	BUG_ON(cache->ro);

	trans = btrfs_join_transaction(root);
	BUG_ON(IS_ERR(trans));

	alloc_flags = update_block_group_flags(root, cache->flags);
	if (alloc_flags != cache->flags)
		do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
			       CHUNK_ALLOC_FORCE);

	ret = set_block_group_ro(cache);
	if (!ret)
		goto out;
	alloc_flags = get_alloc_profile(root, cache->space_info->flags);
	ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
			     CHUNK_ALLOC_FORCE);
	if (ret < 0)
		goto out;
	ret = set_block_group_ro(cache);
out:
	btrfs_end_transaction(trans, root);
	return ret;
}

int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, u64 type)
{
	u64 alloc_flags = get_alloc_profile(root, type);
	return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
			      CHUNK_ALLOC_FORCE);
}
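/*
 * Example of the accounting done below: a RAID1 block group whose
 * key.offset is 1GiB and which has 256MiB used contributes
 * (1GiB - 256MiB) * 2 raw bytes of free space, since every byte in a
 * mirrored or duplicated group occupies two bytes on disk.
 */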
/*
 * helper to account the unused space of all the readonly block groups
 * in the list. takes mirrors into account.
 */
static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
{
	struct btrfs_block_group_cache *block_group;
	u64 free_bytes = 0;
	int factor;

	list_for_each_entry(block_group, groups_list, list) {
		spin_lock(&block_group->lock);

		if (!block_group->ro) {
			spin_unlock(&block_group->lock);
			continue;
		}

		if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
					  BTRFS_BLOCK_GROUP_RAID10 |
					  BTRFS_BLOCK_GROUP_DUP))
			factor = 2;
		else
			factor = 1;

		free_bytes += (block_group->key.offset -
			       btrfs_block_group_used(&block_group->item)) *
			       factor;

		spin_unlock(&block_group->lock);
	}

	return free_bytes;
}

/*
 * helper to account the unused space of all the readonly block groups
 * in the space_info. takes mirrors into account.
 */
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
{
	int i;
	u64 free_bytes = 0;

	spin_lock(&sinfo->lock);

	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		if (!list_empty(&sinfo->block_groups[i]))
			free_bytes += __btrfs_get_ro_block_group_free_space(
						&sinfo->block_groups[i]);

	spin_unlock(&sinfo->lock);

	return free_bytes;
}

int btrfs_set_block_group_rw(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache)
{
	struct btrfs_space_info *sinfo = cache->space_info;
	u64 num_bytes;

	BUG_ON(!cache->ro);

	spin_lock(&sinfo->lock);
	spin_lock(&cache->lock);
	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
		    cache->bytes_super - btrfs_block_group_used(&cache->item);
	sinfo->bytes_readonly -= num_bytes;
	cache->ro = 0;
	spin_unlock(&cache->lock);
	spin_unlock(&sinfo->lock);
	return 0;
}

/*
 * checks to see if it's even possible to relocate this block group.
 *
 * @return - -1 if it's not a good idea to relocate this block group, 0 if it's
 * ok to go ahead and try.
 */
int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_space_info *space_info;
	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
	struct btrfs_device *device;
	int full = 0;
	int ret = 0;

	block_group = btrfs_lookup_block_group(root->fs_info, bytenr);

	/* odd, couldn't find the block group, leave it alone */
	if (!block_group)
		return -1;

	/* no bytes used, we're good */
	if (!btrfs_block_group_used(&block_group->item))
		goto out;

	space_info = block_group->space_info;
	spin_lock(&space_info->lock);

	full = space_info->full;

	/*
	 * if this is the last block group we have in this space, we can't
	 * relocate it unless we're able to allocate a new chunk below.
	 *
	 * Otherwise, we need to make sure we have room in the space to handle
	 * all of the extents from this block group.
If we can, we're good 6707 */ 6708 if ((space_info->total_bytes != block_group->key.offset) && 6709 (space_info->bytes_used + space_info->bytes_reserved + 6710 space_info->bytes_pinned + space_info->bytes_readonly + 6711 btrfs_block_group_used(&block_group->item) < 6712 space_info->total_bytes)) { 6713 spin_unlock(&space_info->lock); 6714 goto out; 6715 } 6716 spin_unlock(&space_info->lock); 6717 6718 /* 6719 * ok we don't have enough space, but maybe we have free space on our 6720 * devices to allocate new chunks for relocation, so loop through our 6721 * alloc devices and guess if we have enough space. However, if we 6722 * were marked as full, then we know there aren't enough chunks, and we 6723 * can just return. 6724 */ 6725 ret = -1; 6726 if (full) 6727 goto out; 6728 6729 mutex_lock(&root->fs_info->chunk_mutex); 6730 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 6731 u64 min_free = btrfs_block_group_used(&block_group->item); 6732 u64 dev_offset; 6733 6734 /* 6735 * check to make sure we can actually find a chunk with enough 6736 * space to fit our block group in. 6737 */ 6738 if (device->total_bytes > device->bytes_used + min_free) { 6739 ret = find_free_dev_extent(NULL, device, min_free, 6740 &dev_offset, NULL); 6741 if (!ret) 6742 break; 6743 ret = -1; 6744 } 6745 } 6746 mutex_unlock(&root->fs_info->chunk_mutex); 6747 out: 6748 btrfs_put_block_group(block_group); 6749 return ret; 6750 } 6751 6752 static int find_first_block_group(struct btrfs_root *root, 6753 struct btrfs_path *path, struct btrfs_key *key) 6754 { 6755 int ret = 0; 6756 struct btrfs_key found_key; 6757 struct extent_buffer *leaf; 6758 int slot; 6759 6760 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 6761 if (ret < 0) 6762 goto out; 6763 6764 while (1) { 6765 slot = path->slots[0]; 6766 leaf = path->nodes[0]; 6767 if (slot >= btrfs_header_nritems(leaf)) { 6768 ret = btrfs_next_leaf(root, path); 6769 if (ret == 0) 6770 continue; 6771 if (ret < 0) 6772 goto out; 6773 break; 6774 } 6775 btrfs_item_key_to_cpu(leaf, &found_key, slot); 6776 6777 if (found_key.objectid >= key->objectid && 6778 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 6779 ret = 0; 6780 goto out; 6781 } 6782 path->slots[0]++; 6783 } 6784 out: 6785 return ret; 6786 } 6787 6788 void btrfs_put_block_group_cache(struct btrfs_fs_info *info) 6789 { 6790 struct btrfs_block_group_cache *block_group; 6791 u64 last = 0; 6792 6793 while (1) { 6794 struct inode *inode; 6795 6796 block_group = btrfs_lookup_first_block_group(info, last); 6797 while (block_group) { 6798 spin_lock(&block_group->lock); 6799 if (block_group->iref) 6800 break; 6801 spin_unlock(&block_group->lock); 6802 block_group = next_block_group(info->tree_root, 6803 block_group); 6804 } 6805 if (!block_group) { 6806 if (last == 0) 6807 break; 6808 last = 0; 6809 continue; 6810 } 6811 6812 inode = block_group->inode; 6813 block_group->iref = 0; 6814 block_group->inode = NULL; 6815 spin_unlock(&block_group->lock); 6816 iput(inode); 6817 last = block_group->key.objectid + block_group->key.offset; 6818 btrfs_put_block_group(block_group); 6819 } 6820 } 6821 6822 int btrfs_free_block_groups(struct btrfs_fs_info *info) 6823 { 6824 struct btrfs_block_group_cache *block_group; 6825 struct btrfs_space_info *space_info; 6826 struct btrfs_caching_control *caching_ctl; 6827 struct rb_node *n; 6828 6829 down_write(&info->extent_commit_sem); 6830 while (!list_empty(&info->caching_block_groups)) { 6831 caching_ctl = list_entry(info->caching_block_groups.next, 6832 struct 
btrfs_caching_control, list); 6833 list_del(&caching_ctl->list); 6834 put_caching_control(caching_ctl); 6835 } 6836 up_write(&info->extent_commit_sem); 6837 6838 spin_lock(&info->block_group_cache_lock); 6839 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 6840 block_group = rb_entry(n, struct btrfs_block_group_cache, 6841 cache_node); 6842 rb_erase(&block_group->cache_node, 6843 &info->block_group_cache_tree); 6844 spin_unlock(&info->block_group_cache_lock); 6845 6846 down_write(&block_group->space_info->groups_sem); 6847 list_del(&block_group->list); 6848 up_write(&block_group->space_info->groups_sem); 6849 6850 if (block_group->cached == BTRFS_CACHE_STARTED) 6851 wait_block_group_cache_done(block_group); 6852 6853 /* 6854 * We haven't cached this block group, which means we could 6855 * possibly have excluded extents on this block group. 6856 */ 6857 if (block_group->cached == BTRFS_CACHE_NO) 6858 free_excluded_extents(info->extent_root, block_group); 6859 6860 btrfs_remove_free_space_cache(block_group); 6861 btrfs_put_block_group(block_group); 6862 6863 spin_lock(&info->block_group_cache_lock); 6864 } 6865 spin_unlock(&info->block_group_cache_lock); 6866 6867 /* now that all the block groups are freed, go through and 6868 * free all the space_info structs. This is only called during 6869 * the final stages of unmount, and so we know nobody is 6870 * using them. We call synchronize_rcu() once before we start, 6871 * just to be on the safe side. 6872 */ 6873 synchronize_rcu(); 6874 6875 release_global_block_rsv(info); 6876 6877 while(!list_empty(&info->space_info)) { 6878 space_info = list_entry(info->space_info.next, 6879 struct btrfs_space_info, 6880 list); 6881 if (space_info->bytes_pinned > 0 || 6882 space_info->bytes_reserved > 0) { 6883 WARN_ON(1); 6884 dump_space_info(space_info, 0, 0); 6885 } 6886 list_del(&space_info->list); 6887 kfree(space_info); 6888 } 6889 return 0; 6890 } 6891 6892 static void __link_block_group(struct btrfs_space_info *space_info, 6893 struct btrfs_block_group_cache *cache) 6894 { 6895 int index = get_block_group_index(cache); 6896 6897 down_write(&space_info->groups_sem); 6898 list_add_tail(&cache->list, &space_info->block_groups[index]); 6899 up_write(&space_info->groups_sem); 6900 } 6901 6902 int btrfs_read_block_groups(struct btrfs_root *root) 6903 { 6904 struct btrfs_path *path; 6905 int ret; 6906 struct btrfs_block_group_cache *cache; 6907 struct btrfs_fs_info *info = root->fs_info; 6908 struct btrfs_space_info *space_info; 6909 struct btrfs_key key; 6910 struct btrfs_key found_key; 6911 struct extent_buffer *leaf; 6912 int need_clear = 0; 6913 u64 cache_gen; 6914 6915 root = info->extent_root; 6916 key.objectid = 0; 6917 key.offset = 0; 6918 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 6919 path = btrfs_alloc_path(); 6920 if (!path) 6921 return -ENOMEM; 6922 path->reada = 1; 6923 6924 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); 6925 if (cache_gen != 0 && 6926 btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) 6927 need_clear = 1; 6928 if (btrfs_test_opt(root, CLEAR_CACHE)) 6929 need_clear = 1; 6930 if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) 6931 printk(KERN_INFO "btrfs: disk space caching is enabled\n"); 6932 6933 while (1) { 6934 ret = find_first_block_group(root, path, &key); 6935 if (ret > 0) 6936 break; 6937 if (ret != 0) 6938 goto error; 6939 leaf = path->nodes[0]; 6940 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 6941 cache = kzalloc(sizeof(*cache), 
					GFP_NOFS);
		if (!cache) {
			ret = -ENOMEM;
			goto error;
		}
		cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
						GFP_NOFS);
		if (!cache->free_space_ctl) {
			kfree(cache);
			ret = -ENOMEM;
			goto error;
		}

		atomic_set(&cache->count, 1);
		spin_lock_init(&cache->lock);
		cache->fs_info = info;
		INIT_LIST_HEAD(&cache->list);
		INIT_LIST_HEAD(&cache->cluster_list);

		if (need_clear)
			cache->disk_cache_state = BTRFS_DC_CLEAR;

		read_extent_buffer(leaf, &cache->item,
				   btrfs_item_ptr_offset(leaf, path->slots[0]),
				   sizeof(cache->item));
		memcpy(&cache->key, &found_key, sizeof(found_key));

		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(path);
		cache->flags = btrfs_block_group_flags(&cache->item);
		cache->sectorsize = root->sectorsize;

		btrfs_init_free_space_ctl(cache);

		/*
		 * We need to exclude the super stripes now so that the space
		 * info has super bytes accounted for, otherwise we'll think
		 * we have more space than we actually do.
		 */
		exclude_super_stripes(root, cache);

		/*
		 * check for two cases, either we are full, and therefore
		 * don't need to bother with the caching work since we won't
		 * find any space, or we are empty, and we can just add all
		 * the space in and be done with it. This saves us a lot of
		 * time, particularly in the full case.
		 */
		if (found_key.offset == btrfs_block_group_used(&cache->item)) {
			cache->last_byte_to_unpin = (u64)-1;
			cache->cached = BTRFS_CACHE_FINISHED;
			free_excluded_extents(root, cache);
		} else if (btrfs_block_group_used(&cache->item) == 0) {
			cache->last_byte_to_unpin = (u64)-1;
			cache->cached = BTRFS_CACHE_FINISHED;
			add_new_free_space(cache, root->fs_info,
					   found_key.objectid,
					   found_key.objectid +
					   found_key.offset);
			free_excluded_extents(root, cache);
		}

		ret = update_space_info(info, cache->flags, found_key.offset,
					btrfs_block_group_used(&cache->item),
					&space_info);
		BUG_ON(ret);
		cache->space_info = space_info;
		spin_lock(&cache->space_info->lock);
		cache->space_info->bytes_readonly += cache->bytes_super;
		spin_unlock(&cache->space_info->lock);

		__link_block_group(space_info, cache);

		ret = btrfs_add_block_group_cache(root->fs_info, cache);
		BUG_ON(ret);

		set_avail_alloc_bits(root->fs_info, cache->flags);
		if (btrfs_chunk_readonly(root, cache->key.objectid))
			set_block_group_ro(cache);
	}

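	/*
	 * Mark un-mirrored block groups read-only when mirrored profiles
	 * exist.  The indices used below (3 and 4) are expected to be the
	 * RAID0 and single-device lists as laid out by
	 * get_block_group_index().
	 */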
	list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
		if (!(get_alloc_profile(root, space_info->flags) &
		      (BTRFS_BLOCK_GROUP_RAID10 |
		       BTRFS_BLOCK_GROUP_RAID1 |
		       BTRFS_BLOCK_GROUP_DUP)))
			continue;
		/*
		 * avoid allocating from un-mirrored block group if there are
		 * mirrored block groups.
		 */
		list_for_each_entry(cache, &space_info->block_groups[3], list)
			set_block_group_ro(cache);
		list_for_each_entry(cache, &space_info->block_groups[4], list)
			set_block_group_ro(cache);
	}

	init_global_block_rsv(info);
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}

int btrfs_make_block_group(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, u64 bytes_used,
			   u64 type, u64 chunk_objectid, u64 chunk_offset,
			   u64 size)
{
	int ret;
	struct btrfs_root *extent_root;
	struct btrfs_block_group_cache *cache;

	extent_root = root->fs_info->extent_root;

	root->fs_info->last_trans_log_full_commit = trans->transid;

	cache = kzalloc(sizeof(*cache), GFP_NOFS);
	if (!cache)
		return -ENOMEM;
	cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
					GFP_NOFS);
	if (!cache->free_space_ctl) {
		kfree(cache);
		return -ENOMEM;
	}

	cache->key.objectid = chunk_offset;
	cache->key.offset = size;
	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
	cache->sectorsize = root->sectorsize;
	cache->fs_info = root->fs_info;

	atomic_set(&cache->count, 1);
	spin_lock_init(&cache->lock);
	INIT_LIST_HEAD(&cache->list);
	INIT_LIST_HEAD(&cache->cluster_list);

	btrfs_init_free_space_ctl(cache);

	btrfs_set_block_group_used(&cache->item, bytes_used);
	btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
	cache->flags = type;
	btrfs_set_block_group_flags(&cache->item, type);

	cache->last_byte_to_unpin = (u64)-1;
	cache->cached = BTRFS_CACHE_FINISHED;
	exclude_super_stripes(root, cache);

	add_new_free_space(cache, root->fs_info, chunk_offset,
			   chunk_offset + size);

	free_excluded_extents(root, cache);

	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
				&cache->space_info);
	BUG_ON(ret);

	spin_lock(&cache->space_info->lock);
	cache->space_info->bytes_readonly += cache->bytes_super;
	spin_unlock(&cache->space_info->lock);

	__link_block_group(cache->space_info, cache);

	ret = btrfs_add_block_group_cache(root->fs_info, cache);
	BUG_ON(ret);

	ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
				sizeof(cache->item));
	BUG_ON(ret);

	set_avail_alloc_bits(extent_root->fs_info, type);

	return 0;
}

int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 group_start)
{
	struct btrfs_path *path;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_free_cluster *cluster;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	struct btrfs_key key;
	struct inode *inode;
	int ret;
	int factor;

	root = root->fs_info->extent_root;

	block_group = btrfs_lookup_block_group(root->fs_info, group_start);
	BUG_ON(!block_group);
	BUG_ON(!block_group->ro);

	/*
	 * Free the reserved super bytes from this block group before
	 * removing it.
7138 */ 7139 free_excluded_extents(root, block_group); 7140 7141 memcpy(&key, &block_group->key, sizeof(key)); 7142 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | 7143 BTRFS_BLOCK_GROUP_RAID1 | 7144 BTRFS_BLOCK_GROUP_RAID10)) 7145 factor = 2; 7146 else 7147 factor = 1; 7148 7149 /* make sure this block group isn't part of an allocation cluster */ 7150 cluster = &root->fs_info->data_alloc_cluster; 7151 spin_lock(&cluster->refill_lock); 7152 btrfs_return_cluster_to_free_space(block_group, cluster); 7153 spin_unlock(&cluster->refill_lock); 7154 7155 /* 7156 * make sure this block group isn't part of a metadata 7157 * allocation cluster 7158 */ 7159 cluster = &root->fs_info->meta_alloc_cluster; 7160 spin_lock(&cluster->refill_lock); 7161 btrfs_return_cluster_to_free_space(block_group, cluster); 7162 spin_unlock(&cluster->refill_lock); 7163 7164 path = btrfs_alloc_path(); 7165 BUG_ON(!path); 7166 7167 inode = lookup_free_space_inode(root, block_group, path); 7168 if (!IS_ERR(inode)) { 7169 btrfs_orphan_add(trans, inode); 7170 clear_nlink(inode); 7171 /* One for the block groups ref */ 7172 spin_lock(&block_group->lock); 7173 if (block_group->iref) { 7174 block_group->iref = 0; 7175 block_group->inode = NULL; 7176 spin_unlock(&block_group->lock); 7177 iput(inode); 7178 } else { 7179 spin_unlock(&block_group->lock); 7180 } 7181 /* One for our lookup ref */ 7182 iput(inode); 7183 } 7184 7185 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 7186 key.offset = block_group->key.objectid; 7187 key.type = 0; 7188 7189 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); 7190 if (ret < 0) 7191 goto out; 7192 if (ret > 0) 7193 btrfs_release_path(path); 7194 if (ret == 0) { 7195 ret = btrfs_del_item(trans, tree_root, path); 7196 if (ret) 7197 goto out; 7198 btrfs_release_path(path); 7199 } 7200 7201 spin_lock(&root->fs_info->block_group_cache_lock); 7202 rb_erase(&block_group->cache_node, 7203 &root->fs_info->block_group_cache_tree); 7204 spin_unlock(&root->fs_info->block_group_cache_lock); 7205 7206 down_write(&block_group->space_info->groups_sem); 7207 /* 7208 * we must use list_del_init so people can check to see if they 7209 * are still on the list after taking the semaphore 7210 */ 7211 list_del_init(&block_group->list); 7212 up_write(&block_group->space_info->groups_sem); 7213 7214 if (block_group->cached == BTRFS_CACHE_STARTED) 7215 wait_block_group_cache_done(block_group); 7216 7217 btrfs_remove_free_space_cache(block_group); 7218 7219 spin_lock(&block_group->space_info->lock); 7220 block_group->space_info->total_bytes -= block_group->key.offset; 7221 block_group->space_info->bytes_readonly -= block_group->key.offset; 7222 block_group->space_info->disk_total -= block_group->key.offset * factor; 7223 spin_unlock(&block_group->space_info->lock); 7224 7225 memcpy(&key, &block_group->key, sizeof(key)); 7226 7227 btrfs_clear_space_info_full(root->fs_info); 7228 7229 btrfs_put_block_group(block_group); 7230 btrfs_put_block_group(block_group); 7231 7232 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 7233 if (ret > 0) 7234 ret = -EIO; 7235 if (ret < 0) 7236 goto out; 7237 7238 ret = btrfs_del_item(trans, root, path); 7239 out: 7240 btrfs_free_path(path); 7241 return ret; 7242 } 7243 7244 int btrfs_init_space_info(struct btrfs_fs_info *fs_info) 7245 { 7246 struct btrfs_space_info *space_info; 7247 struct btrfs_super_block *disk_super; 7248 u64 features; 7249 u64 flags; 7250 int mixed = 0; 7251 int ret; 7252 7253 disk_super = &fs_info->super_copy; 7254 if (!btrfs_super_root(disk_super)) 7255 
return 1; 7256 7257 features = btrfs_super_incompat_flags(disk_super); 7258 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) 7259 mixed = 1; 7260 7261 flags = BTRFS_BLOCK_GROUP_SYSTEM; 7262 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7263 if (ret) 7264 goto out; 7265 7266 if (mixed) { 7267 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; 7268 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7269 } else { 7270 flags = BTRFS_BLOCK_GROUP_METADATA; 7271 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7272 if (ret) 7273 goto out; 7274 7275 flags = BTRFS_BLOCK_GROUP_DATA; 7276 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7277 } 7278 out: 7279 return ret; 7280 } 7281 7282 int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 7283 { 7284 return unpin_extent_range(root, start, end); 7285 } 7286 7287 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 7288 u64 num_bytes, u64 *actual_bytes) 7289 { 7290 return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); 7291 } 7292 7293 int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) 7294 { 7295 struct btrfs_fs_info *fs_info = root->fs_info; 7296 struct btrfs_block_group_cache *cache = NULL; 7297 u64 group_trimmed; 7298 u64 start; 7299 u64 end; 7300 u64 trimmed = 0; 7301 int ret = 0; 7302 7303 cache = btrfs_lookup_block_group(fs_info, range->start); 7304 7305 while (cache) { 7306 if (cache->key.objectid >= (range->start + range->len)) { 7307 btrfs_put_block_group(cache); 7308 break; 7309 } 7310 7311 start = max(range->start, cache->key.objectid); 7312 end = min(range->start + range->len, 7313 cache->key.objectid + cache->key.offset); 7314 7315 if (end - start >= range->minlen) { 7316 if (!block_group_cache_done(cache)) { 7317 ret = cache_block_group(cache, NULL, root, 0); 7318 if (!ret) 7319 wait_block_group_cache_done(cache); 7320 } 7321 ret = btrfs_trim_block_group(cache, 7322 &group_trimmed, 7323 start, 7324 end, 7325 range->minlen); 7326 7327 trimmed += group_trimmed; 7328 if (ret) { 7329 btrfs_put_block_group(cache); 7330 break; 7331 } 7332 } 7333 7334 cache = next_block_group(fs_info->tree_root, cache); 7335 } 7336 7337 range->len = trimmed; 7338 return ret; 7339 } 7340
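/*
 * btrfs_trim_fs() above is typically reached via the FITRIM ioctl path;
 * a caller is expected to fill in an fstrim_range and read back the
 * number of bytes actually trimmed, roughly:
 *
 *	struct fstrim_range range;
 *
 *	range.start = 0;
 *	range.len = (u64)-1;	(trim the whole filesystem)
 *	range.minlen = 0;
 *	ret = btrfs_trim_fs(fs_root, &range);
 *	(on success, range.len holds the number of bytes trimmed)
 *
 * This is an illustrative sketch only; the real caller may also clamp
 * minlen to the devices' discard granularity before calling in.
 */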