1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 #include <linux/sched.h> 19 #include <linux/pagemap.h> 20 #include <linux/writeback.h> 21 #include <linux/blkdev.h> 22 #include <linux/sort.h> 23 #include <linux/rcupdate.h> 24 #include <linux/kthread.h> 25 #include "compat.h" 26 #include "hash.h" 27 #include "ctree.h" 28 #include "disk-io.h" 29 #include "print-tree.h" 30 #include "transaction.h" 31 #include "volumes.h" 32 #include "locking.h" 33 #include "free-space-cache.h" 34 35 static int update_block_group(struct btrfs_trans_handle *trans, 36 struct btrfs_root *root, 37 u64 bytenr, u64 num_bytes, int alloc, 38 int mark_free); 39 static int update_reserved_extents(struct btrfs_block_group_cache *cache, 40 u64 num_bytes, int reserve); 41 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 42 struct btrfs_root *root, 43 u64 bytenr, u64 num_bytes, u64 parent, 44 u64 root_objectid, u64 owner_objectid, 45 u64 owner_offset, int refs_to_drop, 46 struct btrfs_delayed_extent_op *extra_op); 47 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 48 struct extent_buffer *leaf, 49 struct btrfs_extent_item *ei); 50 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 51 struct btrfs_root *root, 52 u64 parent, u64 root_objectid, 53 u64 flags, u64 owner, u64 offset, 54 struct btrfs_key *ins, int ref_mod); 55 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 56 struct btrfs_root *root, 57 u64 parent, u64 root_objectid, 58 u64 flags, struct btrfs_disk_key *key, 59 int level, struct btrfs_key *ins); 60 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 61 struct btrfs_root *extent_root, u64 alloc_bytes, 62 u64 flags, int force); 63 static int pin_down_bytes(struct btrfs_trans_handle *trans, 64 struct btrfs_root *root, 65 struct btrfs_path *path, 66 u64 bytenr, u64 num_bytes, 67 int is_data, int reserved, 68 struct extent_buffer **must_clean); 69 static int find_next_key(struct btrfs_path *path, int level, 70 struct btrfs_key *key); 71 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 72 int dump_block_groups); 73 74 static noinline int 75 block_group_cache_done(struct btrfs_block_group_cache *cache) 76 { 77 smp_mb(); 78 return cache->cached == BTRFS_CACHE_FINISHED; 79 } 80 81 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 82 { 83 return (cache->flags & bits) == bits; 84 } 85 86 /* 87 * this adds the block group to the fs_info rb tree for the block group 88 * cache 89 */ 90 static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, 91 struct btrfs_block_group_cache *block_group) 92 { 93 struct rb_node **p; 94 struct rb_node *parent = NULL; 95 struct btrfs_block_group_cache *cache; 96 97 spin_lock(&info->block_group_cache_lock); 98 p = &info->block_group_cache_tree.rb_node; 99 100 while (*p) { 101 parent = *p; 102 cache = rb_entry(parent, struct btrfs_block_group_cache, 103 cache_node); 104 if (block_group->key.objectid < cache->key.objectid) { 105 p = &(*p)->rb_left; 106 } else if (block_group->key.objectid > cache->key.objectid) { 107 p = &(*p)->rb_right; 108 } else { 109 spin_unlock(&info->block_group_cache_lock); 110 return -EEXIST; 111 } 112 } 113 114 rb_link_node(&block_group->cache_node, parent, p); 115 rb_insert_color(&block_group->cache_node, 116 &info->block_group_cache_tree); 117 spin_unlock(&info->block_group_cache_lock); 118 119 return 0; 120 } 121 122 /* 123 * This will return the block group at or after bytenr if contains is 0, else 124 * it will return the block group that contains the bytenr 125 */ 126 static struct btrfs_block_group_cache * 127 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, 128 int contains) 129 { 130 struct btrfs_block_group_cache *cache, *ret = NULL; 131 struct rb_node *n; 132 u64 end, start; 133 134 spin_lock(&info->block_group_cache_lock); 135 n = info->block_group_cache_tree.rb_node; 136 137 while (n) { 138 cache = rb_entry(n, struct btrfs_block_group_cache, 139 cache_node); 140 end = cache->key.objectid + cache->key.offset - 1; 141 start = cache->key.objectid; 142 143 if (bytenr < start) { 144 if (!contains && (!ret || start < ret->key.objectid)) 145 ret = cache; 146 n = n->rb_left; 147 } else if (bytenr > start) { 148 if (contains && bytenr <= end) { 149 ret = cache; 150 break; 151 } 152 n = n->rb_right; 153 } else { 154 ret = cache; 155 break; 156 } 157 } 158 if (ret) 159 atomic_inc(&ret->count); 160 spin_unlock(&info->block_group_cache_lock); 161 162 return ret; 163 } 164 165 static int add_excluded_extent(struct btrfs_root *root, 166 u64 start, u64 num_bytes) 167 { 168 u64 end = start + num_bytes - 1; 169 set_extent_bits(&root->fs_info->freed_extents[0], 170 start, end, EXTENT_UPTODATE, GFP_NOFS); 171 set_extent_bits(&root->fs_info->freed_extents[1], 172 start, end, EXTENT_UPTODATE, GFP_NOFS); 173 return 0; 174 } 175 176 static void free_excluded_extents(struct btrfs_root *root, 177 struct btrfs_block_group_cache *cache) 178 { 179 u64 start, end; 180 181 start = cache->key.objectid; 182 end = start + cache->key.offset - 1; 183 184 clear_extent_bits(&root->fs_info->freed_extents[0], 185 start, end, EXTENT_UPTODATE, GFP_NOFS); 186 clear_extent_bits(&root->fs_info->freed_extents[1], 187 start, end, EXTENT_UPTODATE, GFP_NOFS); 188 } 189 190 static int exclude_super_stripes(struct btrfs_root *root, 191 struct btrfs_block_group_cache *cache) 192 { 193 u64 bytenr; 194 u64 *logical; 195 int stripe_len; 196 int i, nr, ret; 197 198 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { 199 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; 200 cache->bytes_super += stripe_len; 201 ret = add_excluded_extent(root, cache->key.objectid, 202 stripe_len); 203 BUG_ON(ret); 204 } 205 206 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 207 bytenr = btrfs_sb_offset(i); 208 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 209 cache->key.objectid, bytenr, 210 0, &logical, &nr, &stripe_len); 211 BUG_ON(ret); 212 213 while (nr--) { 214 cache->bytes_super += stripe_len; 215 ret = add_excluded_extent(root, logical[nr], 216 stripe_len); 217 BUG_ON(ret); 218 } 219 220 kfree(logical); 221 } 222 return 0; 223 } 224 225 static struct btrfs_caching_control * 226 get_caching_control(struct btrfs_block_group_cache *cache) 227 { 228 struct btrfs_caching_control *ctl; 229 230 spin_lock(&cache->lock); 231 if (cache->cached != BTRFS_CACHE_STARTED) { 232 spin_unlock(&cache->lock); 233 return NULL; 234 } 235 236 ctl = cache->caching_ctl; 237 atomic_inc(&ctl->count); 238 spin_unlock(&cache->lock); 239 return ctl; 240 } 241 242 static void put_caching_control(struct btrfs_caching_control *ctl) 243 { 244 if (atomic_dec_and_test(&ctl->count)) 245 kfree(ctl); 246 } 247 248 /* 249 * this is only called by cache_block_group, since we could have freed extents 250 * we need to check the pinned_extents for any extents that can't be used yet 251 * since their free space will be released as soon as the transaction commits. 252 */ 253 static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, 254 struct btrfs_fs_info *info, u64 start, u64 end) 255 { 256 u64 extent_start, extent_end, size, total_added = 0; 257 int ret; 258 259 while (start < end) { 260 ret = find_first_extent_bit(info->pinned_extents, start, 261 &extent_start, &extent_end, 262 EXTENT_DIRTY | EXTENT_UPTODATE); 263 if (ret) 264 break; 265 266 if (extent_start <= start) { 267 start = extent_end + 1; 268 } else if (extent_start > start && extent_start < end) { 269 size = extent_start - start; 270 total_added += size; 271 ret = btrfs_add_free_space(block_group, start, 272 size); 273 BUG_ON(ret); 274 start = extent_end + 1; 275 } else { 276 break; 277 } 278 } 279 280 if (start < end) { 281 size = end - start; 282 total_added += size; 283 ret = btrfs_add_free_space(block_group, start, size); 284 BUG_ON(ret); 285 } 286 287 return total_added; 288 } 289 290 static int caching_kthread(void *data) 291 { 292 struct btrfs_block_group_cache *block_group = data; 293 struct btrfs_fs_info *fs_info = block_group->fs_info; 294 struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; 295 struct btrfs_root *extent_root = fs_info->extent_root; 296 struct btrfs_path *path; 297 struct extent_buffer *leaf; 298 struct btrfs_key key; 299 u64 total_found = 0; 300 u64 last = 0; 301 u32 nritems; 302 int ret = 0; 303 304 path = btrfs_alloc_path(); 305 if (!path) 306 return -ENOMEM; 307 308 exclude_super_stripes(extent_root, block_group); 309 spin_lock(&block_group->space_info->lock); 310 block_group->space_info->bytes_super += block_group->bytes_super; 311 spin_unlock(&block_group->space_info->lock); 312 313 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 314 315 /* 316 * We don't want to deadlock with somebody trying to allocate a new 317 * extent for the extent root while also trying to search the extent 318 * root to add free space. So we skip locking and search the commit 319 * root, since its read-only 320 */ 321 path->skip_locking = 1; 322 path->search_commit_root = 1; 323 path->reada = 2; 324 325 key.objectid = last; 326 key.offset = 0; 327 key.type = BTRFS_EXTENT_ITEM_KEY; 328 again: 329 mutex_lock(&caching_ctl->mutex); 330 /* need to make sure the commit_root doesn't disappear */ 331 down_read(&fs_info->extent_commit_sem); 332 333 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 334 if (ret < 0) 335 goto err; 336 337 leaf = path->nodes[0]; 338 nritems = btrfs_header_nritems(leaf); 339 340 while (1) { 341 smp_mb(); 342 if (fs_info->closing > 1) { 343 last = (u64)-1; 344 break; 345 } 346 347 if (path->slots[0] < nritems) { 348 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 349 } else { 350 ret = find_next_key(path, 0, &key); 351 if (ret) 352 break; 353 354 caching_ctl->progress = last; 355 btrfs_release_path(extent_root, path); 356 up_read(&fs_info->extent_commit_sem); 357 mutex_unlock(&caching_ctl->mutex); 358 if (btrfs_transaction_in_commit(fs_info)) 359 schedule_timeout(1); 360 else 361 cond_resched(); 362 goto again; 363 } 364 365 if (key.objectid < block_group->key.objectid) { 366 path->slots[0]++; 367 continue; 368 } 369 370 if (key.objectid >= block_group->key.objectid + 371 block_group->key.offset) 372 break; 373 374 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 375 total_found += add_new_free_space(block_group, 376 fs_info, last, 377 key.objectid); 378 last = key.objectid + key.offset; 379 380 if (total_found > (1024 * 1024 * 2)) { 381 total_found = 0; 382 wake_up(&caching_ctl->wait); 383 } 384 } 385 path->slots[0]++; 386 } 387 ret = 0; 388 389 total_found += add_new_free_space(block_group, fs_info, last, 390 block_group->key.objectid + 391 block_group->key.offset); 392 caching_ctl->progress = (u64)-1; 393 394 spin_lock(&block_group->lock); 395 block_group->caching_ctl = NULL; 396 block_group->cached = BTRFS_CACHE_FINISHED; 397 spin_unlock(&block_group->lock); 398 399 err: 400 btrfs_free_path(path); 401 up_read(&fs_info->extent_commit_sem); 402 403 free_excluded_extents(extent_root, block_group); 404 405 mutex_unlock(&caching_ctl->mutex); 406 wake_up(&caching_ctl->wait); 407 408 put_caching_control(caching_ctl); 409 atomic_dec(&block_group->space_info->caching_threads); 410 return 0; 411 } 412 413 static int cache_block_group(struct btrfs_block_group_cache *cache) 414 { 415 struct btrfs_fs_info *fs_info = cache->fs_info; 416 struct btrfs_caching_control *caching_ctl; 417 struct task_struct *tsk; 418 int ret = 0; 419 420 smp_mb(); 421 if (cache->cached != BTRFS_CACHE_NO) 422 return 0; 423 424 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); 425 BUG_ON(!caching_ctl); 426 427 INIT_LIST_HEAD(&caching_ctl->list); 428 mutex_init(&caching_ctl->mutex); 429 init_waitqueue_head(&caching_ctl->wait); 430 caching_ctl->block_group = cache; 431 caching_ctl->progress = cache->key.objectid; 432 /* one for caching kthread, one for caching block group list */ 433 atomic_set(&caching_ctl->count, 2); 434 435 spin_lock(&cache->lock); 436 if (cache->cached != BTRFS_CACHE_NO) { 437 spin_unlock(&cache->lock); 438 kfree(caching_ctl); 439 return 0; 440 } 441 cache->caching_ctl = caching_ctl; 442 cache->cached = BTRFS_CACHE_STARTED; 443 spin_unlock(&cache->lock); 444 445 down_write(&fs_info->extent_commit_sem); 446 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); 447 up_write(&fs_info->extent_commit_sem); 448 449 atomic_inc(&cache->space_info->caching_threads); 450 451 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 452 cache->key.objectid); 453 if (IS_ERR(tsk)) { 454 ret = PTR_ERR(tsk); 455 printk(KERN_ERR "error running thread %d\n", ret); 456 BUG(); 457 } 458 459 return ret; 460 } 461 462 /* 463 * return the block group that starts at or after bytenr 464 */ 465 static struct btrfs_block_group_cache * 466 btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) 467 { 468 struct btrfs_block_group_cache *cache; 469 470 cache = block_group_cache_tree_search(info, bytenr, 0); 471 472 return cache; 473 } 474 475 /* 476 * return the block group that contains the given bytenr 477 */ 478 struct btrfs_block_group_cache *btrfs_lookup_block_group( 479 struct btrfs_fs_info *info, 480 u64 bytenr) 481 { 482 struct btrfs_block_group_cache *cache; 483 484 cache = block_group_cache_tree_search(info, bytenr, 1); 485 486 return cache; 487 } 488 489 void btrfs_put_block_group(struct btrfs_block_group_cache *cache) 490 { 491 if (atomic_dec_and_test(&cache->count)) 492 kfree(cache); 493 } 494 495 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 496 u64 flags) 497 { 498 struct list_head *head = &info->space_info; 499 struct btrfs_space_info *found; 500 501 rcu_read_lock(); 502 list_for_each_entry_rcu(found, head, list) { 503 if (found->flags == flags) { 504 rcu_read_unlock(); 505 return found; 506 } 507 } 508 rcu_read_unlock(); 509 return NULL; 510 } 511 512 /* 513 * after adding space to the filesystem, we need to clear the full flags 514 * on all the space infos. 515 */ 516 void btrfs_clear_space_info_full(struct btrfs_fs_info *info) 517 { 518 struct list_head *head = &info->space_info; 519 struct btrfs_space_info *found; 520 521 rcu_read_lock(); 522 list_for_each_entry_rcu(found, head, list) 523 found->full = 0; 524 rcu_read_unlock(); 525 } 526 527 static u64 div_factor(u64 num, int factor) 528 { 529 if (factor == 10) 530 return num; 531 num *= factor; 532 do_div(num, 10); 533 return num; 534 } 535 536 u64 btrfs_find_block_group(struct btrfs_root *root, 537 u64 search_start, u64 search_hint, int owner) 538 { 539 struct btrfs_block_group_cache *cache; 540 u64 used; 541 u64 last = max(search_hint, search_start); 542 u64 group_start = 0; 543 int full_search = 0; 544 int factor = 9; 545 int wrapped = 0; 546 again: 547 while (1) { 548 cache = btrfs_lookup_first_block_group(root->fs_info, last); 549 if (!cache) 550 break; 551 552 spin_lock(&cache->lock); 553 last = cache->key.objectid + cache->key.offset; 554 used = btrfs_block_group_used(&cache->item); 555 556 if ((full_search || !cache->ro) && 557 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) { 558 if (used + cache->pinned + cache->reserved < 559 div_factor(cache->key.offset, factor)) { 560 group_start = cache->key.objectid; 561 spin_unlock(&cache->lock); 562 btrfs_put_block_group(cache); 563 goto found; 564 } 565 } 566 spin_unlock(&cache->lock); 567 btrfs_put_block_group(cache); 568 cond_resched(); 569 } 570 if (!wrapped) { 571 last = search_start; 572 wrapped = 1; 573 goto again; 574 } 575 if (!full_search && factor < 10) { 576 last = search_start; 577 full_search = 1; 578 factor = 10; 579 goto again; 580 } 581 found: 582 return group_start; 583 } 584 585 /* simple helper to search for an existing extent at a given offset */ 586 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) 587 { 588 int ret; 589 struct btrfs_key key; 590 struct btrfs_path *path; 591 592 path = btrfs_alloc_path(); 593 BUG_ON(!path); 594 key.objectid = start; 595 key.offset = len; 596 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 597 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 598 0, 0); 599 btrfs_free_path(path); 600 return ret; 601 } 602 603 /* 604 * Back reference rules. Back refs have three main goals: 605 * 606 * 1) differentiate between all holders of references to an extent so that 607 * when a reference is dropped we can make sure it was a valid reference 608 * before freeing the extent. 609 * 610 * 2) Provide enough information to quickly find the holders of an extent 611 * if we notice a given block is corrupted or bad. 612 * 613 * 3) Make it easy to migrate blocks for FS shrinking or storage pool 614 * maintenance. This is actually the same as #2, but with a slightly 615 * different use case. 616 * 617 * There are two kinds of back refs. The implicit back refs is optimized 618 * for pointers in non-shared tree blocks. For a given pointer in a block, 619 * back refs of this kind provide information about the block's owner tree 620 * and the pointer's key. These information allow us to find the block by 621 * b-tree searching. The full back refs is for pointers in tree blocks not 622 * referenced by their owner trees. The location of tree block is recorded 623 * in the back refs. Actually the full back refs is generic, and can be 624 * used in all cases the implicit back refs is used. The major shortcoming 625 * of the full back refs is its overhead. Every time a tree block gets 626 * COWed, we have to update back refs entry for all pointers in it. 627 * 628 * For a newly allocated tree block, we use implicit back refs for 629 * pointers in it. This means most tree related operations only involve 630 * implicit back refs. For a tree block created in old transaction, the 631 * only way to drop a reference to it is COW it. So we can detect the 632 * event that tree block loses its owner tree's reference and do the 633 * back refs conversion. 634 * 635 * When a tree block is COW'd through a tree, there are four cases: 636 * 637 * The reference count of the block is one and the tree is the block's 638 * owner tree. Nothing to do in this case. 639 * 640 * The reference count of the block is one and the tree is not the 641 * block's owner tree. In this case, full back refs is used for pointers 642 * in the block. Remove these full back refs, add implicit back refs for 643 * every pointers in the new block. 644 * 645 * The reference count of the block is greater than one and the tree is 646 * the block's owner tree. In this case, implicit back refs is used for 647 * pointers in the block. Add full back refs for every pointers in the 648 * block, increase lower level extents' reference counts. The original 649 * implicit back refs are entailed to the new block. 650 * 651 * The reference count of the block is greater than one and the tree is 652 * not the block's owner tree. Add implicit back refs for every pointer in 653 * the new block, increase lower level extents' reference count. 654 * 655 * Back Reference Key composing: 656 * 657 * The key objectid corresponds to the first byte in the extent, 658 * The key type is used to differentiate between types of back refs. 659 * There are different meanings of the key offset for different types 660 * of back refs. 661 * 662 * File extents can be referenced by: 663 * 664 * - multiple snapshots, subvolumes, or different generations in one subvol 665 * - different files inside a single subvolume 666 * - different offsets inside a file (bookend extents in file.c) 667 * 668 * The extent ref structure for the implicit back refs has fields for: 669 * 670 * - Objectid of the subvolume root 671 * - objectid of the file holding the reference 672 * - original offset in the file 673 * - how many bookend extents 674 * 675 * The key offset for the implicit back refs is hash of the first 676 * three fields. 677 * 678 * The extent ref structure for the full back refs has field for: 679 * 680 * - number of pointers in the tree leaf 681 * 682 * The key offset for the implicit back refs is the first byte of 683 * the tree leaf 684 * 685 * When a file extent is allocated, The implicit back refs is used. 686 * the fields are filled in: 687 * 688 * (root_key.objectid, inode objectid, offset in file, 1) 689 * 690 * When a file extent is removed file truncation, we find the 691 * corresponding implicit back refs and check the following fields: 692 * 693 * (btrfs_header_owner(leaf), inode objectid, offset in file) 694 * 695 * Btree extents can be referenced by: 696 * 697 * - Different subvolumes 698 * 699 * Both the implicit back refs and the full back refs for tree blocks 700 * only consist of key. The key offset for the implicit back refs is 701 * objectid of block's owner tree. The key offset for the full back refs 702 * is the first byte of parent block. 703 * 704 * When implicit back refs is used, information about the lowest key and 705 * level of the tree block are required. These information are stored in 706 * tree block info structure. 707 */ 708 709 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 710 static int convert_extent_item_v0(struct btrfs_trans_handle *trans, 711 struct btrfs_root *root, 712 struct btrfs_path *path, 713 u64 owner, u32 extra_size) 714 { 715 struct btrfs_extent_item *item; 716 struct btrfs_extent_item_v0 *ei0; 717 struct btrfs_extent_ref_v0 *ref0; 718 struct btrfs_tree_block_info *bi; 719 struct extent_buffer *leaf; 720 struct btrfs_key key; 721 struct btrfs_key found_key; 722 u32 new_size = sizeof(*item); 723 u64 refs; 724 int ret; 725 726 leaf = path->nodes[0]; 727 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0)); 728 729 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 730 ei0 = btrfs_item_ptr(leaf, path->slots[0], 731 struct btrfs_extent_item_v0); 732 refs = btrfs_extent_refs_v0(leaf, ei0); 733 734 if (owner == (u64)-1) { 735 while (1) { 736 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 737 ret = btrfs_next_leaf(root, path); 738 if (ret < 0) 739 return ret; 740 BUG_ON(ret > 0); 741 leaf = path->nodes[0]; 742 } 743 btrfs_item_key_to_cpu(leaf, &found_key, 744 path->slots[0]); 745 BUG_ON(key.objectid != found_key.objectid); 746 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) { 747 path->slots[0]++; 748 continue; 749 } 750 ref0 = btrfs_item_ptr(leaf, path->slots[0], 751 struct btrfs_extent_ref_v0); 752 owner = btrfs_ref_objectid_v0(leaf, ref0); 753 break; 754 } 755 } 756 btrfs_release_path(root, path); 757 758 if (owner < BTRFS_FIRST_FREE_OBJECTID) 759 new_size += sizeof(*bi); 760 761 new_size -= sizeof(*ei0); 762 ret = btrfs_search_slot(trans, root, &key, path, 763 new_size + extra_size, 1); 764 if (ret < 0) 765 return ret; 766 BUG_ON(ret); 767 768 ret = btrfs_extend_item(trans, root, path, new_size); 769 BUG_ON(ret); 770 771 leaf = path->nodes[0]; 772 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 773 btrfs_set_extent_refs(leaf, item, refs); 774 /* FIXME: get real generation */ 775 btrfs_set_extent_generation(leaf, item, 0); 776 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 777 btrfs_set_extent_flags(leaf, item, 778 BTRFS_EXTENT_FLAG_TREE_BLOCK | 779 BTRFS_BLOCK_FLAG_FULL_BACKREF); 780 bi = (struct btrfs_tree_block_info *)(item + 1); 781 /* FIXME: get first key of the block */ 782 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); 783 btrfs_set_tree_block_level(leaf, bi, (int)owner); 784 } else { 785 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); 786 } 787 btrfs_mark_buffer_dirty(leaf); 788 return 0; 789 } 790 #endif 791 792 static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) 793 { 794 u32 high_crc = ~(u32)0; 795 u32 low_crc = ~(u32)0; 796 __le64 lenum; 797 798 lenum = cpu_to_le64(root_objectid); 799 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 800 lenum = cpu_to_le64(owner); 801 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 802 lenum = cpu_to_le64(offset); 803 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 804 805 return ((u64)high_crc << 31) ^ (u64)low_crc; 806 } 807 808 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf, 809 struct btrfs_extent_data_ref *ref) 810 { 811 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref), 812 btrfs_extent_data_ref_objectid(leaf, ref), 813 btrfs_extent_data_ref_offset(leaf, ref)); 814 } 815 816 static int match_extent_data_ref(struct extent_buffer *leaf, 817 struct btrfs_extent_data_ref *ref, 818 u64 root_objectid, u64 owner, u64 offset) 819 { 820 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid || 821 btrfs_extent_data_ref_objectid(leaf, ref) != owner || 822 btrfs_extent_data_ref_offset(leaf, ref) != offset) 823 return 0; 824 return 1; 825 } 826 827 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, 828 struct btrfs_root *root, 829 struct btrfs_path *path, 830 u64 bytenr, u64 parent, 831 u64 root_objectid, 832 u64 owner, u64 offset) 833 { 834 struct btrfs_key key; 835 struct btrfs_extent_data_ref *ref; 836 struct extent_buffer *leaf; 837 u32 nritems; 838 int ret; 839 int recow; 840 int err = -ENOENT; 841 842 key.objectid = bytenr; 843 if (parent) { 844 key.type = BTRFS_SHARED_DATA_REF_KEY; 845 key.offset = parent; 846 } else { 847 key.type = BTRFS_EXTENT_DATA_REF_KEY; 848 key.offset = hash_extent_data_ref(root_objectid, 849 owner, offset); 850 } 851 again: 852 recow = 0; 853 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 854 if (ret < 0) { 855 err = ret; 856 goto fail; 857 } 858 859 if (parent) { 860 if (!ret) 861 return 0; 862 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 863 key.type = BTRFS_EXTENT_REF_V0_KEY; 864 btrfs_release_path(root, path); 865 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 866 if (ret < 0) { 867 err = ret; 868 goto fail; 869 } 870 if (!ret) 871 return 0; 872 #endif 873 goto fail; 874 } 875 876 leaf = path->nodes[0]; 877 nritems = btrfs_header_nritems(leaf); 878 while (1) { 879 if (path->slots[0] >= nritems) { 880 ret = btrfs_next_leaf(root, path); 881 if (ret < 0) 882 err = ret; 883 if (ret) 884 goto fail; 885 886 leaf = path->nodes[0]; 887 nritems = btrfs_header_nritems(leaf); 888 recow = 1; 889 } 890 891 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 892 if (key.objectid != bytenr || 893 key.type != BTRFS_EXTENT_DATA_REF_KEY) 894 goto fail; 895 896 ref = btrfs_item_ptr(leaf, path->slots[0], 897 struct btrfs_extent_data_ref); 898 899 if (match_extent_data_ref(leaf, ref, root_objectid, 900 owner, offset)) { 901 if (recow) { 902 btrfs_release_path(root, path); 903 goto again; 904 } 905 err = 0; 906 break; 907 } 908 path->slots[0]++; 909 } 910 fail: 911 return err; 912 } 913 914 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, 915 struct btrfs_root *root, 916 struct btrfs_path *path, 917 u64 bytenr, u64 parent, 918 u64 root_objectid, u64 owner, 919 u64 offset, int refs_to_add) 920 { 921 struct btrfs_key key; 922 struct extent_buffer *leaf; 923 u32 size; 924 u32 num_refs; 925 int ret; 926 927 key.objectid = bytenr; 928 if (parent) { 929 key.type = BTRFS_SHARED_DATA_REF_KEY; 930 key.offset = parent; 931 size = sizeof(struct btrfs_shared_data_ref); 932 } else { 933 key.type = BTRFS_EXTENT_DATA_REF_KEY; 934 key.offset = hash_extent_data_ref(root_objectid, 935 owner, offset); 936 size = sizeof(struct btrfs_extent_data_ref); 937 } 938 939 ret = btrfs_insert_empty_item(trans, root, path, &key, size); 940 if (ret && ret != -EEXIST) 941 goto fail; 942 943 leaf = path->nodes[0]; 944 if (parent) { 945 struct btrfs_shared_data_ref *ref; 946 ref = btrfs_item_ptr(leaf, path->slots[0], 947 struct btrfs_shared_data_ref); 948 if (ret == 0) { 949 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add); 950 } else { 951 num_refs = btrfs_shared_data_ref_count(leaf, ref); 952 num_refs += refs_to_add; 953 btrfs_set_shared_data_ref_count(leaf, ref, num_refs); 954 } 955 } else { 956 struct btrfs_extent_data_ref *ref; 957 while (ret == -EEXIST) { 958 ref = btrfs_item_ptr(leaf, path->slots[0], 959 struct btrfs_extent_data_ref); 960 if (match_extent_data_ref(leaf, ref, root_objectid, 961 owner, offset)) 962 break; 963 btrfs_release_path(root, path); 964 key.offset++; 965 ret = btrfs_insert_empty_item(trans, root, path, &key, 966 size); 967 if (ret && ret != -EEXIST) 968 goto fail; 969 970 leaf = path->nodes[0]; 971 } 972 ref = btrfs_item_ptr(leaf, path->slots[0], 973 struct btrfs_extent_data_ref); 974 if (ret == 0) { 975 btrfs_set_extent_data_ref_root(leaf, ref, 976 root_objectid); 977 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 978 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 979 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add); 980 } else { 981 num_refs = btrfs_extent_data_ref_count(leaf, ref); 982 num_refs += refs_to_add; 983 btrfs_set_extent_data_ref_count(leaf, ref, num_refs); 984 } 985 } 986 btrfs_mark_buffer_dirty(leaf); 987 ret = 0; 988 fail: 989 btrfs_release_path(root, path); 990 return ret; 991 } 992 993 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 994 struct btrfs_root *root, 995 struct btrfs_path *path, 996 int refs_to_drop) 997 { 998 struct btrfs_key key; 999 struct btrfs_extent_data_ref *ref1 = NULL; 1000 struct btrfs_shared_data_ref *ref2 = NULL; 1001 struct extent_buffer *leaf; 1002 u32 num_refs = 0; 1003 int ret = 0; 1004 1005 leaf = path->nodes[0]; 1006 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1007 1008 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 1009 ref1 = btrfs_item_ptr(leaf, path->slots[0], 1010 struct btrfs_extent_data_ref); 1011 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1012 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 1013 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1014 struct btrfs_shared_data_ref); 1015 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1016 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1017 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1018 struct btrfs_extent_ref_v0 *ref0; 1019 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1020 struct btrfs_extent_ref_v0); 1021 num_refs = btrfs_ref_count_v0(leaf, ref0); 1022 #endif 1023 } else { 1024 BUG(); 1025 } 1026 1027 BUG_ON(num_refs < refs_to_drop); 1028 num_refs -= refs_to_drop; 1029 1030 if (num_refs == 0) { 1031 ret = btrfs_del_item(trans, root, path); 1032 } else { 1033 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 1034 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); 1035 else if (key.type == BTRFS_SHARED_DATA_REF_KEY) 1036 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); 1037 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1038 else { 1039 struct btrfs_extent_ref_v0 *ref0; 1040 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1041 struct btrfs_extent_ref_v0); 1042 btrfs_set_ref_count_v0(leaf, ref0, num_refs); 1043 } 1044 #endif 1045 btrfs_mark_buffer_dirty(leaf); 1046 } 1047 return ret; 1048 } 1049 1050 static noinline u32 extent_data_ref_count(struct btrfs_root *root, 1051 struct btrfs_path *path, 1052 struct btrfs_extent_inline_ref *iref) 1053 { 1054 struct btrfs_key key; 1055 struct extent_buffer *leaf; 1056 struct btrfs_extent_data_ref *ref1; 1057 struct btrfs_shared_data_ref *ref2; 1058 u32 num_refs = 0; 1059 1060 leaf = path->nodes[0]; 1061 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1062 if (iref) { 1063 if (btrfs_extent_inline_ref_type(leaf, iref) == 1064 BTRFS_EXTENT_DATA_REF_KEY) { 1065 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); 1066 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1067 } else { 1068 ref2 = (struct btrfs_shared_data_ref *)(iref + 1); 1069 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1070 } 1071 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 1072 ref1 = btrfs_item_ptr(leaf, path->slots[0], 1073 struct btrfs_extent_data_ref); 1074 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1075 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 1076 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1077 struct btrfs_shared_data_ref); 1078 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1079 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1080 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1081 struct btrfs_extent_ref_v0 *ref0; 1082 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1083 struct btrfs_extent_ref_v0); 1084 num_refs = btrfs_ref_count_v0(leaf, ref0); 1085 #endif 1086 } else { 1087 WARN_ON(1); 1088 } 1089 return num_refs; 1090 } 1091 1092 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, 1093 struct btrfs_root *root, 1094 struct btrfs_path *path, 1095 u64 bytenr, u64 parent, 1096 u64 root_objectid) 1097 { 1098 struct btrfs_key key; 1099 int ret; 1100 1101 key.objectid = bytenr; 1102 if (parent) { 1103 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 1104 key.offset = parent; 1105 } else { 1106 key.type = BTRFS_TREE_BLOCK_REF_KEY; 1107 key.offset = root_objectid; 1108 } 1109 1110 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1111 if (ret > 0) 1112 ret = -ENOENT; 1113 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1114 if (ret == -ENOENT && parent) { 1115 btrfs_release_path(root, path); 1116 key.type = BTRFS_EXTENT_REF_V0_KEY; 1117 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1118 if (ret > 0) 1119 ret = -ENOENT; 1120 } 1121 #endif 1122 return ret; 1123 } 1124 1125 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, 1126 struct btrfs_root *root, 1127 struct btrfs_path *path, 1128 u64 bytenr, u64 parent, 1129 u64 root_objectid) 1130 { 1131 struct btrfs_key key; 1132 int ret; 1133 1134 key.objectid = bytenr; 1135 if (parent) { 1136 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 1137 key.offset = parent; 1138 } else { 1139 key.type = BTRFS_TREE_BLOCK_REF_KEY; 1140 key.offset = root_objectid; 1141 } 1142 1143 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1144 btrfs_release_path(root, path); 1145 return ret; 1146 } 1147 1148 static inline int extent_ref_type(u64 parent, u64 owner) 1149 { 1150 int type; 1151 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1152 if (parent > 0) 1153 type = BTRFS_SHARED_BLOCK_REF_KEY; 1154 else 1155 type = BTRFS_TREE_BLOCK_REF_KEY; 1156 } else { 1157 if (parent > 0) 1158 type = BTRFS_SHARED_DATA_REF_KEY; 1159 else 1160 type = BTRFS_EXTENT_DATA_REF_KEY; 1161 } 1162 return type; 1163 } 1164 1165 static int find_next_key(struct btrfs_path *path, int level, 1166 struct btrfs_key *key) 1167 1168 { 1169 for (; level < BTRFS_MAX_LEVEL; level++) { 1170 if (!path->nodes[level]) 1171 break; 1172 if (path->slots[level] + 1 >= 1173 btrfs_header_nritems(path->nodes[level])) 1174 continue; 1175 if (level == 0) 1176 btrfs_item_key_to_cpu(path->nodes[level], key, 1177 path->slots[level] + 1); 1178 else 1179 btrfs_node_key_to_cpu(path->nodes[level], key, 1180 path->slots[level] + 1); 1181 return 0; 1182 } 1183 return 1; 1184 } 1185 1186 /* 1187 * look for inline back ref. if back ref is found, *ref_ret is set 1188 * to the address of inline back ref, and 0 is returned. 1189 * 1190 * if back ref isn't found, *ref_ret is set to the address where it 1191 * should be inserted, and -ENOENT is returned. 1192 * 1193 * if insert is true and there are too many inline back refs, the path 1194 * points to the extent item, and -EAGAIN is returned. 1195 * 1196 * NOTE: inline back refs are ordered in the same way that back ref 1197 * items in the tree are ordered. 1198 */ 1199 static noinline_for_stack 1200 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, 1201 struct btrfs_root *root, 1202 struct btrfs_path *path, 1203 struct btrfs_extent_inline_ref **ref_ret, 1204 u64 bytenr, u64 num_bytes, 1205 u64 parent, u64 root_objectid, 1206 u64 owner, u64 offset, int insert) 1207 { 1208 struct btrfs_key key; 1209 struct extent_buffer *leaf; 1210 struct btrfs_extent_item *ei; 1211 struct btrfs_extent_inline_ref *iref; 1212 u64 flags; 1213 u64 item_size; 1214 unsigned long ptr; 1215 unsigned long end; 1216 int extra_size; 1217 int type; 1218 int want; 1219 int ret; 1220 int err = 0; 1221 1222 key.objectid = bytenr; 1223 key.type = BTRFS_EXTENT_ITEM_KEY; 1224 key.offset = num_bytes; 1225 1226 want = extent_ref_type(parent, owner); 1227 if (insert) { 1228 extra_size = btrfs_extent_inline_ref_size(want); 1229 path->keep_locks = 1; 1230 } else 1231 extra_size = -1; 1232 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); 1233 if (ret < 0) { 1234 err = ret; 1235 goto out; 1236 } 1237 BUG_ON(ret); 1238 1239 leaf = path->nodes[0]; 1240 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1241 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1242 if (item_size < sizeof(*ei)) { 1243 if (!insert) { 1244 err = -ENOENT; 1245 goto out; 1246 } 1247 ret = convert_extent_item_v0(trans, root, path, owner, 1248 extra_size); 1249 if (ret < 0) { 1250 err = ret; 1251 goto out; 1252 } 1253 leaf = path->nodes[0]; 1254 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1255 } 1256 #endif 1257 BUG_ON(item_size < sizeof(*ei)); 1258 1259 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1260 flags = btrfs_extent_flags(leaf, ei); 1261 1262 ptr = (unsigned long)(ei + 1); 1263 end = (unsigned long)ei + item_size; 1264 1265 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1266 ptr += sizeof(struct btrfs_tree_block_info); 1267 BUG_ON(ptr > end); 1268 } else { 1269 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); 1270 } 1271 1272 err = -ENOENT; 1273 while (1) { 1274 if (ptr >= end) { 1275 WARN_ON(ptr > end); 1276 break; 1277 } 1278 iref = (struct btrfs_extent_inline_ref *)ptr; 1279 type = btrfs_extent_inline_ref_type(leaf, iref); 1280 if (want < type) 1281 break; 1282 if (want > type) { 1283 ptr += btrfs_extent_inline_ref_size(type); 1284 continue; 1285 } 1286 1287 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1288 struct btrfs_extent_data_ref *dref; 1289 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1290 if (match_extent_data_ref(leaf, dref, root_objectid, 1291 owner, offset)) { 1292 err = 0; 1293 break; 1294 } 1295 if (hash_extent_data_ref_item(leaf, dref) < 1296 hash_extent_data_ref(root_objectid, owner, offset)) 1297 break; 1298 } else { 1299 u64 ref_offset; 1300 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref); 1301 if (parent > 0) { 1302 if (parent == ref_offset) { 1303 err = 0; 1304 break; 1305 } 1306 if (ref_offset < parent) 1307 break; 1308 } else { 1309 if (root_objectid == ref_offset) { 1310 err = 0; 1311 break; 1312 } 1313 if (ref_offset < root_objectid) 1314 break; 1315 } 1316 } 1317 ptr += btrfs_extent_inline_ref_size(type); 1318 } 1319 if (err == -ENOENT && insert) { 1320 if (item_size + extra_size >= 1321 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { 1322 err = -EAGAIN; 1323 goto out; 1324 } 1325 /* 1326 * To add new inline back ref, we have to make sure 1327 * there is no corresponding back ref item. 1328 * For simplicity, we just do not add new inline back 1329 * ref if there is any kind of item for this block 1330 */ 1331 if (find_next_key(path, 0, &key) == 0 && 1332 key.objectid == bytenr && 1333 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { 1334 err = -EAGAIN; 1335 goto out; 1336 } 1337 } 1338 *ref_ret = (struct btrfs_extent_inline_ref *)ptr; 1339 out: 1340 if (insert) { 1341 path->keep_locks = 0; 1342 btrfs_unlock_up_safe(path, 1); 1343 } 1344 return err; 1345 } 1346 1347 /* 1348 * helper to add new inline back ref 1349 */ 1350 static noinline_for_stack 1351 int setup_inline_extent_backref(struct btrfs_trans_handle *trans, 1352 struct btrfs_root *root, 1353 struct btrfs_path *path, 1354 struct btrfs_extent_inline_ref *iref, 1355 u64 parent, u64 root_objectid, 1356 u64 owner, u64 offset, int refs_to_add, 1357 struct btrfs_delayed_extent_op *extent_op) 1358 { 1359 struct extent_buffer *leaf; 1360 struct btrfs_extent_item *ei; 1361 unsigned long ptr; 1362 unsigned long end; 1363 unsigned long item_offset; 1364 u64 refs; 1365 int size; 1366 int type; 1367 int ret; 1368 1369 leaf = path->nodes[0]; 1370 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1371 item_offset = (unsigned long)iref - (unsigned long)ei; 1372 1373 type = extent_ref_type(parent, owner); 1374 size = btrfs_extent_inline_ref_size(type); 1375 1376 ret = btrfs_extend_item(trans, root, path, size); 1377 BUG_ON(ret); 1378 1379 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1380 refs = btrfs_extent_refs(leaf, ei); 1381 refs += refs_to_add; 1382 btrfs_set_extent_refs(leaf, ei, refs); 1383 if (extent_op) 1384 __run_delayed_extent_op(extent_op, leaf, ei); 1385 1386 ptr = (unsigned long)ei + item_offset; 1387 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]); 1388 if (ptr < end - size) 1389 memmove_extent_buffer(leaf, ptr + size, ptr, 1390 end - size - ptr); 1391 1392 iref = (struct btrfs_extent_inline_ref *)ptr; 1393 btrfs_set_extent_inline_ref_type(leaf, iref, type); 1394 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1395 struct btrfs_extent_data_ref *dref; 1396 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1397 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid); 1398 btrfs_set_extent_data_ref_objectid(leaf, dref, owner); 1399 btrfs_set_extent_data_ref_offset(leaf, dref, offset); 1400 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add); 1401 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1402 struct btrfs_shared_data_ref *sref; 1403 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1404 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add); 1405 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1406 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { 1407 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1408 } else { 1409 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 1410 } 1411 btrfs_mark_buffer_dirty(leaf); 1412 return 0; 1413 } 1414 1415 static int lookup_extent_backref(struct btrfs_trans_handle *trans, 1416 struct btrfs_root *root, 1417 struct btrfs_path *path, 1418 struct btrfs_extent_inline_ref **ref_ret, 1419 u64 bytenr, u64 num_bytes, u64 parent, 1420 u64 root_objectid, u64 owner, u64 offset) 1421 { 1422 int ret; 1423 1424 ret = lookup_inline_extent_backref(trans, root, path, ref_ret, 1425 bytenr, num_bytes, parent, 1426 root_objectid, owner, offset, 0); 1427 if (ret != -ENOENT) 1428 return ret; 1429 1430 btrfs_release_path(root, path); 1431 *ref_ret = NULL; 1432 1433 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1434 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent, 1435 root_objectid); 1436 } else { 1437 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent, 1438 root_objectid, owner, offset); 1439 } 1440 return ret; 1441 } 1442 1443 /* 1444 * helper to update/remove inline back ref 1445 */ 1446 static noinline_for_stack 1447 int update_inline_extent_backref(struct btrfs_trans_handle *trans, 1448 struct btrfs_root *root, 1449 struct btrfs_path *path, 1450 struct btrfs_extent_inline_ref *iref, 1451 int refs_to_mod, 1452 struct btrfs_delayed_extent_op *extent_op) 1453 { 1454 struct extent_buffer *leaf; 1455 struct btrfs_extent_item *ei; 1456 struct btrfs_extent_data_ref *dref = NULL; 1457 struct btrfs_shared_data_ref *sref = NULL; 1458 unsigned long ptr; 1459 unsigned long end; 1460 u32 item_size; 1461 int size; 1462 int type; 1463 int ret; 1464 u64 refs; 1465 1466 leaf = path->nodes[0]; 1467 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1468 refs = btrfs_extent_refs(leaf, ei); 1469 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); 1470 refs += refs_to_mod; 1471 btrfs_set_extent_refs(leaf, ei, refs); 1472 if (extent_op) 1473 __run_delayed_extent_op(extent_op, leaf, ei); 1474 1475 type = btrfs_extent_inline_ref_type(leaf, iref); 1476 1477 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1478 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1479 refs = btrfs_extent_data_ref_count(leaf, dref); 1480 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1481 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1482 refs = btrfs_shared_data_ref_count(leaf, sref); 1483 } else { 1484 refs = 1; 1485 BUG_ON(refs_to_mod != -1); 1486 } 1487 1488 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod); 1489 refs += refs_to_mod; 1490 1491 if (refs > 0) { 1492 if (type == BTRFS_EXTENT_DATA_REF_KEY) 1493 btrfs_set_extent_data_ref_count(leaf, dref, refs); 1494 else 1495 btrfs_set_shared_data_ref_count(leaf, sref, refs); 1496 } else { 1497 size = btrfs_extent_inline_ref_size(type); 1498 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1499 ptr = (unsigned long)iref; 1500 end = (unsigned long)ei + item_size; 1501 if (ptr + size < end) 1502 memmove_extent_buffer(leaf, ptr, ptr + size, 1503 end - ptr - size); 1504 item_size -= size; 1505 ret = btrfs_truncate_item(trans, root, path, item_size, 1); 1506 BUG_ON(ret); 1507 } 1508 btrfs_mark_buffer_dirty(leaf); 1509 return 0; 1510 } 1511 1512 static noinline_for_stack 1513 int insert_inline_extent_backref(struct btrfs_trans_handle *trans, 1514 struct btrfs_root *root, 1515 struct btrfs_path *path, 1516 u64 bytenr, u64 num_bytes, u64 parent, 1517 u64 root_objectid, u64 owner, 1518 u64 offset, int refs_to_add, 1519 struct btrfs_delayed_extent_op *extent_op) 1520 { 1521 struct btrfs_extent_inline_ref *iref; 1522 int ret; 1523 1524 ret = lookup_inline_extent_backref(trans, root, path, &iref, 1525 bytenr, num_bytes, parent, 1526 root_objectid, owner, offset, 1); 1527 if (ret == 0) { 1528 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1529 ret = update_inline_extent_backref(trans, root, path, iref, 1530 refs_to_add, extent_op); 1531 } else if (ret == -ENOENT) { 1532 ret = setup_inline_extent_backref(trans, root, path, iref, 1533 parent, root_objectid, 1534 owner, offset, refs_to_add, 1535 extent_op); 1536 } 1537 return ret; 1538 } 1539 1540 static int insert_extent_backref(struct btrfs_trans_handle *trans, 1541 struct btrfs_root *root, 1542 struct btrfs_path *path, 1543 u64 bytenr, u64 parent, u64 root_objectid, 1544 u64 owner, u64 offset, int refs_to_add) 1545 { 1546 int ret; 1547 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1548 BUG_ON(refs_to_add != 1); 1549 ret = insert_tree_block_ref(trans, root, path, bytenr, 1550 parent, root_objectid); 1551 } else { 1552 ret = insert_extent_data_ref(trans, root, path, bytenr, 1553 parent, root_objectid, 1554 owner, offset, refs_to_add); 1555 } 1556 return ret; 1557 } 1558 1559 static int remove_extent_backref(struct btrfs_trans_handle *trans, 1560 struct btrfs_root *root, 1561 struct btrfs_path *path, 1562 struct btrfs_extent_inline_ref *iref, 1563 int refs_to_drop, int is_data) 1564 { 1565 int ret; 1566 1567 BUG_ON(!is_data && refs_to_drop != 1); 1568 if (iref) { 1569 ret = update_inline_extent_backref(trans, root, path, iref, 1570 -refs_to_drop, NULL); 1571 } else if (is_data) { 1572 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1573 } else { 1574 ret = btrfs_del_item(trans, root, path); 1575 } 1576 return ret; 1577 } 1578 1579 static void btrfs_issue_discard(struct block_device *bdev, 1580 u64 start, u64 len) 1581 { 1582 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 1583 DISCARD_FL_BARRIER); 1584 } 1585 1586 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1587 u64 num_bytes) 1588 { 1589 int ret; 1590 u64 map_length = num_bytes; 1591 struct btrfs_multi_bio *multi = NULL; 1592 1593 if (!btrfs_test_opt(root, DISCARD)) 1594 return 0; 1595 1596 /* Tell the block device(s) that the sectors can be discarded */ 1597 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, 1598 bytenr, &map_length, &multi, 0); 1599 if (!ret) { 1600 struct btrfs_bio_stripe *stripe = multi->stripes; 1601 int i; 1602 1603 if (map_length > num_bytes) 1604 map_length = num_bytes; 1605 1606 for (i = 0; i < multi->num_stripes; i++, stripe++) { 1607 btrfs_issue_discard(stripe->dev->bdev, 1608 stripe->physical, 1609 map_length); 1610 } 1611 kfree(multi); 1612 } 1613 1614 return ret; 1615 } 1616 1617 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1618 struct btrfs_root *root, 1619 u64 bytenr, u64 num_bytes, u64 parent, 1620 u64 root_objectid, u64 owner, u64 offset) 1621 { 1622 int ret; 1623 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && 1624 root_objectid == BTRFS_TREE_LOG_OBJECTID); 1625 1626 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1627 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 1628 parent, root_objectid, (int)owner, 1629 BTRFS_ADD_DELAYED_REF, NULL); 1630 } else { 1631 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 1632 parent, root_objectid, owner, offset, 1633 BTRFS_ADD_DELAYED_REF, NULL); 1634 } 1635 return ret; 1636 } 1637 1638 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1639 struct btrfs_root *root, 1640 u64 bytenr, u64 num_bytes, 1641 u64 parent, u64 root_objectid, 1642 u64 owner, u64 offset, int refs_to_add, 1643 struct btrfs_delayed_extent_op *extent_op) 1644 { 1645 struct btrfs_path *path; 1646 struct extent_buffer *leaf; 1647 struct btrfs_extent_item *item; 1648 u64 refs; 1649 int ret; 1650 int err = 0; 1651 1652 path = btrfs_alloc_path(); 1653 if (!path) 1654 return -ENOMEM; 1655 1656 path->reada = 1; 1657 path->leave_spinning = 1; 1658 /* this will setup the path even if it fails to insert the back ref */ 1659 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, 1660 path, bytenr, num_bytes, parent, 1661 root_objectid, owner, offset, 1662 refs_to_add, extent_op); 1663 if (ret == 0) 1664 goto out; 1665 1666 if (ret != -EAGAIN) { 1667 err = ret; 1668 goto out; 1669 } 1670 1671 leaf = path->nodes[0]; 1672 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1673 refs = btrfs_extent_refs(leaf, item); 1674 btrfs_set_extent_refs(leaf, item, refs + refs_to_add); 1675 if (extent_op) 1676 __run_delayed_extent_op(extent_op, leaf, item); 1677 1678 btrfs_mark_buffer_dirty(leaf); 1679 btrfs_release_path(root->fs_info->extent_root, path); 1680 1681 path->reada = 1; 1682 path->leave_spinning = 1; 1683 1684 /* now insert the actual backref */ 1685 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1686 path, bytenr, parent, root_objectid, 1687 owner, offset, refs_to_add); 1688 BUG_ON(ret); 1689 out: 1690 btrfs_free_path(path); 1691 return err; 1692 } 1693 1694 static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 1695 struct btrfs_root *root, 1696 struct btrfs_delayed_ref_node *node, 1697 struct btrfs_delayed_extent_op *extent_op, 1698 int insert_reserved) 1699 { 1700 int ret = 0; 1701 struct btrfs_delayed_data_ref *ref; 1702 struct btrfs_key ins; 1703 u64 parent = 0; 1704 u64 ref_root = 0; 1705 u64 flags = 0; 1706 1707 ins.objectid = node->bytenr; 1708 ins.offset = node->num_bytes; 1709 ins.type = BTRFS_EXTENT_ITEM_KEY; 1710 1711 ref = btrfs_delayed_node_to_data_ref(node); 1712 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 1713 parent = ref->parent; 1714 else 1715 ref_root = ref->root; 1716 1717 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1718 if (extent_op) { 1719 BUG_ON(extent_op->update_key); 1720 flags |= extent_op->flags_to_set; 1721 } 1722 ret = alloc_reserved_file_extent(trans, root, 1723 parent, ref_root, flags, 1724 ref->objectid, ref->offset, 1725 &ins, node->ref_mod); 1726 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1727 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1728 node->num_bytes, parent, 1729 ref_root, ref->objectid, 1730 ref->offset, node->ref_mod, 1731 extent_op); 1732 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1733 ret = __btrfs_free_extent(trans, root, node->bytenr, 1734 node->num_bytes, parent, 1735 ref_root, ref->objectid, 1736 ref->offset, node->ref_mod, 1737 extent_op); 1738 } else { 1739 BUG(); 1740 } 1741 return ret; 1742 } 1743 1744 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 1745 struct extent_buffer *leaf, 1746 struct btrfs_extent_item *ei) 1747 { 1748 u64 flags = btrfs_extent_flags(leaf, ei); 1749 if (extent_op->update_flags) { 1750 flags |= extent_op->flags_to_set; 1751 btrfs_set_extent_flags(leaf, ei, flags); 1752 } 1753 1754 if (extent_op->update_key) { 1755 struct btrfs_tree_block_info *bi; 1756 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); 1757 bi = (struct btrfs_tree_block_info *)(ei + 1); 1758 btrfs_set_tree_block_key(leaf, bi, &extent_op->key); 1759 } 1760 } 1761 1762 static int run_delayed_extent_op(struct btrfs_trans_handle *trans, 1763 struct btrfs_root *root, 1764 struct btrfs_delayed_ref_node *node, 1765 struct btrfs_delayed_extent_op *extent_op) 1766 { 1767 struct btrfs_key key; 1768 struct btrfs_path *path; 1769 struct btrfs_extent_item *ei; 1770 struct extent_buffer *leaf; 1771 u32 item_size; 1772 int ret; 1773 int err = 0; 1774 1775 path = btrfs_alloc_path(); 1776 if (!path) 1777 return -ENOMEM; 1778 1779 key.objectid = node->bytenr; 1780 key.type = BTRFS_EXTENT_ITEM_KEY; 1781 key.offset = node->num_bytes; 1782 1783 path->reada = 1; 1784 path->leave_spinning = 1; 1785 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, 1786 path, 0, 1); 1787 if (ret < 0) { 1788 err = ret; 1789 goto out; 1790 } 1791 if (ret > 0) { 1792 err = -EIO; 1793 goto out; 1794 } 1795 1796 leaf = path->nodes[0]; 1797 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1798 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1799 if (item_size < sizeof(*ei)) { 1800 ret = convert_extent_item_v0(trans, root->fs_info->extent_root, 1801 path, (u64)-1, 0); 1802 if (ret < 0) { 1803 err = ret; 1804 goto out; 1805 } 1806 leaf = path->nodes[0]; 1807 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1808 } 1809 #endif 1810 BUG_ON(item_size < sizeof(*ei)); 1811 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1812 __run_delayed_extent_op(extent_op, leaf, ei); 1813 1814 btrfs_mark_buffer_dirty(leaf); 1815 out: 1816 btrfs_free_path(path); 1817 return err; 1818 } 1819 1820 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, 1821 struct btrfs_root *root, 1822 struct btrfs_delayed_ref_node *node, 1823 struct btrfs_delayed_extent_op *extent_op, 1824 int insert_reserved) 1825 { 1826 int ret = 0; 1827 struct btrfs_delayed_tree_ref *ref; 1828 struct btrfs_key ins; 1829 u64 parent = 0; 1830 u64 ref_root = 0; 1831 1832 ins.objectid = node->bytenr; 1833 ins.offset = node->num_bytes; 1834 ins.type = BTRFS_EXTENT_ITEM_KEY; 1835 1836 ref = btrfs_delayed_node_to_tree_ref(node); 1837 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 1838 parent = ref->parent; 1839 else 1840 ref_root = ref->root; 1841 1842 BUG_ON(node->ref_mod != 1); 1843 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1844 BUG_ON(!extent_op || !extent_op->update_flags || 1845 !extent_op->update_key); 1846 ret = alloc_reserved_tree_block(trans, root, 1847 parent, ref_root, 1848 extent_op->flags_to_set, 1849 &extent_op->key, 1850 ref->level, &ins); 1851 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1852 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1853 node->num_bytes, parent, ref_root, 1854 ref->level, 0, 1, extent_op); 1855 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1856 ret = __btrfs_free_extent(trans, root, node->bytenr, 1857 node->num_bytes, parent, ref_root, 1858 ref->level, 0, 1, extent_op); 1859 } else { 1860 BUG(); 1861 } 1862 return ret; 1863 } 1864 1865 1866 /* helper function to actually process a single delayed ref entry */ 1867 static int run_one_delayed_ref(struct btrfs_trans_handle *trans, 1868 struct btrfs_root *root, 1869 struct btrfs_delayed_ref_node *node, 1870 struct btrfs_delayed_extent_op *extent_op, 1871 int insert_reserved) 1872 { 1873 int ret; 1874 if (btrfs_delayed_ref_is_head(node)) { 1875 struct btrfs_delayed_ref_head *head; 1876 /* 1877 * we've hit the end of the chain and we were supposed 1878 * to insert this extent into the tree. But, it got 1879 * deleted before we ever needed to insert it, so all 1880 * we have to do is clean up the accounting 1881 */ 1882 BUG_ON(extent_op); 1883 head = btrfs_delayed_node_to_head(node); 1884 if (insert_reserved) { 1885 int mark_free = 0; 1886 struct extent_buffer *must_clean = NULL; 1887 1888 ret = pin_down_bytes(trans, root, NULL, 1889 node->bytenr, node->num_bytes, 1890 head->is_data, 1, &must_clean); 1891 if (ret > 0) 1892 mark_free = 1; 1893 1894 if (must_clean) { 1895 clean_tree_block(NULL, root, must_clean); 1896 btrfs_tree_unlock(must_clean); 1897 free_extent_buffer(must_clean); 1898 } 1899 if (head->is_data) { 1900 ret = btrfs_del_csums(trans, root, 1901 node->bytenr, 1902 node->num_bytes); 1903 BUG_ON(ret); 1904 } 1905 if (mark_free) { 1906 ret = btrfs_free_reserved_extent(root, 1907 node->bytenr, 1908 node->num_bytes); 1909 BUG_ON(ret); 1910 } 1911 } 1912 mutex_unlock(&head->mutex); 1913 return 0; 1914 } 1915 1916 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 1917 node->type == BTRFS_SHARED_BLOCK_REF_KEY) 1918 ret = run_delayed_tree_ref(trans, root, node, extent_op, 1919 insert_reserved); 1920 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 1921 node->type == BTRFS_SHARED_DATA_REF_KEY) 1922 ret = run_delayed_data_ref(trans, root, node, extent_op, 1923 insert_reserved); 1924 else 1925 BUG(); 1926 return ret; 1927 } 1928 1929 static noinline struct btrfs_delayed_ref_node * 1930 select_delayed_ref(struct btrfs_delayed_ref_head *head) 1931 { 1932 struct rb_node *node; 1933 struct btrfs_delayed_ref_node *ref; 1934 int action = BTRFS_ADD_DELAYED_REF; 1935 again: 1936 /* 1937 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. 1938 * this prevents ref count from going down to zero when 1939 * there still are pending delayed ref. 1940 */ 1941 node = rb_prev(&head->node.rb_node); 1942 while (1) { 1943 if (!node) 1944 break; 1945 ref = rb_entry(node, struct btrfs_delayed_ref_node, 1946 rb_node); 1947 if (ref->bytenr != head->node.bytenr) 1948 break; 1949 if (ref->action == action) 1950 return ref; 1951 node = rb_prev(node); 1952 } 1953 if (action == BTRFS_ADD_DELAYED_REF) { 1954 action = BTRFS_DROP_DELAYED_REF; 1955 goto again; 1956 } 1957 return NULL; 1958 } 1959 1960 static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 1961 struct btrfs_root *root, 1962 struct list_head *cluster) 1963 { 1964 struct btrfs_delayed_ref_root *delayed_refs; 1965 struct btrfs_delayed_ref_node *ref; 1966 struct btrfs_delayed_ref_head *locked_ref = NULL; 1967 struct btrfs_delayed_extent_op *extent_op; 1968 int ret; 1969 int count = 0; 1970 int must_insert_reserved = 0; 1971 1972 delayed_refs = &trans->transaction->delayed_refs; 1973 while (1) { 1974 if (!locked_ref) { 1975 /* pick a new head ref from the cluster list */ 1976 if (list_empty(cluster)) 1977 break; 1978 1979 locked_ref = list_entry(cluster->next, 1980 struct btrfs_delayed_ref_head, cluster); 1981 1982 /* grab the lock that says we are going to process 1983 * all the refs for this head */ 1984 ret = btrfs_delayed_ref_lock(trans, locked_ref); 1985 1986 /* 1987 * we may have dropped the spin lock to get the head 1988 * mutex lock, and that might have given someone else 1989 * time to free the head. If that's true, it has been 1990 * removed from our list and we can move on. 1991 */ 1992 if (ret == -EAGAIN) { 1993 locked_ref = NULL; 1994 count++; 1995 continue; 1996 } 1997 } 1998 1999 /* 2000 * record the must insert reserved flag before we 2001 * drop the spin lock. 2002 */ 2003 must_insert_reserved = locked_ref->must_insert_reserved; 2004 locked_ref->must_insert_reserved = 0; 2005 2006 extent_op = locked_ref->extent_op; 2007 locked_ref->extent_op = NULL; 2008 2009 /* 2010 * locked_ref is the head node, so we have to go one 2011 * node back for any delayed ref updates 2012 */ 2013 ref = select_delayed_ref(locked_ref); 2014 if (!ref) { 2015 /* All delayed refs have been processed, Go ahead 2016 * and send the head node to run_one_delayed_ref, 2017 * so that any accounting fixes can happen 2018 */ 2019 ref = &locked_ref->node; 2020 2021 if (extent_op && must_insert_reserved) { 2022 kfree(extent_op); 2023 extent_op = NULL; 2024 } 2025 2026 if (extent_op) { 2027 spin_unlock(&delayed_refs->lock); 2028 2029 ret = run_delayed_extent_op(trans, root, 2030 ref, extent_op); 2031 BUG_ON(ret); 2032 kfree(extent_op); 2033 2034 cond_resched(); 2035 spin_lock(&delayed_refs->lock); 2036 continue; 2037 } 2038 2039 list_del_init(&locked_ref->cluster); 2040 locked_ref = NULL; 2041 } 2042 2043 ref->in_tree = 0; 2044 rb_erase(&ref->rb_node, &delayed_refs->root); 2045 delayed_refs->num_entries--; 2046 2047 spin_unlock(&delayed_refs->lock); 2048 2049 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2050 must_insert_reserved); 2051 BUG_ON(ret); 2052 2053 btrfs_put_delayed_ref(ref); 2054 kfree(extent_op); 2055 count++; 2056 2057 cond_resched(); 2058 spin_lock(&delayed_refs->lock); 2059 } 2060 return count; 2061 } 2062 2063 /* 2064 * this starts processing the delayed reference count updates and 2065 * extent insertions we have queued up so far. count can be 2066 * 0, which means to process everything in the tree at the start 2067 * of the run (but not newly added entries), or it can be some target 2068 * number you'd like to process. 2069 */ 2070 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2071 struct btrfs_root *root, unsigned long count) 2072 { 2073 struct rb_node *node; 2074 struct btrfs_delayed_ref_root *delayed_refs; 2075 struct btrfs_delayed_ref_node *ref; 2076 struct list_head cluster; 2077 int ret; 2078 int run_all = count == (unsigned long)-1; 2079 int run_most = 0; 2080 2081 if (root == root->fs_info->extent_root) 2082 root = root->fs_info->tree_root; 2083 2084 delayed_refs = &trans->transaction->delayed_refs; 2085 INIT_LIST_HEAD(&cluster); 2086 again: 2087 spin_lock(&delayed_refs->lock); 2088 if (count == 0) { 2089 count = delayed_refs->num_entries * 2; 2090 run_most = 1; 2091 } 2092 while (1) { 2093 if (!(run_all || run_most) && 2094 delayed_refs->num_heads_ready < 64) 2095 break; 2096 2097 /* 2098 * go find something we can process in the rbtree. We start at 2099 * the beginning of the tree, and then build a cluster 2100 * of refs to process starting at the first one we are able to 2101 * lock 2102 */ 2103 ret = btrfs_find_ref_cluster(trans, &cluster, 2104 delayed_refs->run_delayed_start); 2105 if (ret) 2106 break; 2107 2108 ret = run_clustered_refs(trans, root, &cluster); 2109 BUG_ON(ret < 0); 2110 2111 count -= min_t(unsigned long, ret, count); 2112 2113 if (count == 0) 2114 break; 2115 } 2116 2117 if (run_all) { 2118 node = rb_first(&delayed_refs->root); 2119 if (!node) 2120 goto out; 2121 count = (unsigned long)-1; 2122 2123 while (node) { 2124 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2125 rb_node); 2126 if (btrfs_delayed_ref_is_head(ref)) { 2127 struct btrfs_delayed_ref_head *head; 2128 2129 head = btrfs_delayed_node_to_head(ref); 2130 atomic_inc(&ref->refs); 2131 2132 spin_unlock(&delayed_refs->lock); 2133 mutex_lock(&head->mutex); 2134 mutex_unlock(&head->mutex); 2135 2136 btrfs_put_delayed_ref(ref); 2137 cond_resched(); 2138 goto again; 2139 } 2140 node = rb_next(node); 2141 } 2142 spin_unlock(&delayed_refs->lock); 2143 schedule_timeout(1); 2144 goto again; 2145 } 2146 out: 2147 spin_unlock(&delayed_refs->lock); 2148 return 0; 2149 } 2150 2151 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2152 struct btrfs_root *root, 2153 u64 bytenr, u64 num_bytes, u64 flags, 2154 int is_data) 2155 { 2156 struct btrfs_delayed_extent_op *extent_op; 2157 int ret; 2158 2159 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2160 if (!extent_op) 2161 return -ENOMEM; 2162 2163 extent_op->flags_to_set = flags; 2164 extent_op->update_flags = 1; 2165 extent_op->update_key = 0; 2166 extent_op->is_data = is_data ? 1 : 0; 2167 2168 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); 2169 if (ret) 2170 kfree(extent_op); 2171 return ret; 2172 } 2173 2174 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, 2175 struct btrfs_root *root, 2176 struct btrfs_path *path, 2177 u64 objectid, u64 offset, u64 bytenr) 2178 { 2179 struct btrfs_delayed_ref_head *head; 2180 struct btrfs_delayed_ref_node *ref; 2181 struct btrfs_delayed_data_ref *data_ref; 2182 struct btrfs_delayed_ref_root *delayed_refs; 2183 struct rb_node *node; 2184 int ret = 0; 2185 2186 ret = -ENOENT; 2187 delayed_refs = &trans->transaction->delayed_refs; 2188 spin_lock(&delayed_refs->lock); 2189 head = btrfs_find_delayed_ref_head(trans, bytenr); 2190 if (!head) 2191 goto out; 2192 2193 if (!mutex_trylock(&head->mutex)) { 2194 atomic_inc(&head->node.refs); 2195 spin_unlock(&delayed_refs->lock); 2196 2197 btrfs_release_path(root->fs_info->extent_root, path); 2198 2199 mutex_lock(&head->mutex); 2200 mutex_unlock(&head->mutex); 2201 btrfs_put_delayed_ref(&head->node); 2202 return -EAGAIN; 2203 } 2204 2205 node = rb_prev(&head->node.rb_node); 2206 if (!node) 2207 goto out_unlock; 2208 2209 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2210 2211 if (ref->bytenr != bytenr) 2212 goto out_unlock; 2213 2214 ret = 1; 2215 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) 2216 goto out_unlock; 2217 2218 data_ref = btrfs_delayed_node_to_data_ref(ref); 2219 2220 node = rb_prev(node); 2221 if (node) { 2222 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2223 if (ref->bytenr == bytenr) 2224 goto out_unlock; 2225 } 2226 2227 if (data_ref->root != root->root_key.objectid || 2228 data_ref->objectid != objectid || data_ref->offset != offset) 2229 goto out_unlock; 2230 2231 ret = 0; 2232 out_unlock: 2233 mutex_unlock(&head->mutex); 2234 out: 2235 spin_unlock(&delayed_refs->lock); 2236 return ret; 2237 } 2238 2239 static noinline int check_committed_ref(struct btrfs_trans_handle *trans, 2240 struct btrfs_root *root, 2241 struct btrfs_path *path, 2242 u64 objectid, u64 offset, u64 bytenr) 2243 { 2244 struct btrfs_root *extent_root = root->fs_info->extent_root; 2245 struct extent_buffer *leaf; 2246 struct btrfs_extent_data_ref *ref; 2247 struct btrfs_extent_inline_ref *iref; 2248 struct btrfs_extent_item *ei; 2249 struct btrfs_key key; 2250 u32 item_size; 2251 int ret; 2252 2253 key.objectid = bytenr; 2254 key.offset = (u64)-1; 2255 key.type = BTRFS_EXTENT_ITEM_KEY; 2256 2257 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2258 if (ret < 0) 2259 goto out; 2260 BUG_ON(ret == 0); 2261 2262 ret = -ENOENT; 2263 if (path->slots[0] == 0) 2264 goto out; 2265 2266 path->slots[0]--; 2267 leaf = path->nodes[0]; 2268 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2269 2270 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY) 2271 goto out; 2272 2273 ret = 1; 2274 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2275 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2276 if (item_size < sizeof(*ei)) { 2277 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 2278 goto out; 2279 } 2280 #endif 2281 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2282 2283 if (item_size != sizeof(*ei) + 2284 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) 2285 goto out; 2286 2287 if (btrfs_extent_generation(leaf, ei) <= 2288 btrfs_root_last_snapshot(&root->root_item)) 2289 goto out; 2290 2291 iref = (struct btrfs_extent_inline_ref *)(ei + 1); 2292 if (btrfs_extent_inline_ref_type(leaf, iref) != 2293 BTRFS_EXTENT_DATA_REF_KEY) 2294 goto out; 2295 2296 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 2297 if (btrfs_extent_refs(leaf, ei) != 2298 btrfs_extent_data_ref_count(leaf, ref) || 2299 btrfs_extent_data_ref_root(leaf, ref) != 2300 root->root_key.objectid || 2301 btrfs_extent_data_ref_objectid(leaf, ref) != objectid || 2302 btrfs_extent_data_ref_offset(leaf, ref) != offset) 2303 goto out; 2304 2305 ret = 0; 2306 out: 2307 return ret; 2308 } 2309 2310 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 2311 struct btrfs_root *root, 2312 u64 objectid, u64 offset, u64 bytenr) 2313 { 2314 struct btrfs_path *path; 2315 int ret; 2316 int ret2; 2317 2318 path = btrfs_alloc_path(); 2319 if (!path) 2320 return -ENOENT; 2321 2322 do { 2323 ret = check_committed_ref(trans, root, path, objectid, 2324 offset, bytenr); 2325 if (ret && ret != -ENOENT) 2326 goto out; 2327 2328 ret2 = check_delayed_ref(trans, root, path, objectid, 2329 offset, bytenr); 2330 } while (ret2 == -EAGAIN); 2331 2332 if (ret2 && ret2 != -ENOENT) { 2333 ret = ret2; 2334 goto out; 2335 } 2336 2337 if (ret != -ENOENT || ret2 != -ENOENT) 2338 ret = 0; 2339 out: 2340 btrfs_free_path(path); 2341 return ret; 2342 } 2343 2344 #if 0 2345 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2346 struct extent_buffer *buf, u32 nr_extents) 2347 { 2348 struct btrfs_key key; 2349 struct btrfs_file_extent_item *fi; 2350 u64 root_gen; 2351 u32 nritems; 2352 int i; 2353 int level; 2354 int ret = 0; 2355 int shared = 0; 2356 2357 if (!root->ref_cows) 2358 return 0; 2359 2360 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 2361 shared = 0; 2362 root_gen = root->root_key.offset; 2363 } else { 2364 shared = 1; 2365 root_gen = trans->transid - 1; 2366 } 2367 2368 level = btrfs_header_level(buf); 2369 nritems = btrfs_header_nritems(buf); 2370 2371 if (level == 0) { 2372 struct btrfs_leaf_ref *ref; 2373 struct btrfs_extent_info *info; 2374 2375 ref = btrfs_alloc_leaf_ref(root, nr_extents); 2376 if (!ref) { 2377 ret = -ENOMEM; 2378 goto out; 2379 } 2380 2381 ref->root_gen = root_gen; 2382 ref->bytenr = buf->start; 2383 ref->owner = btrfs_header_owner(buf); 2384 ref->generation = btrfs_header_generation(buf); 2385 ref->nritems = nr_extents; 2386 info = ref->extents; 2387 2388 for (i = 0; nr_extents > 0 && i < nritems; i++) { 2389 u64 disk_bytenr; 2390 btrfs_item_key_to_cpu(buf, &key, i); 2391 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2392 continue; 2393 fi = btrfs_item_ptr(buf, i, 2394 struct btrfs_file_extent_item); 2395 if (btrfs_file_extent_type(buf, fi) == 2396 BTRFS_FILE_EXTENT_INLINE) 2397 continue; 2398 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2399 if (disk_bytenr == 0) 2400 continue; 2401 2402 info->bytenr = disk_bytenr; 2403 info->num_bytes = 2404 btrfs_file_extent_disk_num_bytes(buf, fi); 2405 info->objectid = key.objectid; 2406 info->offset = key.offset; 2407 info++; 2408 } 2409 2410 ret = btrfs_add_leaf_ref(root, ref, shared); 2411 if (ret == -EEXIST && shared) { 2412 struct btrfs_leaf_ref *old; 2413 old = btrfs_lookup_leaf_ref(root, ref->bytenr); 2414 BUG_ON(!old); 2415 btrfs_remove_leaf_ref(root, old); 2416 btrfs_free_leaf_ref(root, old); 2417 ret = btrfs_add_leaf_ref(root, ref, shared); 2418 } 2419 WARN_ON(ret); 2420 btrfs_free_leaf_ref(root, ref); 2421 } 2422 out: 2423 return ret; 2424 } 2425 2426 /* when a block goes through cow, we update the reference counts of 2427 * everything that block points to. The internal pointers of the block 2428 * can be in just about any order, and it is likely to have clusters of 2429 * things that are close together and clusters of things that are not. 2430 * 2431 * To help reduce the seeks that come with updating all of these reference 2432 * counts, sort them by byte number before actual updates are done. 2433 * 2434 * struct refsort is used to match byte number to slot in the btree block. 2435 * we sort based on the byte number and then use the slot to actually 2436 * find the item. 2437 * 2438 * struct refsort is smaller than strcut btrfs_item and smaller than 2439 * struct btrfs_key_ptr. Since we're currently limited to the page size 2440 * for a btree block, there's no way for a kmalloc of refsorts for a 2441 * single node to be bigger than a page. 2442 */ 2443 struct refsort { 2444 u64 bytenr; 2445 u32 slot; 2446 }; 2447 2448 /* 2449 * for passing into sort() 2450 */ 2451 static int refsort_cmp(const void *a_void, const void *b_void) 2452 { 2453 const struct refsort *a = a_void; 2454 const struct refsort *b = b_void; 2455 2456 if (a->bytenr < b->bytenr) 2457 return -1; 2458 if (a->bytenr > b->bytenr) 2459 return 1; 2460 return 0; 2461 } 2462 #endif 2463 2464 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2465 struct btrfs_root *root, 2466 struct extent_buffer *buf, 2467 int full_backref, int inc) 2468 { 2469 u64 bytenr; 2470 u64 num_bytes; 2471 u64 parent; 2472 u64 ref_root; 2473 u32 nritems; 2474 struct btrfs_key key; 2475 struct btrfs_file_extent_item *fi; 2476 int i; 2477 int level; 2478 int ret = 0; 2479 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2480 u64, u64, u64, u64, u64, u64); 2481 2482 ref_root = btrfs_header_owner(buf); 2483 nritems = btrfs_header_nritems(buf); 2484 level = btrfs_header_level(buf); 2485 2486 if (!root->ref_cows && level == 0) 2487 return 0; 2488 2489 if (inc) 2490 process_func = btrfs_inc_extent_ref; 2491 else 2492 process_func = btrfs_free_extent; 2493 2494 if (full_backref) 2495 parent = buf->start; 2496 else 2497 parent = 0; 2498 2499 for (i = 0; i < nritems; i++) { 2500 if (level == 0) { 2501 btrfs_item_key_to_cpu(buf, &key, i); 2502 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2503 continue; 2504 fi = btrfs_item_ptr(buf, i, 2505 struct btrfs_file_extent_item); 2506 if (btrfs_file_extent_type(buf, fi) == 2507 BTRFS_FILE_EXTENT_INLINE) 2508 continue; 2509 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2510 if (bytenr == 0) 2511 continue; 2512 2513 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); 2514 key.offset -= btrfs_file_extent_offset(buf, fi); 2515 ret = process_func(trans, root, bytenr, num_bytes, 2516 parent, ref_root, key.objectid, 2517 key.offset); 2518 if (ret) 2519 goto fail; 2520 } else { 2521 bytenr = btrfs_node_blockptr(buf, i); 2522 num_bytes = btrfs_level_size(root, level - 1); 2523 ret = process_func(trans, root, bytenr, num_bytes, 2524 parent, ref_root, level - 1, 0); 2525 if (ret) 2526 goto fail; 2527 } 2528 } 2529 return 0; 2530 fail: 2531 BUG(); 2532 return ret; 2533 } 2534 2535 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2536 struct extent_buffer *buf, int full_backref) 2537 { 2538 return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 2539 } 2540 2541 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2542 struct extent_buffer *buf, int full_backref) 2543 { 2544 return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 2545 } 2546 2547 static int write_one_cache_group(struct btrfs_trans_handle *trans, 2548 struct btrfs_root *root, 2549 struct btrfs_path *path, 2550 struct btrfs_block_group_cache *cache) 2551 { 2552 int ret; 2553 struct btrfs_root *extent_root = root->fs_info->extent_root; 2554 unsigned long bi; 2555 struct extent_buffer *leaf; 2556 2557 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 2558 if (ret < 0) 2559 goto fail; 2560 BUG_ON(ret); 2561 2562 leaf = path->nodes[0]; 2563 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2564 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2565 btrfs_mark_buffer_dirty(leaf); 2566 btrfs_release_path(extent_root, path); 2567 fail: 2568 if (ret) 2569 return ret; 2570 return 0; 2571 2572 } 2573 2574 static struct btrfs_block_group_cache * 2575 next_block_group(struct btrfs_root *root, 2576 struct btrfs_block_group_cache *cache) 2577 { 2578 struct rb_node *node; 2579 spin_lock(&root->fs_info->block_group_cache_lock); 2580 node = rb_next(&cache->cache_node); 2581 btrfs_put_block_group(cache); 2582 if (node) { 2583 cache = rb_entry(node, struct btrfs_block_group_cache, 2584 cache_node); 2585 atomic_inc(&cache->count); 2586 } else 2587 cache = NULL; 2588 spin_unlock(&root->fs_info->block_group_cache_lock); 2589 return cache; 2590 } 2591 2592 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2593 struct btrfs_root *root) 2594 { 2595 struct btrfs_block_group_cache *cache; 2596 int err = 0; 2597 struct btrfs_path *path; 2598 u64 last = 0; 2599 2600 path = btrfs_alloc_path(); 2601 if (!path) 2602 return -ENOMEM; 2603 2604 while (1) { 2605 if (last == 0) { 2606 err = btrfs_run_delayed_refs(trans, root, 2607 (unsigned long)-1); 2608 BUG_ON(err); 2609 } 2610 2611 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2612 while (cache) { 2613 if (cache->dirty) 2614 break; 2615 cache = next_block_group(root, cache); 2616 } 2617 if (!cache) { 2618 if (last == 0) 2619 break; 2620 last = 0; 2621 continue; 2622 } 2623 2624 cache->dirty = 0; 2625 last = cache->key.objectid + cache->key.offset; 2626 2627 err = write_one_cache_group(trans, root, path, cache); 2628 BUG_ON(err); 2629 btrfs_put_block_group(cache); 2630 } 2631 2632 btrfs_free_path(path); 2633 return 0; 2634 } 2635 2636 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 2637 { 2638 struct btrfs_block_group_cache *block_group; 2639 int readonly = 0; 2640 2641 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 2642 if (!block_group || block_group->ro) 2643 readonly = 1; 2644 if (block_group) 2645 btrfs_put_block_group(block_group); 2646 return readonly; 2647 } 2648 2649 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 2650 u64 total_bytes, u64 bytes_used, 2651 struct btrfs_space_info **space_info) 2652 { 2653 struct btrfs_space_info *found; 2654 2655 found = __find_space_info(info, flags); 2656 if (found) { 2657 spin_lock(&found->lock); 2658 found->total_bytes += total_bytes; 2659 found->bytes_used += bytes_used; 2660 found->full = 0; 2661 spin_unlock(&found->lock); 2662 *space_info = found; 2663 return 0; 2664 } 2665 found = kzalloc(sizeof(*found), GFP_NOFS); 2666 if (!found) 2667 return -ENOMEM; 2668 2669 INIT_LIST_HEAD(&found->block_groups); 2670 init_rwsem(&found->groups_sem); 2671 spin_lock_init(&found->lock); 2672 found->flags = flags; 2673 found->total_bytes = total_bytes; 2674 found->bytes_used = bytes_used; 2675 found->bytes_pinned = 0; 2676 found->bytes_reserved = 0; 2677 found->bytes_readonly = 0; 2678 found->bytes_delalloc = 0; 2679 found->full = 0; 2680 found->force_alloc = 0; 2681 *space_info = found; 2682 list_add_rcu(&found->list, &info->space_info); 2683 atomic_set(&found->caching_threads, 0); 2684 return 0; 2685 } 2686 2687 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 2688 { 2689 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 2690 BTRFS_BLOCK_GROUP_RAID1 | 2691 BTRFS_BLOCK_GROUP_RAID10 | 2692 BTRFS_BLOCK_GROUP_DUP); 2693 if (extra_flags) { 2694 if (flags & BTRFS_BLOCK_GROUP_DATA) 2695 fs_info->avail_data_alloc_bits |= extra_flags; 2696 if (flags & BTRFS_BLOCK_GROUP_METADATA) 2697 fs_info->avail_metadata_alloc_bits |= extra_flags; 2698 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2699 fs_info->avail_system_alloc_bits |= extra_flags; 2700 } 2701 } 2702 2703 static void set_block_group_readonly(struct btrfs_block_group_cache *cache) 2704 { 2705 spin_lock(&cache->space_info->lock); 2706 spin_lock(&cache->lock); 2707 if (!cache->ro) { 2708 cache->space_info->bytes_readonly += cache->key.offset - 2709 btrfs_block_group_used(&cache->item); 2710 cache->ro = 1; 2711 } 2712 spin_unlock(&cache->lock); 2713 spin_unlock(&cache->space_info->lock); 2714 } 2715 2716 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 2717 { 2718 u64 num_devices = root->fs_info->fs_devices->rw_devices; 2719 2720 if (num_devices == 1) 2721 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 2722 if (num_devices < 4) 2723 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 2724 2725 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 2726 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 2727 BTRFS_BLOCK_GROUP_RAID10))) { 2728 flags &= ~BTRFS_BLOCK_GROUP_DUP; 2729 } 2730 2731 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 2732 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 2733 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 2734 } 2735 2736 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 2737 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 2738 (flags & BTRFS_BLOCK_GROUP_RAID10) | 2739 (flags & BTRFS_BLOCK_GROUP_DUP))) 2740 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 2741 return flags; 2742 } 2743 2744 static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) 2745 { 2746 struct btrfs_fs_info *info = root->fs_info; 2747 u64 alloc_profile; 2748 2749 if (data) { 2750 alloc_profile = info->avail_data_alloc_bits & 2751 info->data_alloc_profile; 2752 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; 2753 } else if (root == root->fs_info->chunk_root) { 2754 alloc_profile = info->avail_system_alloc_bits & 2755 info->system_alloc_profile; 2756 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; 2757 } else { 2758 alloc_profile = info->avail_metadata_alloc_bits & 2759 info->metadata_alloc_profile; 2760 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; 2761 } 2762 2763 return btrfs_reduce_alloc_profile(root, data); 2764 } 2765 2766 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) 2767 { 2768 u64 alloc_target; 2769 2770 alloc_target = btrfs_get_alloc_profile(root, 1); 2771 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, 2772 alloc_target); 2773 } 2774 2775 static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) 2776 { 2777 u64 num_bytes; 2778 int level; 2779 2780 level = BTRFS_MAX_LEVEL - 2; 2781 /* 2782 * NOTE: these calculations are absolutely the worst possible case. 2783 * This assumes that _every_ item we insert will require a new leaf, and 2784 * that the tree has grown to its maximum level size. 2785 */ 2786 2787 /* 2788 * for every item we insert we could insert both an extent item and a 2789 * extent ref item. Then for ever item we insert, we will need to cow 2790 * both the original leaf, plus the leaf to the left and right of it. 2791 * 2792 * Unless we are talking about the extent root, then we just want the 2793 * number of items * 2, since we just need the extent item plus its ref. 2794 */ 2795 if (root == root->fs_info->extent_root) 2796 num_bytes = num_items * 2; 2797 else 2798 num_bytes = (num_items + (2 * num_items)) * 3; 2799 2800 /* 2801 * num_bytes is total number of leaves we could need times the leaf 2802 * size, and then for every leaf we could end up cow'ing 2 nodes per 2803 * level, down to the leaf level. 2804 */ 2805 num_bytes = (num_bytes * root->leafsize) + 2806 (num_bytes * (level * 2)) * root->nodesize; 2807 2808 return num_bytes; 2809 } 2810 2811 /* 2812 * Unreserve metadata space for delalloc. If we have less reserved credits than 2813 * we have extents, this function does nothing. 2814 */ 2815 int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, 2816 struct inode *inode, int num_items) 2817 { 2818 struct btrfs_fs_info *info = root->fs_info; 2819 struct btrfs_space_info *meta_sinfo; 2820 u64 num_bytes; 2821 u64 alloc_target; 2822 bool bug = false; 2823 2824 /* get the space info for where the metadata will live */ 2825 alloc_target = btrfs_get_alloc_profile(root, 0); 2826 meta_sinfo = __find_space_info(info, alloc_target); 2827 2828 num_bytes = calculate_bytes_needed(root->fs_info->extent_root, 2829 num_items); 2830 2831 spin_lock(&meta_sinfo->lock); 2832 spin_lock(&BTRFS_I(inode)->accounting_lock); 2833 if (BTRFS_I(inode)->reserved_extents <= 2834 BTRFS_I(inode)->outstanding_extents) { 2835 spin_unlock(&BTRFS_I(inode)->accounting_lock); 2836 spin_unlock(&meta_sinfo->lock); 2837 return 0; 2838 } 2839 spin_unlock(&BTRFS_I(inode)->accounting_lock); 2840 2841 BTRFS_I(inode)->reserved_extents--; 2842 BUG_ON(BTRFS_I(inode)->reserved_extents < 0); 2843 2844 if (meta_sinfo->bytes_delalloc < num_bytes) { 2845 bug = true; 2846 meta_sinfo->bytes_delalloc = 0; 2847 } else { 2848 meta_sinfo->bytes_delalloc -= num_bytes; 2849 } 2850 spin_unlock(&meta_sinfo->lock); 2851 2852 BUG_ON(bug); 2853 2854 return 0; 2855 } 2856 2857 static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) 2858 { 2859 u64 thresh; 2860 2861 thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2862 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + 2863 meta_sinfo->bytes_super + meta_sinfo->bytes_root + 2864 meta_sinfo->bytes_may_use; 2865 2866 thresh = meta_sinfo->total_bytes - thresh; 2867 thresh *= 80; 2868 do_div(thresh, 100); 2869 if (thresh <= meta_sinfo->bytes_delalloc) 2870 meta_sinfo->force_delalloc = 1; 2871 else 2872 meta_sinfo->force_delalloc = 0; 2873 } 2874 2875 struct async_flush { 2876 struct btrfs_root *root; 2877 struct btrfs_space_info *info; 2878 struct btrfs_work work; 2879 }; 2880 2881 static noinline void flush_delalloc_async(struct btrfs_work *work) 2882 { 2883 struct async_flush *async; 2884 struct btrfs_root *root; 2885 struct btrfs_space_info *info; 2886 2887 async = container_of(work, struct async_flush, work); 2888 root = async->root; 2889 info = async->info; 2890 2891 btrfs_start_delalloc_inodes(root, 0); 2892 wake_up(&info->flush_wait); 2893 btrfs_wait_ordered_extents(root, 0, 0); 2894 2895 spin_lock(&info->lock); 2896 info->flushing = 0; 2897 spin_unlock(&info->lock); 2898 wake_up(&info->flush_wait); 2899 2900 kfree(async); 2901 } 2902 2903 static void wait_on_flush(struct btrfs_space_info *info) 2904 { 2905 DEFINE_WAIT(wait); 2906 u64 used; 2907 2908 while (1) { 2909 prepare_to_wait(&info->flush_wait, &wait, 2910 TASK_UNINTERRUPTIBLE); 2911 spin_lock(&info->lock); 2912 if (!info->flushing) { 2913 spin_unlock(&info->lock); 2914 break; 2915 } 2916 2917 used = info->bytes_used + info->bytes_reserved + 2918 info->bytes_pinned + info->bytes_readonly + 2919 info->bytes_super + info->bytes_root + 2920 info->bytes_may_use + info->bytes_delalloc; 2921 if (used < info->total_bytes) { 2922 spin_unlock(&info->lock); 2923 break; 2924 } 2925 spin_unlock(&info->lock); 2926 schedule(); 2927 } 2928 finish_wait(&info->flush_wait, &wait); 2929 } 2930 2931 static void flush_delalloc(struct btrfs_root *root, 2932 struct btrfs_space_info *info) 2933 { 2934 struct async_flush *async; 2935 bool wait = false; 2936 2937 spin_lock(&info->lock); 2938 2939 if (!info->flushing) { 2940 info->flushing = 1; 2941 init_waitqueue_head(&info->flush_wait); 2942 } else { 2943 wait = true; 2944 } 2945 2946 spin_unlock(&info->lock); 2947 2948 if (wait) { 2949 wait_on_flush(info); 2950 return; 2951 } 2952 2953 async = kzalloc(sizeof(*async), GFP_NOFS); 2954 if (!async) 2955 goto flush; 2956 2957 async->root = root; 2958 async->info = info; 2959 async->work.func = flush_delalloc_async; 2960 2961 btrfs_queue_worker(&root->fs_info->enospc_workers, 2962 &async->work); 2963 wait_on_flush(info); 2964 return; 2965 2966 flush: 2967 btrfs_start_delalloc_inodes(root, 0); 2968 btrfs_wait_ordered_extents(root, 0, 0); 2969 2970 spin_lock(&info->lock); 2971 info->flushing = 0; 2972 spin_unlock(&info->lock); 2973 wake_up(&info->flush_wait); 2974 } 2975 2976 static int maybe_allocate_chunk(struct btrfs_root *root, 2977 struct btrfs_space_info *info) 2978 { 2979 struct btrfs_super_block *disk_super = &root->fs_info->super_copy; 2980 struct btrfs_trans_handle *trans; 2981 bool wait = false; 2982 int ret = 0; 2983 u64 min_metadata; 2984 u64 free_space; 2985 2986 free_space = btrfs_super_total_bytes(disk_super); 2987 /* 2988 * we allow the metadata to grow to a max of either 10gb or 5% of the 2989 * space in the volume. 2990 */ 2991 min_metadata = min((u64)10 * 1024 * 1024 * 1024, 2992 div64_u64(free_space * 5, 100)); 2993 if (info->total_bytes >= min_metadata) { 2994 spin_unlock(&info->lock); 2995 return 0; 2996 } 2997 2998 if (info->full) { 2999 spin_unlock(&info->lock); 3000 return 0; 3001 } 3002 3003 if (!info->allocating_chunk) { 3004 info->force_alloc = 1; 3005 info->allocating_chunk = 1; 3006 init_waitqueue_head(&info->allocate_wait); 3007 } else { 3008 wait = true; 3009 } 3010 3011 spin_unlock(&info->lock); 3012 3013 if (wait) { 3014 wait_event(info->allocate_wait, 3015 !info->allocating_chunk); 3016 return 1; 3017 } 3018 3019 trans = btrfs_start_transaction(root, 1); 3020 if (!trans) { 3021 ret = -ENOMEM; 3022 goto out; 3023 } 3024 3025 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3026 4096 + 2 * 1024 * 1024, 3027 info->flags, 0); 3028 btrfs_end_transaction(trans, root); 3029 if (ret) 3030 goto out; 3031 out: 3032 spin_lock(&info->lock); 3033 info->allocating_chunk = 0; 3034 spin_unlock(&info->lock); 3035 wake_up(&info->allocate_wait); 3036 3037 if (ret) 3038 return 0; 3039 return 1; 3040 } 3041 3042 /* 3043 * Reserve metadata space for delalloc. 3044 */ 3045 int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, 3046 struct inode *inode, int num_items) 3047 { 3048 struct btrfs_fs_info *info = root->fs_info; 3049 struct btrfs_space_info *meta_sinfo; 3050 u64 num_bytes; 3051 u64 used; 3052 u64 alloc_target; 3053 int flushed = 0; 3054 int force_delalloc; 3055 3056 /* get the space info for where the metadata will live */ 3057 alloc_target = btrfs_get_alloc_profile(root, 0); 3058 meta_sinfo = __find_space_info(info, alloc_target); 3059 3060 num_bytes = calculate_bytes_needed(root->fs_info->extent_root, 3061 num_items); 3062 again: 3063 spin_lock(&meta_sinfo->lock); 3064 3065 force_delalloc = meta_sinfo->force_delalloc; 3066 3067 if (unlikely(!meta_sinfo->bytes_root)) 3068 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); 3069 3070 if (!flushed) 3071 meta_sinfo->bytes_delalloc += num_bytes; 3072 3073 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 3074 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + 3075 meta_sinfo->bytes_super + meta_sinfo->bytes_root + 3076 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; 3077 3078 if (used > meta_sinfo->total_bytes) { 3079 flushed++; 3080 3081 if (flushed == 1) { 3082 if (maybe_allocate_chunk(root, meta_sinfo)) 3083 goto again; 3084 flushed++; 3085 } else { 3086 spin_unlock(&meta_sinfo->lock); 3087 } 3088 3089 if (flushed == 2) { 3090 filemap_flush(inode->i_mapping); 3091 goto again; 3092 } else if (flushed == 3) { 3093 flush_delalloc(root, meta_sinfo); 3094 goto again; 3095 } 3096 spin_lock(&meta_sinfo->lock); 3097 meta_sinfo->bytes_delalloc -= num_bytes; 3098 spin_unlock(&meta_sinfo->lock); 3099 printk(KERN_ERR "enospc, has %d, reserved %d\n", 3100 BTRFS_I(inode)->outstanding_extents, 3101 BTRFS_I(inode)->reserved_extents); 3102 dump_space_info(meta_sinfo, 0, 0); 3103 return -ENOSPC; 3104 } 3105 3106 BTRFS_I(inode)->reserved_extents++; 3107 check_force_delalloc(meta_sinfo); 3108 spin_unlock(&meta_sinfo->lock); 3109 3110 if (!flushed && force_delalloc) 3111 filemap_flush(inode->i_mapping); 3112 3113 return 0; 3114 } 3115 3116 /* 3117 * unreserve num_items number of items worth of metadata space. This needs to 3118 * be paired with btrfs_reserve_metadata_space. 3119 * 3120 * NOTE: if you have the option, run this _AFTER_ you do a 3121 * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref 3122 * oprations which will result in more used metadata, so we want to make sure we 3123 * can do that without issue. 3124 */ 3125 int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) 3126 { 3127 struct btrfs_fs_info *info = root->fs_info; 3128 struct btrfs_space_info *meta_sinfo; 3129 u64 num_bytes; 3130 u64 alloc_target; 3131 bool bug = false; 3132 3133 /* get the space info for where the metadata will live */ 3134 alloc_target = btrfs_get_alloc_profile(root, 0); 3135 meta_sinfo = __find_space_info(info, alloc_target); 3136 3137 num_bytes = calculate_bytes_needed(root, num_items); 3138 3139 spin_lock(&meta_sinfo->lock); 3140 if (meta_sinfo->bytes_may_use < num_bytes) { 3141 bug = true; 3142 meta_sinfo->bytes_may_use = 0; 3143 } else { 3144 meta_sinfo->bytes_may_use -= num_bytes; 3145 } 3146 spin_unlock(&meta_sinfo->lock); 3147 3148 BUG_ON(bug); 3149 3150 return 0; 3151 } 3152 3153 /* 3154 * Reserve some metadata space for use. We'll calculate the worste case number 3155 * of bytes that would be needed to modify num_items number of items. If we 3156 * have space, fantastic, if not, you get -ENOSPC. Please call 3157 * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of 3158 * items you reserved, since whatever metadata you needed should have already 3159 * been allocated. 3160 * 3161 * This will commit the transaction to make more space if we don't have enough 3162 * metadata space. THe only time we don't do this is if we're reserving space 3163 * inside of a transaction, then we will just return -ENOSPC and it is the 3164 * callers responsibility to handle it properly. 3165 */ 3166 int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) 3167 { 3168 struct btrfs_fs_info *info = root->fs_info; 3169 struct btrfs_space_info *meta_sinfo; 3170 u64 num_bytes; 3171 u64 used; 3172 u64 alloc_target; 3173 int retries = 0; 3174 3175 /* get the space info for where the metadata will live */ 3176 alloc_target = btrfs_get_alloc_profile(root, 0); 3177 meta_sinfo = __find_space_info(info, alloc_target); 3178 3179 num_bytes = calculate_bytes_needed(root, num_items); 3180 again: 3181 spin_lock(&meta_sinfo->lock); 3182 3183 if (unlikely(!meta_sinfo->bytes_root)) 3184 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); 3185 3186 if (!retries) 3187 meta_sinfo->bytes_may_use += num_bytes; 3188 3189 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 3190 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + 3191 meta_sinfo->bytes_super + meta_sinfo->bytes_root + 3192 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; 3193 3194 if (used > meta_sinfo->total_bytes) { 3195 retries++; 3196 if (retries == 1) { 3197 if (maybe_allocate_chunk(root, meta_sinfo)) 3198 goto again; 3199 retries++; 3200 } else { 3201 spin_unlock(&meta_sinfo->lock); 3202 } 3203 3204 if (retries == 2) { 3205 flush_delalloc(root, meta_sinfo); 3206 goto again; 3207 } 3208 spin_lock(&meta_sinfo->lock); 3209 meta_sinfo->bytes_may_use -= num_bytes; 3210 spin_unlock(&meta_sinfo->lock); 3211 3212 dump_space_info(meta_sinfo, 0, 0); 3213 return -ENOSPC; 3214 } 3215 3216 check_force_delalloc(meta_sinfo); 3217 spin_unlock(&meta_sinfo->lock); 3218 3219 return 0; 3220 } 3221 3222 /* 3223 * This will check the space that the inode allocates from to make sure we have 3224 * enough space for bytes. 3225 */ 3226 int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, 3227 u64 bytes) 3228 { 3229 struct btrfs_space_info *data_sinfo; 3230 int ret = 0, committed = 0; 3231 3232 /* make sure bytes are sectorsize aligned */ 3233 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3234 3235 data_sinfo = BTRFS_I(inode)->space_info; 3236 if (!data_sinfo) 3237 goto alloc; 3238 3239 again: 3240 /* make sure we have enough space to handle the data first */ 3241 spin_lock(&data_sinfo->lock); 3242 if (data_sinfo->total_bytes - data_sinfo->bytes_used - 3243 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - 3244 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - 3245 data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { 3246 struct btrfs_trans_handle *trans; 3247 3248 /* 3249 * if we don't have enough free bytes in this space then we need 3250 * to alloc a new chunk. 3251 */ 3252 if (!data_sinfo->full) { 3253 u64 alloc_target; 3254 3255 data_sinfo->force_alloc = 1; 3256 spin_unlock(&data_sinfo->lock); 3257 alloc: 3258 alloc_target = btrfs_get_alloc_profile(root, 1); 3259 trans = btrfs_start_transaction(root, 1); 3260 if (!trans) 3261 return -ENOMEM; 3262 3263 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3264 bytes + 2 * 1024 * 1024, 3265 alloc_target, 0); 3266 btrfs_end_transaction(trans, root); 3267 if (ret) 3268 return ret; 3269 3270 if (!data_sinfo) { 3271 btrfs_set_inode_space_info(root, inode); 3272 data_sinfo = BTRFS_I(inode)->space_info; 3273 } 3274 goto again; 3275 } 3276 spin_unlock(&data_sinfo->lock); 3277 3278 /* commit the current transaction and try again */ 3279 if (!committed && !root->fs_info->open_ioctl_trans) { 3280 committed = 1; 3281 trans = btrfs_join_transaction(root, 1); 3282 if (!trans) 3283 return -ENOMEM; 3284 ret = btrfs_commit_transaction(trans, root); 3285 if (ret) 3286 return ret; 3287 goto again; 3288 } 3289 3290 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 3291 ", %llu bytes_used, %llu bytes_reserved, " 3292 "%llu bytes_pinned, %llu bytes_readonly, %llu may use " 3293 "%llu total\n", (unsigned long long)bytes, 3294 (unsigned long long)data_sinfo->bytes_delalloc, 3295 (unsigned long long)data_sinfo->bytes_used, 3296 (unsigned long long)data_sinfo->bytes_reserved, 3297 (unsigned long long)data_sinfo->bytes_pinned, 3298 (unsigned long long)data_sinfo->bytes_readonly, 3299 (unsigned long long)data_sinfo->bytes_may_use, 3300 (unsigned long long)data_sinfo->total_bytes); 3301 return -ENOSPC; 3302 } 3303 data_sinfo->bytes_may_use += bytes; 3304 BTRFS_I(inode)->reserved_bytes += bytes; 3305 spin_unlock(&data_sinfo->lock); 3306 3307 return 0; 3308 } 3309 3310 /* 3311 * if there was an error for whatever reason after calling 3312 * btrfs_check_data_free_space, call this so we can cleanup the counters. 3313 */ 3314 void btrfs_free_reserved_data_space(struct btrfs_root *root, 3315 struct inode *inode, u64 bytes) 3316 { 3317 struct btrfs_space_info *data_sinfo; 3318 3319 /* make sure bytes are sectorsize aligned */ 3320 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3321 3322 data_sinfo = BTRFS_I(inode)->space_info; 3323 spin_lock(&data_sinfo->lock); 3324 data_sinfo->bytes_may_use -= bytes; 3325 BTRFS_I(inode)->reserved_bytes -= bytes; 3326 spin_unlock(&data_sinfo->lock); 3327 } 3328 3329 /* called when we are adding a delalloc extent to the inode's io_tree */ 3330 void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, 3331 u64 bytes) 3332 { 3333 struct btrfs_space_info *data_sinfo; 3334 3335 /* get the space info for where this inode will be storing its data */ 3336 data_sinfo = BTRFS_I(inode)->space_info; 3337 3338 /* make sure we have enough space to handle the data first */ 3339 spin_lock(&data_sinfo->lock); 3340 data_sinfo->bytes_delalloc += bytes; 3341 3342 /* 3343 * we are adding a delalloc extent without calling 3344 * btrfs_check_data_free_space first. This happens on a weird 3345 * writepage condition, but shouldn't hurt our accounting 3346 */ 3347 if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { 3348 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; 3349 BTRFS_I(inode)->reserved_bytes = 0; 3350 } else { 3351 data_sinfo->bytes_may_use -= bytes; 3352 BTRFS_I(inode)->reserved_bytes -= bytes; 3353 } 3354 3355 spin_unlock(&data_sinfo->lock); 3356 } 3357 3358 /* called when we are clearing an delalloc extent from the inode's io_tree */ 3359 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 3360 u64 bytes) 3361 { 3362 struct btrfs_space_info *info; 3363 3364 info = BTRFS_I(inode)->space_info; 3365 3366 spin_lock(&info->lock); 3367 info->bytes_delalloc -= bytes; 3368 spin_unlock(&info->lock); 3369 } 3370 3371 static void force_metadata_allocation(struct btrfs_fs_info *info) 3372 { 3373 struct list_head *head = &info->space_info; 3374 struct btrfs_space_info *found; 3375 3376 rcu_read_lock(); 3377 list_for_each_entry_rcu(found, head, list) { 3378 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 3379 found->force_alloc = 1; 3380 } 3381 rcu_read_unlock(); 3382 } 3383 3384 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 3385 struct btrfs_root *extent_root, u64 alloc_bytes, 3386 u64 flags, int force) 3387 { 3388 struct btrfs_space_info *space_info; 3389 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3390 u64 thresh; 3391 int ret = 0; 3392 3393 mutex_lock(&fs_info->chunk_mutex); 3394 3395 flags = btrfs_reduce_alloc_profile(extent_root, flags); 3396 3397 space_info = __find_space_info(extent_root->fs_info, flags); 3398 if (!space_info) { 3399 ret = update_space_info(extent_root->fs_info, flags, 3400 0, 0, &space_info); 3401 BUG_ON(ret); 3402 } 3403 BUG_ON(!space_info); 3404 3405 spin_lock(&space_info->lock); 3406 if (space_info->force_alloc) 3407 force = 1; 3408 if (space_info->full) { 3409 spin_unlock(&space_info->lock); 3410 goto out; 3411 } 3412 3413 thresh = space_info->total_bytes - space_info->bytes_readonly; 3414 thresh = div_factor(thresh, 8); 3415 if (!force && 3416 (space_info->bytes_used + space_info->bytes_pinned + 3417 space_info->bytes_reserved + alloc_bytes) < thresh) { 3418 spin_unlock(&space_info->lock); 3419 goto out; 3420 } 3421 spin_unlock(&space_info->lock); 3422 3423 /* 3424 * if we're doing a data chunk, go ahead and make sure that 3425 * we keep a reasonable number of metadata chunks allocated in the 3426 * FS as well. 3427 */ 3428 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { 3429 fs_info->data_chunk_allocations++; 3430 if (!(fs_info->data_chunk_allocations % 3431 fs_info->metadata_ratio)) 3432 force_metadata_allocation(fs_info); 3433 } 3434 3435 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3436 spin_lock(&space_info->lock); 3437 if (ret) 3438 space_info->full = 1; 3439 space_info->force_alloc = 0; 3440 spin_unlock(&space_info->lock); 3441 out: 3442 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3443 return ret; 3444 } 3445 3446 static int update_block_group(struct btrfs_trans_handle *trans, 3447 struct btrfs_root *root, 3448 u64 bytenr, u64 num_bytes, int alloc, 3449 int mark_free) 3450 { 3451 struct btrfs_block_group_cache *cache; 3452 struct btrfs_fs_info *info = root->fs_info; 3453 u64 total = num_bytes; 3454 u64 old_val; 3455 u64 byte_in_group; 3456 3457 /* block accounting for super block */ 3458 spin_lock(&info->delalloc_lock); 3459 old_val = btrfs_super_bytes_used(&info->super_copy); 3460 if (alloc) 3461 old_val += num_bytes; 3462 else 3463 old_val -= num_bytes; 3464 btrfs_set_super_bytes_used(&info->super_copy, old_val); 3465 spin_unlock(&info->delalloc_lock); 3466 3467 while (total) { 3468 cache = btrfs_lookup_block_group(info, bytenr); 3469 if (!cache) 3470 return -1; 3471 byte_in_group = bytenr - cache->key.objectid; 3472 WARN_ON(byte_in_group > cache->key.offset); 3473 3474 spin_lock(&cache->space_info->lock); 3475 spin_lock(&cache->lock); 3476 cache->dirty = 1; 3477 old_val = btrfs_block_group_used(&cache->item); 3478 num_bytes = min(total, cache->key.offset - byte_in_group); 3479 if (alloc) { 3480 old_val += num_bytes; 3481 btrfs_set_block_group_used(&cache->item, old_val); 3482 cache->reserved -= num_bytes; 3483 cache->space_info->bytes_used += num_bytes; 3484 cache->space_info->bytes_reserved -= num_bytes; 3485 if (cache->ro) 3486 cache->space_info->bytes_readonly -= num_bytes; 3487 spin_unlock(&cache->lock); 3488 spin_unlock(&cache->space_info->lock); 3489 } else { 3490 old_val -= num_bytes; 3491 cache->space_info->bytes_used -= num_bytes; 3492 if (cache->ro) 3493 cache->space_info->bytes_readonly += num_bytes; 3494 btrfs_set_block_group_used(&cache->item, old_val); 3495 spin_unlock(&cache->lock); 3496 spin_unlock(&cache->space_info->lock); 3497 if (mark_free) { 3498 int ret; 3499 3500 ret = btrfs_discard_extent(root, bytenr, 3501 num_bytes); 3502 WARN_ON(ret); 3503 3504 ret = btrfs_add_free_space(cache, bytenr, 3505 num_bytes); 3506 WARN_ON(ret); 3507 } 3508 } 3509 btrfs_put_block_group(cache); 3510 total -= num_bytes; 3511 bytenr += num_bytes; 3512 } 3513 return 0; 3514 } 3515 3516 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 3517 { 3518 struct btrfs_block_group_cache *cache; 3519 u64 bytenr; 3520 3521 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 3522 if (!cache) 3523 return 0; 3524 3525 bytenr = cache->key.objectid; 3526 btrfs_put_block_group(cache); 3527 3528 return bytenr; 3529 } 3530 3531 /* 3532 * this function must be called within transaction 3533 */ 3534 int btrfs_pin_extent(struct btrfs_root *root, 3535 u64 bytenr, u64 num_bytes, int reserved) 3536 { 3537 struct btrfs_fs_info *fs_info = root->fs_info; 3538 struct btrfs_block_group_cache *cache; 3539 3540 cache = btrfs_lookup_block_group(fs_info, bytenr); 3541 BUG_ON(!cache); 3542 3543 spin_lock(&cache->space_info->lock); 3544 spin_lock(&cache->lock); 3545 cache->pinned += num_bytes; 3546 cache->space_info->bytes_pinned += num_bytes; 3547 if (reserved) { 3548 cache->reserved -= num_bytes; 3549 cache->space_info->bytes_reserved -= num_bytes; 3550 } 3551 spin_unlock(&cache->lock); 3552 spin_unlock(&cache->space_info->lock); 3553 3554 btrfs_put_block_group(cache); 3555 3556 set_extent_dirty(fs_info->pinned_extents, 3557 bytenr, bytenr + num_bytes - 1, GFP_NOFS); 3558 return 0; 3559 } 3560 3561 static int update_reserved_extents(struct btrfs_block_group_cache *cache, 3562 u64 num_bytes, int reserve) 3563 { 3564 spin_lock(&cache->space_info->lock); 3565 spin_lock(&cache->lock); 3566 if (reserve) { 3567 cache->reserved += num_bytes; 3568 cache->space_info->bytes_reserved += num_bytes; 3569 } else { 3570 cache->reserved -= num_bytes; 3571 cache->space_info->bytes_reserved -= num_bytes; 3572 } 3573 spin_unlock(&cache->lock); 3574 spin_unlock(&cache->space_info->lock); 3575 return 0; 3576 } 3577 3578 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 3579 struct btrfs_root *root) 3580 { 3581 struct btrfs_fs_info *fs_info = root->fs_info; 3582 struct btrfs_caching_control *next; 3583 struct btrfs_caching_control *caching_ctl; 3584 struct btrfs_block_group_cache *cache; 3585 3586 down_write(&fs_info->extent_commit_sem); 3587 3588 list_for_each_entry_safe(caching_ctl, next, 3589 &fs_info->caching_block_groups, list) { 3590 cache = caching_ctl->block_group; 3591 if (block_group_cache_done(cache)) { 3592 cache->last_byte_to_unpin = (u64)-1; 3593 list_del_init(&caching_ctl->list); 3594 put_caching_control(caching_ctl); 3595 } else { 3596 cache->last_byte_to_unpin = caching_ctl->progress; 3597 } 3598 } 3599 3600 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 3601 fs_info->pinned_extents = &fs_info->freed_extents[1]; 3602 else 3603 fs_info->pinned_extents = &fs_info->freed_extents[0]; 3604 3605 up_write(&fs_info->extent_commit_sem); 3606 return 0; 3607 } 3608 3609 static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 3610 { 3611 struct btrfs_fs_info *fs_info = root->fs_info; 3612 struct btrfs_block_group_cache *cache = NULL; 3613 u64 len; 3614 3615 while (start <= end) { 3616 if (!cache || 3617 start >= cache->key.objectid + cache->key.offset) { 3618 if (cache) 3619 btrfs_put_block_group(cache); 3620 cache = btrfs_lookup_block_group(fs_info, start); 3621 BUG_ON(!cache); 3622 } 3623 3624 len = cache->key.objectid + cache->key.offset - start; 3625 len = min(len, end + 1 - start); 3626 3627 if (start < cache->last_byte_to_unpin) { 3628 len = min(len, cache->last_byte_to_unpin - start); 3629 btrfs_add_free_space(cache, start, len); 3630 } 3631 3632 spin_lock(&cache->space_info->lock); 3633 spin_lock(&cache->lock); 3634 cache->pinned -= len; 3635 cache->space_info->bytes_pinned -= len; 3636 spin_unlock(&cache->lock); 3637 spin_unlock(&cache->space_info->lock); 3638 3639 start += len; 3640 } 3641 3642 if (cache) 3643 btrfs_put_block_group(cache); 3644 return 0; 3645 } 3646 3647 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 3648 struct btrfs_root *root) 3649 { 3650 struct btrfs_fs_info *fs_info = root->fs_info; 3651 struct extent_io_tree *unpin; 3652 u64 start; 3653 u64 end; 3654 int ret; 3655 3656 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 3657 unpin = &fs_info->freed_extents[1]; 3658 else 3659 unpin = &fs_info->freed_extents[0]; 3660 3661 while (1) { 3662 ret = find_first_extent_bit(unpin, 0, &start, &end, 3663 EXTENT_DIRTY); 3664 if (ret) 3665 break; 3666 3667 ret = btrfs_discard_extent(root, start, end + 1 - start); 3668 3669 clear_extent_dirty(unpin, start, end, GFP_NOFS); 3670 unpin_extent_range(root, start, end); 3671 cond_resched(); 3672 } 3673 3674 return ret; 3675 } 3676 3677 static int pin_down_bytes(struct btrfs_trans_handle *trans, 3678 struct btrfs_root *root, 3679 struct btrfs_path *path, 3680 u64 bytenr, u64 num_bytes, 3681 int is_data, int reserved, 3682 struct extent_buffer **must_clean) 3683 { 3684 int err = 0; 3685 struct extent_buffer *buf; 3686 3687 if (is_data) 3688 goto pinit; 3689 3690 /* 3691 * discard is sloooow, and so triggering discards on 3692 * individual btree blocks isn't a good plan. Just 3693 * pin everything in discard mode. 3694 */ 3695 if (btrfs_test_opt(root, DISCARD)) 3696 goto pinit; 3697 3698 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 3699 if (!buf) 3700 goto pinit; 3701 3702 /* we can reuse a block if it hasn't been written 3703 * and it is from this transaction. We can't 3704 * reuse anything from the tree log root because 3705 * it has tiny sub-transactions. 3706 */ 3707 if (btrfs_buffer_uptodate(buf, 0) && 3708 btrfs_try_tree_lock(buf)) { 3709 u64 header_owner = btrfs_header_owner(buf); 3710 u64 header_transid = btrfs_header_generation(buf); 3711 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 3712 header_transid == trans->transid && 3713 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 3714 *must_clean = buf; 3715 return 1; 3716 } 3717 btrfs_tree_unlock(buf); 3718 } 3719 free_extent_buffer(buf); 3720 pinit: 3721 if (path) 3722 btrfs_set_path_blocking(path); 3723 /* unlocks the pinned mutex */ 3724 btrfs_pin_extent(root, bytenr, num_bytes, reserved); 3725 3726 BUG_ON(err < 0); 3727 return 0; 3728 } 3729 3730 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 3731 struct btrfs_root *root, 3732 u64 bytenr, u64 num_bytes, u64 parent, 3733 u64 root_objectid, u64 owner_objectid, 3734 u64 owner_offset, int refs_to_drop, 3735 struct btrfs_delayed_extent_op *extent_op) 3736 { 3737 struct btrfs_key key; 3738 struct btrfs_path *path; 3739 struct btrfs_fs_info *info = root->fs_info; 3740 struct btrfs_root *extent_root = info->extent_root; 3741 struct extent_buffer *leaf; 3742 struct btrfs_extent_item *ei; 3743 struct btrfs_extent_inline_ref *iref; 3744 int ret; 3745 int is_data; 3746 int extent_slot = 0; 3747 int found_extent = 0; 3748 int num_to_del = 1; 3749 u32 item_size; 3750 u64 refs; 3751 3752 path = btrfs_alloc_path(); 3753 if (!path) 3754 return -ENOMEM; 3755 3756 path->reada = 1; 3757 path->leave_spinning = 1; 3758 3759 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 3760 BUG_ON(!is_data && refs_to_drop != 1); 3761 3762 ret = lookup_extent_backref(trans, extent_root, path, &iref, 3763 bytenr, num_bytes, parent, 3764 root_objectid, owner_objectid, 3765 owner_offset); 3766 if (ret == 0) { 3767 extent_slot = path->slots[0]; 3768 while (extent_slot >= 0) { 3769 btrfs_item_key_to_cpu(path->nodes[0], &key, 3770 extent_slot); 3771 if (key.objectid != bytenr) 3772 break; 3773 if (key.type == BTRFS_EXTENT_ITEM_KEY && 3774 key.offset == num_bytes) { 3775 found_extent = 1; 3776 break; 3777 } 3778 if (path->slots[0] - extent_slot > 5) 3779 break; 3780 extent_slot--; 3781 } 3782 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3783 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); 3784 if (found_extent && item_size < sizeof(*ei)) 3785 found_extent = 0; 3786 #endif 3787 if (!found_extent) { 3788 BUG_ON(iref); 3789 ret = remove_extent_backref(trans, extent_root, path, 3790 NULL, refs_to_drop, 3791 is_data); 3792 BUG_ON(ret); 3793 btrfs_release_path(extent_root, path); 3794 path->leave_spinning = 1; 3795 3796 key.objectid = bytenr; 3797 key.type = BTRFS_EXTENT_ITEM_KEY; 3798 key.offset = num_bytes; 3799 3800 ret = btrfs_search_slot(trans, extent_root, 3801 &key, path, -1, 1); 3802 if (ret) { 3803 printk(KERN_ERR "umm, got %d back from search" 3804 ", was looking for %llu\n", ret, 3805 (unsigned long long)bytenr); 3806 btrfs_print_leaf(extent_root, path->nodes[0]); 3807 } 3808 BUG_ON(ret); 3809 extent_slot = path->slots[0]; 3810 } 3811 } else { 3812 btrfs_print_leaf(extent_root, path->nodes[0]); 3813 WARN_ON(1); 3814 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 3815 "parent %llu root %llu owner %llu offset %llu\n", 3816 (unsigned long long)bytenr, 3817 (unsigned long long)parent, 3818 (unsigned long long)root_objectid, 3819 (unsigned long long)owner_objectid, 3820 (unsigned long long)owner_offset); 3821 } 3822 3823 leaf = path->nodes[0]; 3824 item_size = btrfs_item_size_nr(leaf, extent_slot); 3825 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3826 if (item_size < sizeof(*ei)) { 3827 BUG_ON(found_extent || extent_slot != path->slots[0]); 3828 ret = convert_extent_item_v0(trans, extent_root, path, 3829 owner_objectid, 0); 3830 BUG_ON(ret < 0); 3831 3832 btrfs_release_path(extent_root, path); 3833 path->leave_spinning = 1; 3834 3835 key.objectid = bytenr; 3836 key.type = BTRFS_EXTENT_ITEM_KEY; 3837 key.offset = num_bytes; 3838 3839 ret = btrfs_search_slot(trans, extent_root, &key, path, 3840 -1, 1); 3841 if (ret) { 3842 printk(KERN_ERR "umm, got %d back from search" 3843 ", was looking for %llu\n", ret, 3844 (unsigned long long)bytenr); 3845 btrfs_print_leaf(extent_root, path->nodes[0]); 3846 } 3847 BUG_ON(ret); 3848 extent_slot = path->slots[0]; 3849 leaf = path->nodes[0]; 3850 item_size = btrfs_item_size_nr(leaf, extent_slot); 3851 } 3852 #endif 3853 BUG_ON(item_size < sizeof(*ei)); 3854 ei = btrfs_item_ptr(leaf, extent_slot, 3855 struct btrfs_extent_item); 3856 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 3857 struct btrfs_tree_block_info *bi; 3858 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); 3859 bi = (struct btrfs_tree_block_info *)(ei + 1); 3860 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); 3861 } 3862 3863 refs = btrfs_extent_refs(leaf, ei); 3864 BUG_ON(refs < refs_to_drop); 3865 refs -= refs_to_drop; 3866 3867 if (refs > 0) { 3868 if (extent_op) 3869 __run_delayed_extent_op(extent_op, leaf, ei); 3870 /* 3871 * In the case of inline back ref, reference count will 3872 * be updated by remove_extent_backref 3873 */ 3874 if (iref) { 3875 BUG_ON(!found_extent); 3876 } else { 3877 btrfs_set_extent_refs(leaf, ei, refs); 3878 btrfs_mark_buffer_dirty(leaf); 3879 } 3880 if (found_extent) { 3881 ret = remove_extent_backref(trans, extent_root, path, 3882 iref, refs_to_drop, 3883 is_data); 3884 BUG_ON(ret); 3885 } 3886 } else { 3887 int mark_free = 0; 3888 struct extent_buffer *must_clean = NULL; 3889 3890 if (found_extent) { 3891 BUG_ON(is_data && refs_to_drop != 3892 extent_data_ref_count(root, path, iref)); 3893 if (iref) { 3894 BUG_ON(path->slots[0] != extent_slot); 3895 } else { 3896 BUG_ON(path->slots[0] != extent_slot + 1); 3897 path->slots[0] = extent_slot; 3898 num_to_del = 2; 3899 } 3900 } 3901 3902 ret = pin_down_bytes(trans, root, path, bytenr, 3903 num_bytes, is_data, 0, &must_clean); 3904 if (ret > 0) 3905 mark_free = 1; 3906 BUG_ON(ret < 0); 3907 /* 3908 * it is going to be very rare for someone to be waiting 3909 * on the block we're freeing. del_items might need to 3910 * schedule, so rather than get fancy, just force it 3911 * to blocking here 3912 */ 3913 if (must_clean) 3914 btrfs_set_lock_blocking(must_clean); 3915 3916 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 3917 num_to_del); 3918 BUG_ON(ret); 3919 btrfs_release_path(extent_root, path); 3920 3921 if (must_clean) { 3922 clean_tree_block(NULL, root, must_clean); 3923 btrfs_tree_unlock(must_clean); 3924 free_extent_buffer(must_clean); 3925 } 3926 3927 if (is_data) { 3928 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 3929 BUG_ON(ret); 3930 } else { 3931 invalidate_mapping_pages(info->btree_inode->i_mapping, 3932 bytenr >> PAGE_CACHE_SHIFT, 3933 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); 3934 } 3935 3936 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 3937 mark_free); 3938 BUG_ON(ret); 3939 } 3940 btrfs_free_path(path); 3941 return ret; 3942 } 3943 3944 /* 3945 * when we free an extent, it is possible (and likely) that we free the last 3946 * delayed ref for that extent as well. This searches the delayed ref tree for 3947 * a given extent, and if there are no other delayed refs to be processed, it 3948 * removes it from the tree. 3949 */ 3950 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, 3951 struct btrfs_root *root, u64 bytenr) 3952 { 3953 struct btrfs_delayed_ref_head *head; 3954 struct btrfs_delayed_ref_root *delayed_refs; 3955 struct btrfs_delayed_ref_node *ref; 3956 struct rb_node *node; 3957 int ret; 3958 3959 delayed_refs = &trans->transaction->delayed_refs; 3960 spin_lock(&delayed_refs->lock); 3961 head = btrfs_find_delayed_ref_head(trans, bytenr); 3962 if (!head) 3963 goto out; 3964 3965 node = rb_prev(&head->node.rb_node); 3966 if (!node) 3967 goto out; 3968 3969 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3970 3971 /* there are still entries for this ref, we can't drop it */ 3972 if (ref->bytenr == bytenr) 3973 goto out; 3974 3975 if (head->extent_op) { 3976 if (!head->must_insert_reserved) 3977 goto out; 3978 kfree(head->extent_op); 3979 head->extent_op = NULL; 3980 } 3981 3982 /* 3983 * waiting for the lock here would deadlock. If someone else has it 3984 * locked they are already in the process of dropping it anyway 3985 */ 3986 if (!mutex_trylock(&head->mutex)) 3987 goto out; 3988 3989 /* 3990 * at this point we have a head with no other entries. Go 3991 * ahead and process it. 3992 */ 3993 head->node.in_tree = 0; 3994 rb_erase(&head->node.rb_node, &delayed_refs->root); 3995 3996 delayed_refs->num_entries--; 3997 3998 /* 3999 * we don't take a ref on the node because we're removing it from the 4000 * tree, so we just steal the ref the tree was holding. 4001 */ 4002 delayed_refs->num_heads--; 4003 if (list_empty(&head->cluster)) 4004 delayed_refs->num_heads_ready--; 4005 4006 list_del_init(&head->cluster); 4007 spin_unlock(&delayed_refs->lock); 4008 4009 ret = run_one_delayed_ref(trans, root->fs_info->tree_root, 4010 &head->node, head->extent_op, 4011 head->must_insert_reserved); 4012 BUG_ON(ret); 4013 btrfs_put_delayed_ref(&head->node); 4014 return 0; 4015 out: 4016 spin_unlock(&delayed_refs->lock); 4017 return 0; 4018 } 4019 4020 int btrfs_free_extent(struct btrfs_trans_handle *trans, 4021 struct btrfs_root *root, 4022 u64 bytenr, u64 num_bytes, u64 parent, 4023 u64 root_objectid, u64 owner, u64 offset) 4024 { 4025 int ret; 4026 4027 /* 4028 * tree log blocks never actually go into the extent allocation 4029 * tree, just update pinning info and exit early. 4030 */ 4031 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 4032 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 4033 /* unlocks the pinned mutex */ 4034 btrfs_pin_extent(root, bytenr, num_bytes, 1); 4035 ret = 0; 4036 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 4037 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 4038 parent, root_objectid, (int)owner, 4039 BTRFS_DROP_DELAYED_REF, NULL); 4040 BUG_ON(ret); 4041 ret = check_ref_cleanup(trans, root, bytenr); 4042 BUG_ON(ret); 4043 } else { 4044 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 4045 parent, root_objectid, owner, 4046 offset, BTRFS_DROP_DELAYED_REF, NULL); 4047 BUG_ON(ret); 4048 } 4049 return ret; 4050 } 4051 4052 int btrfs_free_tree_block(struct btrfs_trans_handle *trans, 4053 struct btrfs_root *root, 4054 u64 bytenr, u32 blocksize, 4055 u64 parent, u64 root_objectid, int level) 4056 { 4057 u64 used; 4058 spin_lock(&root->node_lock); 4059 used = btrfs_root_used(&root->root_item) - blocksize; 4060 btrfs_set_root_used(&root->root_item, used); 4061 spin_unlock(&root->node_lock); 4062 4063 return btrfs_free_extent(trans, root, bytenr, blocksize, 4064 parent, root_objectid, level, 0); 4065 } 4066 4067 static u64 stripe_align(struct btrfs_root *root, u64 val) 4068 { 4069 u64 mask = ((u64)root->stripesize - 1); 4070 u64 ret = (val + mask) & ~mask; 4071 return ret; 4072 } 4073 4074 /* 4075 * when we wait for progress in the block group caching, its because 4076 * our allocation attempt failed at least once. So, we must sleep 4077 * and let some progress happen before we try again. 4078 * 4079 * This function will sleep at least once waiting for new free space to 4080 * show up, and then it will check the block group free space numbers 4081 * for our min num_bytes. Another option is to have it go ahead 4082 * and look in the rbtree for a free extent of a given size, but this 4083 * is a good start. 4084 */ 4085 static noinline int 4086 wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 4087 u64 num_bytes) 4088 { 4089 struct btrfs_caching_control *caching_ctl; 4090 DEFINE_WAIT(wait); 4091 4092 caching_ctl = get_caching_control(cache); 4093 if (!caching_ctl) 4094 return 0; 4095 4096 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 4097 (cache->free_space >= num_bytes)); 4098 4099 put_caching_control(caching_ctl); 4100 return 0; 4101 } 4102 4103 static noinline int 4104 wait_block_group_cache_done(struct btrfs_block_group_cache *cache) 4105 { 4106 struct btrfs_caching_control *caching_ctl; 4107 DEFINE_WAIT(wait); 4108 4109 caching_ctl = get_caching_control(cache); 4110 if (!caching_ctl) 4111 return 0; 4112 4113 wait_event(caching_ctl->wait, block_group_cache_done(cache)); 4114 4115 put_caching_control(caching_ctl); 4116 return 0; 4117 } 4118 4119 enum btrfs_loop_type { 4120 LOOP_FIND_IDEAL = 0, 4121 LOOP_CACHING_NOWAIT = 1, 4122 LOOP_CACHING_WAIT = 2, 4123 LOOP_ALLOC_CHUNK = 3, 4124 LOOP_NO_EMPTY_SIZE = 4, 4125 }; 4126 4127 /* 4128 * walks the btree of allocated extents and find a hole of a given size. 4129 * The key ins is changed to record the hole: 4130 * ins->objectid == block start 4131 * ins->flags = BTRFS_EXTENT_ITEM_KEY 4132 * ins->offset == number of blocks 4133 * Any available blocks before search_start are skipped. 4134 */ 4135 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 4136 struct btrfs_root *orig_root, 4137 u64 num_bytes, u64 empty_size, 4138 u64 search_start, u64 search_end, 4139 u64 hint_byte, struct btrfs_key *ins, 4140 u64 exclude_start, u64 exclude_nr, 4141 int data) 4142 { 4143 int ret = 0; 4144 struct btrfs_root *root = orig_root->fs_info->extent_root; 4145 struct btrfs_free_cluster *last_ptr = NULL; 4146 struct btrfs_block_group_cache *block_group = NULL; 4147 int empty_cluster = 2 * 1024 * 1024; 4148 int allowed_chunk_alloc = 0; 4149 int done_chunk_alloc = 0; 4150 struct btrfs_space_info *space_info; 4151 int last_ptr_loop = 0; 4152 int loop = 0; 4153 bool found_uncached_bg = false; 4154 bool failed_cluster_refill = false; 4155 bool failed_alloc = false; 4156 u64 ideal_cache_percent = 0; 4157 u64 ideal_cache_offset = 0; 4158 4159 WARN_ON(num_bytes < root->sectorsize); 4160 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 4161 ins->objectid = 0; 4162 ins->offset = 0; 4163 4164 space_info = __find_space_info(root->fs_info, data); 4165 4166 if (orig_root->ref_cows || empty_size) 4167 allowed_chunk_alloc = 1; 4168 4169 if (data & BTRFS_BLOCK_GROUP_METADATA) { 4170 last_ptr = &root->fs_info->meta_alloc_cluster; 4171 if (!btrfs_test_opt(root, SSD)) 4172 empty_cluster = 64 * 1024; 4173 } 4174 4175 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { 4176 last_ptr = &root->fs_info->data_alloc_cluster; 4177 } 4178 4179 if (last_ptr) { 4180 spin_lock(&last_ptr->lock); 4181 if (last_ptr->block_group) 4182 hint_byte = last_ptr->window_start; 4183 spin_unlock(&last_ptr->lock); 4184 } 4185 4186 search_start = max(search_start, first_logical_byte(root, 0)); 4187 search_start = max(search_start, hint_byte); 4188 4189 if (!last_ptr) 4190 empty_cluster = 0; 4191 4192 if (search_start == hint_byte) { 4193 ideal_cache: 4194 block_group = btrfs_lookup_block_group(root->fs_info, 4195 search_start); 4196 /* 4197 * we don't want to use the block group if it doesn't match our 4198 * allocation bits, or if its not cached. 4199 * 4200 * However if we are re-searching with an ideal block group 4201 * picked out then we don't care that the block group is cached. 4202 */ 4203 if (block_group && block_group_bits(block_group, data) && 4204 (block_group->cached != BTRFS_CACHE_NO || 4205 search_start == ideal_cache_offset)) { 4206 down_read(&space_info->groups_sem); 4207 if (list_empty(&block_group->list) || 4208 block_group->ro) { 4209 /* 4210 * someone is removing this block group, 4211 * we can't jump into the have_block_group 4212 * target because our list pointers are not 4213 * valid 4214 */ 4215 btrfs_put_block_group(block_group); 4216 up_read(&space_info->groups_sem); 4217 } else { 4218 goto have_block_group; 4219 } 4220 } else if (block_group) { 4221 btrfs_put_block_group(block_group); 4222 } 4223 } 4224 search: 4225 down_read(&space_info->groups_sem); 4226 list_for_each_entry(block_group, &space_info->block_groups, list) { 4227 u64 offset; 4228 int cached; 4229 4230 atomic_inc(&block_group->count); 4231 search_start = block_group->key.objectid; 4232 4233 have_block_group: 4234 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4235 u64 free_percent; 4236 4237 free_percent = btrfs_block_group_used(&block_group->item); 4238 free_percent *= 100; 4239 free_percent = div64_u64(free_percent, 4240 block_group->key.offset); 4241 free_percent = 100 - free_percent; 4242 if (free_percent > ideal_cache_percent && 4243 likely(!block_group->ro)) { 4244 ideal_cache_offset = block_group->key.objectid; 4245 ideal_cache_percent = free_percent; 4246 } 4247 4248 /* 4249 * We only want to start kthread caching if we are at 4250 * the point where we will wait for caching to make 4251 * progress, or if our ideal search is over and we've 4252 * found somebody to start caching. 4253 */ 4254 if (loop > LOOP_CACHING_NOWAIT || 4255 (loop > LOOP_FIND_IDEAL && 4256 atomic_read(&space_info->caching_threads) < 2)) { 4257 ret = cache_block_group(block_group); 4258 BUG_ON(ret); 4259 } 4260 found_uncached_bg = true; 4261 4262 /* 4263 * If loop is set for cached only, try the next block 4264 * group. 4265 */ 4266 if (loop == LOOP_FIND_IDEAL) 4267 goto loop; 4268 } 4269 4270 cached = block_group_cache_done(block_group); 4271 if (unlikely(!cached)) 4272 found_uncached_bg = true; 4273 4274 if (unlikely(block_group->ro)) 4275 goto loop; 4276 4277 /* 4278 * Ok we want to try and use the cluster allocator, so lets look 4279 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 4280 * have tried the cluster allocator plenty of times at this 4281 * point and not have found anything, so we are likely way too 4282 * fragmented for the clustering stuff to find anything, so lets 4283 * just skip it and let the allocator find whatever block it can 4284 * find 4285 */ 4286 if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { 4287 /* 4288 * the refill lock keeps out other 4289 * people trying to start a new cluster 4290 */ 4291 spin_lock(&last_ptr->refill_lock); 4292 if (last_ptr->block_group && 4293 (last_ptr->block_group->ro || 4294 !block_group_bits(last_ptr->block_group, data))) { 4295 offset = 0; 4296 goto refill_cluster; 4297 } 4298 4299 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 4300 num_bytes, search_start); 4301 if (offset) { 4302 /* we have a block, we're done */ 4303 spin_unlock(&last_ptr->refill_lock); 4304 goto checks; 4305 } 4306 4307 spin_lock(&last_ptr->lock); 4308 /* 4309 * whoops, this cluster doesn't actually point to 4310 * this block group. Get a ref on the block 4311 * group is does point to and try again 4312 */ 4313 if (!last_ptr_loop && last_ptr->block_group && 4314 last_ptr->block_group != block_group) { 4315 4316 btrfs_put_block_group(block_group); 4317 block_group = last_ptr->block_group; 4318 atomic_inc(&block_group->count); 4319 spin_unlock(&last_ptr->lock); 4320 spin_unlock(&last_ptr->refill_lock); 4321 4322 last_ptr_loop = 1; 4323 search_start = block_group->key.objectid; 4324 /* 4325 * we know this block group is properly 4326 * in the list because 4327 * btrfs_remove_block_group, drops the 4328 * cluster before it removes the block 4329 * group from the list 4330 */ 4331 goto have_block_group; 4332 } 4333 spin_unlock(&last_ptr->lock); 4334 refill_cluster: 4335 /* 4336 * this cluster didn't work out, free it and 4337 * start over 4338 */ 4339 btrfs_return_cluster_to_free_space(NULL, last_ptr); 4340 4341 last_ptr_loop = 0; 4342 4343 /* allocate a cluster in this block group */ 4344 ret = btrfs_find_space_cluster(trans, root, 4345 block_group, last_ptr, 4346 offset, num_bytes, 4347 empty_cluster + empty_size); 4348 if (ret == 0) { 4349 /* 4350 * now pull our allocation out of this 4351 * cluster 4352 */ 4353 offset = btrfs_alloc_from_cluster(block_group, 4354 last_ptr, num_bytes, 4355 search_start); 4356 if (offset) { 4357 /* we found one, proceed */ 4358 spin_unlock(&last_ptr->refill_lock); 4359 goto checks; 4360 } 4361 } else if (!cached && loop > LOOP_CACHING_NOWAIT 4362 && !failed_cluster_refill) { 4363 spin_unlock(&last_ptr->refill_lock); 4364 4365 failed_cluster_refill = true; 4366 wait_block_group_cache_progress(block_group, 4367 num_bytes + empty_cluster + empty_size); 4368 goto have_block_group; 4369 } 4370 4371 /* 4372 * at this point we either didn't find a cluster 4373 * or we weren't able to allocate a block from our 4374 * cluster. Free the cluster we've been trying 4375 * to use, and go to the next block group 4376 */ 4377 btrfs_return_cluster_to_free_space(NULL, last_ptr); 4378 spin_unlock(&last_ptr->refill_lock); 4379 goto loop; 4380 } 4381 4382 offset = btrfs_find_space_for_alloc(block_group, search_start, 4383 num_bytes, empty_size); 4384 /* 4385 * If we didn't find a chunk, and we haven't failed on this 4386 * block group before, and this block group is in the middle of 4387 * caching and we are ok with waiting, then go ahead and wait 4388 * for progress to be made, and set failed_alloc to true. 4389 * 4390 * If failed_alloc is true then we've already waited on this 4391 * block group once and should move on to the next block group. 4392 */ 4393 if (!offset && !failed_alloc && !cached && 4394 loop > LOOP_CACHING_NOWAIT) { 4395 wait_block_group_cache_progress(block_group, 4396 num_bytes + empty_size); 4397 failed_alloc = true; 4398 goto have_block_group; 4399 } else if (!offset) { 4400 goto loop; 4401 } 4402 checks: 4403 search_start = stripe_align(root, offset); 4404 /* move on to the next group */ 4405 if (search_start + num_bytes >= search_end) { 4406 btrfs_add_free_space(block_group, offset, num_bytes); 4407 goto loop; 4408 } 4409 4410 /* move on to the next group */ 4411 if (search_start + num_bytes > 4412 block_group->key.objectid + block_group->key.offset) { 4413 btrfs_add_free_space(block_group, offset, num_bytes); 4414 goto loop; 4415 } 4416 4417 if (exclude_nr > 0 && 4418 (search_start + num_bytes > exclude_start && 4419 search_start < exclude_start + exclude_nr)) { 4420 search_start = exclude_start + exclude_nr; 4421 4422 btrfs_add_free_space(block_group, offset, num_bytes); 4423 /* 4424 * if search_start is still in this block group 4425 * then we just re-search this block group 4426 */ 4427 if (search_start >= block_group->key.objectid && 4428 search_start < (block_group->key.objectid + 4429 block_group->key.offset)) 4430 goto have_block_group; 4431 goto loop; 4432 } 4433 4434 ins->objectid = search_start; 4435 ins->offset = num_bytes; 4436 4437 if (offset < search_start) 4438 btrfs_add_free_space(block_group, offset, 4439 search_start - offset); 4440 BUG_ON(offset > search_start); 4441 4442 update_reserved_extents(block_group, num_bytes, 1); 4443 4444 /* we are all good, lets return */ 4445 break; 4446 loop: 4447 failed_cluster_refill = false; 4448 failed_alloc = false; 4449 btrfs_put_block_group(block_group); 4450 } 4451 up_read(&space_info->groups_sem); 4452 4453 /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for 4454 * for them to make caching progress. Also 4455 * determine the best possible bg to cache 4456 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking 4457 * caching kthreads as we move along 4458 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 4459 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again 4460 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 4461 * again 4462 */ 4463 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 4464 (found_uncached_bg || empty_size || empty_cluster || 4465 allowed_chunk_alloc)) { 4466 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 4467 found_uncached_bg = false; 4468 loop++; 4469 if (!ideal_cache_percent && 4470 atomic_read(&space_info->caching_threads)) 4471 goto search; 4472 4473 /* 4474 * 1 of the following 2 things have happened so far 4475 * 4476 * 1) We found an ideal block group for caching that 4477 * is mostly full and will cache quickly, so we might 4478 * as well wait for it. 4479 * 4480 * 2) We searched for cached only and we didn't find 4481 * anything, and we didn't start any caching kthreads 4482 * either, so chances are we will loop through and 4483 * start a couple caching kthreads, and then come back 4484 * around and just wait for them. This will be slower 4485 * because we will have 2 caching kthreads reading at 4486 * the same time when we could have just started one 4487 * and waited for it to get far enough to give us an 4488 * allocation, so go ahead and go to the wait caching 4489 * loop. 4490 */ 4491 loop = LOOP_CACHING_WAIT; 4492 search_start = ideal_cache_offset; 4493 ideal_cache_percent = 0; 4494 goto ideal_cache; 4495 } else if (loop == LOOP_FIND_IDEAL) { 4496 /* 4497 * Didn't find a uncached bg, wait on anything we find 4498 * next. 4499 */ 4500 loop = LOOP_CACHING_WAIT; 4501 goto search; 4502 } 4503 4504 if (loop < LOOP_CACHING_WAIT) { 4505 loop++; 4506 goto search; 4507 } 4508 4509 if (loop == LOOP_ALLOC_CHUNK) { 4510 empty_size = 0; 4511 empty_cluster = 0; 4512 } 4513 4514 if (allowed_chunk_alloc) { 4515 ret = do_chunk_alloc(trans, root, num_bytes + 4516 2 * 1024 * 1024, data, 1); 4517 allowed_chunk_alloc = 0; 4518 done_chunk_alloc = 1; 4519 } else if (!done_chunk_alloc) { 4520 space_info->force_alloc = 1; 4521 } 4522 4523 if (loop < LOOP_NO_EMPTY_SIZE) { 4524 loop++; 4525 goto search; 4526 } 4527 ret = -ENOSPC; 4528 } else if (!ins->objectid) { 4529 ret = -ENOSPC; 4530 } 4531 4532 /* we found what we needed */ 4533 if (ins->objectid) { 4534 if (!(data & BTRFS_BLOCK_GROUP_DATA)) 4535 trans->block_group = block_group->key.objectid; 4536 4537 btrfs_put_block_group(block_group); 4538 ret = 0; 4539 } 4540 4541 return ret; 4542 } 4543 4544 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 4545 int dump_block_groups) 4546 { 4547 struct btrfs_block_group_cache *cache; 4548 4549 spin_lock(&info->lock); 4550 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 4551 (unsigned long long)(info->total_bytes - info->bytes_used - 4552 info->bytes_pinned - info->bytes_reserved - 4553 info->bytes_super), 4554 (info->full) ? "" : "not "); 4555 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 4556 " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" 4557 "\n", 4558 (unsigned long long)info->total_bytes, 4559 (unsigned long long)info->bytes_pinned, 4560 (unsigned long long)info->bytes_delalloc, 4561 (unsigned long long)info->bytes_may_use, 4562 (unsigned long long)info->bytes_used, 4563 (unsigned long long)info->bytes_root, 4564 (unsigned long long)info->bytes_super, 4565 (unsigned long long)info->bytes_reserved); 4566 spin_unlock(&info->lock); 4567 4568 if (!dump_block_groups) 4569 return; 4570 4571 down_read(&info->groups_sem); 4572 list_for_each_entry(cache, &info->block_groups, list) { 4573 spin_lock(&cache->lock); 4574 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 4575 "%llu pinned %llu reserved\n", 4576 (unsigned long long)cache->key.objectid, 4577 (unsigned long long)cache->key.offset, 4578 (unsigned long long)btrfs_block_group_used(&cache->item), 4579 (unsigned long long)cache->pinned, 4580 (unsigned long long)cache->reserved); 4581 btrfs_dump_free_space(cache, bytes); 4582 spin_unlock(&cache->lock); 4583 } 4584 up_read(&info->groups_sem); 4585 } 4586 4587 int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 4588 struct btrfs_root *root, 4589 u64 num_bytes, u64 min_alloc_size, 4590 u64 empty_size, u64 hint_byte, 4591 u64 search_end, struct btrfs_key *ins, 4592 u64 data) 4593 { 4594 int ret; 4595 u64 search_start = 0; 4596 4597 data = btrfs_get_alloc_profile(root, data); 4598 again: 4599 /* 4600 * the only place that sets empty_size is btrfs_realloc_node, which 4601 * is not called recursively on allocations 4602 */ 4603 if (empty_size || root->ref_cows) 4604 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 4605 num_bytes + 2 * 1024 * 1024, data, 0); 4606 4607 WARN_ON(num_bytes < root->sectorsize); 4608 ret = find_free_extent(trans, root, num_bytes, empty_size, 4609 search_start, search_end, hint_byte, ins, 4610 trans->alloc_exclude_start, 4611 trans->alloc_exclude_nr, data); 4612 4613 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 4614 num_bytes = num_bytes >> 1; 4615 num_bytes = num_bytes & ~(root->sectorsize - 1); 4616 num_bytes = max(num_bytes, min_alloc_size); 4617 do_chunk_alloc(trans, root->fs_info->extent_root, 4618 num_bytes, data, 1); 4619 goto again; 4620 } 4621 if (ret == -ENOSPC) { 4622 struct btrfs_space_info *sinfo; 4623 4624 sinfo = __find_space_info(root->fs_info, data); 4625 printk(KERN_ERR "btrfs allocation failed flags %llu, " 4626 "wanted %llu\n", (unsigned long long)data, 4627 (unsigned long long)num_bytes); 4628 dump_space_info(sinfo, num_bytes, 1); 4629 } 4630 4631 return ret; 4632 } 4633 4634 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 4635 { 4636 struct btrfs_block_group_cache *cache; 4637 int ret = 0; 4638 4639 cache = btrfs_lookup_block_group(root->fs_info, start); 4640 if (!cache) { 4641 printk(KERN_ERR "Unable to find block group for %llu\n", 4642 (unsigned long long)start); 4643 return -ENOSPC; 4644 } 4645 4646 ret = btrfs_discard_extent(root, start, len); 4647 4648 btrfs_add_free_space(cache, start, len); 4649 update_reserved_extents(cache, len, 0); 4650 btrfs_put_block_group(cache); 4651 4652 return ret; 4653 } 4654 4655 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 4656 struct btrfs_root *root, 4657 u64 parent, u64 root_objectid, 4658 u64 flags, u64 owner, u64 offset, 4659 struct btrfs_key *ins, int ref_mod) 4660 { 4661 int ret; 4662 struct btrfs_fs_info *fs_info = root->fs_info; 4663 struct btrfs_extent_item *extent_item; 4664 struct btrfs_extent_inline_ref *iref; 4665 struct btrfs_path *path; 4666 struct extent_buffer *leaf; 4667 int type; 4668 u32 size; 4669 4670 if (parent > 0) 4671 type = BTRFS_SHARED_DATA_REF_KEY; 4672 else 4673 type = BTRFS_EXTENT_DATA_REF_KEY; 4674 4675 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 4676 4677 path = btrfs_alloc_path(); 4678 BUG_ON(!path); 4679 4680 path->leave_spinning = 1; 4681 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 4682 ins, size); 4683 BUG_ON(ret); 4684 4685 leaf = path->nodes[0]; 4686 extent_item = btrfs_item_ptr(leaf, path->slots[0], 4687 struct btrfs_extent_item); 4688 btrfs_set_extent_refs(leaf, extent_item, ref_mod); 4689 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 4690 btrfs_set_extent_flags(leaf, extent_item, 4691 flags | BTRFS_EXTENT_FLAG_DATA); 4692 4693 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 4694 btrfs_set_extent_inline_ref_type(leaf, iref, type); 4695 if (parent > 0) { 4696 struct btrfs_shared_data_ref *ref; 4697 ref = (struct btrfs_shared_data_ref *)(iref + 1); 4698 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 4699 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod); 4700 } else { 4701 struct btrfs_extent_data_ref *ref; 4702 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 4703 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid); 4704 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 4705 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 4706 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); 4707 } 4708 4709 btrfs_mark_buffer_dirty(path->nodes[0]); 4710 btrfs_free_path(path); 4711 4712 ret = update_block_group(trans, root, ins->objectid, ins->offset, 4713 1, 0); 4714 if (ret) { 4715 printk(KERN_ERR "btrfs update block group failed for %llu " 4716 "%llu\n", (unsigned long long)ins->objectid, 4717 (unsigned long long)ins->offset); 4718 BUG(); 4719 } 4720 return ret; 4721 } 4722 4723 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 4724 struct btrfs_root *root, 4725 u64 parent, u64 root_objectid, 4726 u64 flags, struct btrfs_disk_key *key, 4727 int level, struct btrfs_key *ins) 4728 { 4729 int ret; 4730 struct btrfs_fs_info *fs_info = root->fs_info; 4731 struct btrfs_extent_item *extent_item; 4732 struct btrfs_tree_block_info *block_info; 4733 struct btrfs_extent_inline_ref *iref; 4734 struct btrfs_path *path; 4735 struct extent_buffer *leaf; 4736 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 4737 4738 path = btrfs_alloc_path(); 4739 BUG_ON(!path); 4740 4741 path->leave_spinning = 1; 4742 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 4743 ins, size); 4744 BUG_ON(ret); 4745 4746 leaf = path->nodes[0]; 4747 extent_item = btrfs_item_ptr(leaf, path->slots[0], 4748 struct btrfs_extent_item); 4749 btrfs_set_extent_refs(leaf, extent_item, 1); 4750 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 4751 btrfs_set_extent_flags(leaf, extent_item, 4752 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 4753 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 4754 4755 btrfs_set_tree_block_key(leaf, block_info, key); 4756 btrfs_set_tree_block_level(leaf, block_info, level); 4757 4758 iref = (struct btrfs_extent_inline_ref *)(block_info + 1); 4759 if (parent > 0) { 4760 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4761 btrfs_set_extent_inline_ref_type(leaf, iref, 4762 BTRFS_SHARED_BLOCK_REF_KEY); 4763 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 4764 } else { 4765 btrfs_set_extent_inline_ref_type(leaf, iref, 4766 BTRFS_TREE_BLOCK_REF_KEY); 4767 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 4768 } 4769 4770 btrfs_mark_buffer_dirty(leaf); 4771 btrfs_free_path(path); 4772 4773 ret = update_block_group(trans, root, ins->objectid, ins->offset, 4774 1, 0); 4775 if (ret) { 4776 printk(KERN_ERR "btrfs update block group failed for %llu " 4777 "%llu\n", (unsigned long long)ins->objectid, 4778 (unsigned long long)ins->offset); 4779 BUG(); 4780 } 4781 return ret; 4782 } 4783 4784 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 4785 struct btrfs_root *root, 4786 u64 root_objectid, u64 owner, 4787 u64 offset, struct btrfs_key *ins) 4788 { 4789 int ret; 4790 4791 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); 4792 4793 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, 4794 0, root_objectid, owner, offset, 4795 BTRFS_ADD_DELAYED_EXTENT, NULL); 4796 return ret; 4797 } 4798 4799 /* 4800 * this is used by the tree logging recovery code. It records that 4801 * an extent has been allocated and makes sure to clear the free 4802 * space cache bits as well 4803 */ 4804 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 4805 struct btrfs_root *root, 4806 u64 root_objectid, u64 owner, u64 offset, 4807 struct btrfs_key *ins) 4808 { 4809 int ret; 4810 struct btrfs_block_group_cache *block_group; 4811 struct btrfs_caching_control *caching_ctl; 4812 u64 start = ins->objectid; 4813 u64 num_bytes = ins->offset; 4814 4815 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4816 cache_block_group(block_group); 4817 caching_ctl = get_caching_control(block_group); 4818 4819 if (!caching_ctl) { 4820 BUG_ON(!block_group_cache_done(block_group)); 4821 ret = btrfs_remove_free_space(block_group, start, num_bytes); 4822 BUG_ON(ret); 4823 } else { 4824 mutex_lock(&caching_ctl->mutex); 4825 4826 if (start >= caching_ctl->progress) { 4827 ret = add_excluded_extent(root, start, num_bytes); 4828 BUG_ON(ret); 4829 } else if (start + num_bytes <= caching_ctl->progress) { 4830 ret = btrfs_remove_free_space(block_group, 4831 start, num_bytes); 4832 BUG_ON(ret); 4833 } else { 4834 num_bytes = caching_ctl->progress - start; 4835 ret = btrfs_remove_free_space(block_group, 4836 start, num_bytes); 4837 BUG_ON(ret); 4838 4839 start = caching_ctl->progress; 4840 num_bytes = ins->objectid + ins->offset - 4841 caching_ctl->progress; 4842 ret = add_excluded_extent(root, start, num_bytes); 4843 BUG_ON(ret); 4844 } 4845 4846 mutex_unlock(&caching_ctl->mutex); 4847 put_caching_control(caching_ctl); 4848 } 4849 4850 update_reserved_extents(block_group, ins->offset, 1); 4851 btrfs_put_block_group(block_group); 4852 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 4853 0, owner, offset, ins, 1); 4854 return ret; 4855 } 4856 4857 /* 4858 * finds a free extent and does all the dirty work required for allocation 4859 * returns the key for the extent through ins, and a tree buffer for 4860 * the first block of the extent through buf. 4861 * 4862 * returns 0 if everything worked, non-zero otherwise. 4863 */ 4864 static int alloc_tree_block(struct btrfs_trans_handle *trans, 4865 struct btrfs_root *root, 4866 u64 num_bytes, u64 parent, u64 root_objectid, 4867 struct btrfs_disk_key *key, int level, 4868 u64 empty_size, u64 hint_byte, u64 search_end, 4869 struct btrfs_key *ins) 4870 { 4871 int ret; 4872 u64 flags = 0; 4873 4874 ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4875 empty_size, hint_byte, search_end, 4876 ins, 0); 4877 if (ret) 4878 return ret; 4879 4880 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 4881 if (parent == 0) 4882 parent = ins->objectid; 4883 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 4884 } else 4885 BUG_ON(parent > 0); 4886 4887 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4888 struct btrfs_delayed_extent_op *extent_op; 4889 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 4890 BUG_ON(!extent_op); 4891 if (key) 4892 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 4893 else 4894 memset(&extent_op->key, 0, sizeof(extent_op->key)); 4895 extent_op->flags_to_set = flags; 4896 extent_op->update_key = 1; 4897 extent_op->update_flags = 1; 4898 extent_op->is_data = 0; 4899 4900 ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, 4901 ins->offset, parent, root_objectid, 4902 level, BTRFS_ADD_DELAYED_EXTENT, 4903 extent_op); 4904 BUG_ON(ret); 4905 } 4906 4907 if (root_objectid == root->root_key.objectid) { 4908 u64 used; 4909 spin_lock(&root->node_lock); 4910 used = btrfs_root_used(&root->root_item) + num_bytes; 4911 btrfs_set_root_used(&root->root_item, used); 4912 spin_unlock(&root->node_lock); 4913 } 4914 return ret; 4915 } 4916 4917 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 4918 struct btrfs_root *root, 4919 u64 bytenr, u32 blocksize, 4920 int level) 4921 { 4922 struct extent_buffer *buf; 4923 4924 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 4925 if (!buf) 4926 return ERR_PTR(-ENOMEM); 4927 btrfs_set_header_generation(buf, trans->transid); 4928 btrfs_set_buffer_lockdep_class(buf, level); 4929 btrfs_tree_lock(buf); 4930 clean_tree_block(trans, root, buf); 4931 4932 btrfs_set_lock_blocking(buf); 4933 btrfs_set_buffer_uptodate(buf); 4934 4935 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4936 /* 4937 * we allow two log transactions at a time, use different 4938 * EXENT bit to differentiate dirty pages. 4939 */ 4940 if (root->log_transid % 2 == 0) 4941 set_extent_dirty(&root->dirty_log_pages, buf->start, 4942 buf->start + buf->len - 1, GFP_NOFS); 4943 else 4944 set_extent_new(&root->dirty_log_pages, buf->start, 4945 buf->start + buf->len - 1, GFP_NOFS); 4946 } else { 4947 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4948 buf->start + buf->len - 1, GFP_NOFS); 4949 } 4950 trans->blocks_used++; 4951 /* this returns a buffer locked for blocking */ 4952 return buf; 4953 } 4954 4955 /* 4956 * helper function to allocate a block for a given tree 4957 * returns the tree buffer or NULL. 4958 */ 4959 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 4960 struct btrfs_root *root, u32 blocksize, 4961 u64 parent, u64 root_objectid, 4962 struct btrfs_disk_key *key, int level, 4963 u64 hint, u64 empty_size) 4964 { 4965 struct btrfs_key ins; 4966 int ret; 4967 struct extent_buffer *buf; 4968 4969 ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, 4970 key, level, empty_size, hint, (u64)-1, &ins); 4971 if (ret) { 4972 BUG_ON(ret > 0); 4973 return ERR_PTR(ret); 4974 } 4975 4976 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 4977 blocksize, level); 4978 return buf; 4979 } 4980 4981 struct walk_control { 4982 u64 refs[BTRFS_MAX_LEVEL]; 4983 u64 flags[BTRFS_MAX_LEVEL]; 4984 struct btrfs_key update_progress; 4985 int stage; 4986 int level; 4987 int shared_level; 4988 int update_ref; 4989 int keep_locks; 4990 int reada_slot; 4991 int reada_count; 4992 }; 4993 4994 #define DROP_REFERENCE 1 4995 #define UPDATE_BACKREF 2 4996 4997 static noinline void reada_walk_down(struct btrfs_trans_handle *trans, 4998 struct btrfs_root *root, 4999 struct walk_control *wc, 5000 struct btrfs_path *path) 5001 { 5002 u64 bytenr; 5003 u64 generation; 5004 u64 refs; 5005 u64 flags; 5006 u64 last = 0; 5007 u32 nritems; 5008 u32 blocksize; 5009 struct btrfs_key key; 5010 struct extent_buffer *eb; 5011 int ret; 5012 int slot; 5013 int nread = 0; 5014 5015 if (path->slots[wc->level] < wc->reada_slot) { 5016 wc->reada_count = wc->reada_count * 2 / 3; 5017 wc->reada_count = max(wc->reada_count, 2); 5018 } else { 5019 wc->reada_count = wc->reada_count * 3 / 2; 5020 wc->reada_count = min_t(int, wc->reada_count, 5021 BTRFS_NODEPTRS_PER_BLOCK(root)); 5022 } 5023 5024 eb = path->nodes[wc->level]; 5025 nritems = btrfs_header_nritems(eb); 5026 blocksize = btrfs_level_size(root, wc->level - 1); 5027 5028 for (slot = path->slots[wc->level]; slot < nritems; slot++) { 5029 if (nread >= wc->reada_count) 5030 break; 5031 5032 cond_resched(); 5033 bytenr = btrfs_node_blockptr(eb, slot); 5034 generation = btrfs_node_ptr_generation(eb, slot); 5035 5036 if (slot == path->slots[wc->level]) 5037 goto reada; 5038 5039 if (wc->stage == UPDATE_BACKREF && 5040 generation <= root->root_key.offset) 5041 continue; 5042 5043 /* We don't lock the tree block, it's OK to be racy here */ 5044 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 5045 &refs, &flags); 5046 BUG_ON(ret); 5047 BUG_ON(refs == 0); 5048 5049 if (wc->stage == DROP_REFERENCE) { 5050 if (refs == 1) 5051 goto reada; 5052 5053 if (wc->level == 1 && 5054 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5055 continue; 5056 if (!wc->update_ref || 5057 generation <= root->root_key.offset) 5058 continue; 5059 btrfs_node_key_to_cpu(eb, &key, slot); 5060 ret = btrfs_comp_cpu_keys(&key, 5061 &wc->update_progress); 5062 if (ret < 0) 5063 continue; 5064 } else { 5065 if (wc->level == 1 && 5066 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5067 continue; 5068 } 5069 reada: 5070 ret = readahead_tree_block(root, bytenr, blocksize, 5071 generation); 5072 if (ret) 5073 break; 5074 last = bytenr + blocksize; 5075 nread++; 5076 } 5077 wc->reada_slot = slot; 5078 } 5079 5080 /* 5081 * hepler to process tree block while walking down the tree. 5082 * 5083 * when wc->stage == UPDATE_BACKREF, this function updates 5084 * back refs for pointers in the block. 5085 * 5086 * NOTE: return value 1 means we should stop walking down. 5087 */ 5088 static noinline int walk_down_proc(struct btrfs_trans_handle *trans, 5089 struct btrfs_root *root, 5090 struct btrfs_path *path, 5091 struct walk_control *wc, int lookup_info) 5092 { 5093 int level = wc->level; 5094 struct extent_buffer *eb = path->nodes[level]; 5095 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; 5096 int ret; 5097 5098 if (wc->stage == UPDATE_BACKREF && 5099 btrfs_header_owner(eb) != root->root_key.objectid) 5100 return 1; 5101 5102 /* 5103 * when reference count of tree block is 1, it won't increase 5104 * again. once full backref flag is set, we never clear it. 5105 */ 5106 if (lookup_info && 5107 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || 5108 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { 5109 BUG_ON(!path->locks[level]); 5110 ret = btrfs_lookup_extent_info(trans, root, 5111 eb->start, eb->len, 5112 &wc->refs[level], 5113 &wc->flags[level]); 5114 BUG_ON(ret); 5115 BUG_ON(wc->refs[level] == 0); 5116 } 5117 5118 if (wc->stage == DROP_REFERENCE) { 5119 if (wc->refs[level] > 1) 5120 return 1; 5121 5122 if (path->locks[level] && !wc->keep_locks) { 5123 btrfs_tree_unlock(eb); 5124 path->locks[level] = 0; 5125 } 5126 return 0; 5127 } 5128 5129 /* wc->stage == UPDATE_BACKREF */ 5130 if (!(wc->flags[level] & flag)) { 5131 BUG_ON(!path->locks[level]); 5132 ret = btrfs_inc_ref(trans, root, eb, 1); 5133 BUG_ON(ret); 5134 ret = btrfs_dec_ref(trans, root, eb, 0); 5135 BUG_ON(ret); 5136 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 5137 eb->len, flag, 0); 5138 BUG_ON(ret); 5139 wc->flags[level] |= flag; 5140 } 5141 5142 /* 5143 * the block is shared by multiple trees, so it's not good to 5144 * keep the tree lock 5145 */ 5146 if (path->locks[level] && level > 0) { 5147 btrfs_tree_unlock(eb); 5148 path->locks[level] = 0; 5149 } 5150 return 0; 5151 } 5152 5153 /* 5154 * hepler to process tree block pointer. 5155 * 5156 * when wc->stage == DROP_REFERENCE, this function checks 5157 * reference count of the block pointed to. if the block 5158 * is shared and we need update back refs for the subtree 5159 * rooted at the block, this function changes wc->stage to 5160 * UPDATE_BACKREF. if the block is shared and there is no 5161 * need to update back, this function drops the reference 5162 * to the block. 5163 * 5164 * NOTE: return value 1 means we should stop walking down. 5165 */ 5166 static noinline int do_walk_down(struct btrfs_trans_handle *trans, 5167 struct btrfs_root *root, 5168 struct btrfs_path *path, 5169 struct walk_control *wc, int *lookup_info) 5170 { 5171 u64 bytenr; 5172 u64 generation; 5173 u64 parent; 5174 u32 blocksize; 5175 struct btrfs_key key; 5176 struct extent_buffer *next; 5177 int level = wc->level; 5178 int reada = 0; 5179 int ret = 0; 5180 5181 generation = btrfs_node_ptr_generation(path->nodes[level], 5182 path->slots[level]); 5183 /* 5184 * if the lower level block was created before the snapshot 5185 * was created, we know there is no need to update back refs 5186 * for the subtree 5187 */ 5188 if (wc->stage == UPDATE_BACKREF && 5189 generation <= root->root_key.offset) { 5190 *lookup_info = 1; 5191 return 1; 5192 } 5193 5194 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); 5195 blocksize = btrfs_level_size(root, level - 1); 5196 5197 next = btrfs_find_tree_block(root, bytenr, blocksize); 5198 if (!next) { 5199 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 5200 reada = 1; 5201 } 5202 btrfs_tree_lock(next); 5203 btrfs_set_lock_blocking(next); 5204 5205 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 5206 &wc->refs[level - 1], 5207 &wc->flags[level - 1]); 5208 BUG_ON(ret); 5209 BUG_ON(wc->refs[level - 1] == 0); 5210 *lookup_info = 0; 5211 5212 if (wc->stage == DROP_REFERENCE) { 5213 if (wc->refs[level - 1] > 1) { 5214 if (level == 1 && 5215 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5216 goto skip; 5217 5218 if (!wc->update_ref || 5219 generation <= root->root_key.offset) 5220 goto skip; 5221 5222 btrfs_node_key_to_cpu(path->nodes[level], &key, 5223 path->slots[level]); 5224 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); 5225 if (ret < 0) 5226 goto skip; 5227 5228 wc->stage = UPDATE_BACKREF; 5229 wc->shared_level = level - 1; 5230 } 5231 } else { 5232 if (level == 1 && 5233 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5234 goto skip; 5235 } 5236 5237 if (!btrfs_buffer_uptodate(next, generation)) { 5238 btrfs_tree_unlock(next); 5239 free_extent_buffer(next); 5240 next = NULL; 5241 *lookup_info = 1; 5242 } 5243 5244 if (!next) { 5245 if (reada && level == 1) 5246 reada_walk_down(trans, root, wc, path); 5247 next = read_tree_block(root, bytenr, blocksize, generation); 5248 btrfs_tree_lock(next); 5249 btrfs_set_lock_blocking(next); 5250 } 5251 5252 level--; 5253 BUG_ON(level != btrfs_header_level(next)); 5254 path->nodes[level] = next; 5255 path->slots[level] = 0; 5256 path->locks[level] = 1; 5257 wc->level = level; 5258 if (wc->level == 1) 5259 wc->reada_slot = 0; 5260 return 0; 5261 skip: 5262 wc->refs[level - 1] = 0; 5263 wc->flags[level - 1] = 0; 5264 if (wc->stage == DROP_REFERENCE) { 5265 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 5266 parent = path->nodes[level]->start; 5267 } else { 5268 BUG_ON(root->root_key.objectid != 5269 btrfs_header_owner(path->nodes[level])); 5270 parent = 0; 5271 } 5272 5273 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, 5274 root->root_key.objectid, level - 1, 0); 5275 BUG_ON(ret); 5276 } 5277 btrfs_tree_unlock(next); 5278 free_extent_buffer(next); 5279 *lookup_info = 1; 5280 return 1; 5281 } 5282 5283 /* 5284 * hepler to process tree block while walking up the tree. 5285 * 5286 * when wc->stage == DROP_REFERENCE, this function drops 5287 * reference count on the block. 5288 * 5289 * when wc->stage == UPDATE_BACKREF, this function changes 5290 * wc->stage back to DROP_REFERENCE if we changed wc->stage 5291 * to UPDATE_BACKREF previously while processing the block. 5292 * 5293 * NOTE: return value 1 means we should stop walking up. 5294 */ 5295 static noinline int walk_up_proc(struct btrfs_trans_handle *trans, 5296 struct btrfs_root *root, 5297 struct btrfs_path *path, 5298 struct walk_control *wc) 5299 { 5300 int ret = 0; 5301 int level = wc->level; 5302 struct extent_buffer *eb = path->nodes[level]; 5303 u64 parent = 0; 5304 5305 if (wc->stage == UPDATE_BACKREF) { 5306 BUG_ON(wc->shared_level < level); 5307 if (level < wc->shared_level) 5308 goto out; 5309 5310 ret = find_next_key(path, level + 1, &wc->update_progress); 5311 if (ret > 0) 5312 wc->update_ref = 0; 5313 5314 wc->stage = DROP_REFERENCE; 5315 wc->shared_level = -1; 5316 path->slots[level] = 0; 5317 5318 /* 5319 * check reference count again if the block isn't locked. 5320 * we should start walking down the tree again if reference 5321 * count is one. 5322 */ 5323 if (!path->locks[level]) { 5324 BUG_ON(level == 0); 5325 btrfs_tree_lock(eb); 5326 btrfs_set_lock_blocking(eb); 5327 path->locks[level] = 1; 5328 5329 ret = btrfs_lookup_extent_info(trans, root, 5330 eb->start, eb->len, 5331 &wc->refs[level], 5332 &wc->flags[level]); 5333 BUG_ON(ret); 5334 BUG_ON(wc->refs[level] == 0); 5335 if (wc->refs[level] == 1) { 5336 btrfs_tree_unlock(eb); 5337 path->locks[level] = 0; 5338 return 1; 5339 } 5340 } 5341 } 5342 5343 /* wc->stage == DROP_REFERENCE */ 5344 BUG_ON(wc->refs[level] > 1 && !path->locks[level]); 5345 5346 if (wc->refs[level] == 1) { 5347 if (level == 0) { 5348 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5349 ret = btrfs_dec_ref(trans, root, eb, 1); 5350 else 5351 ret = btrfs_dec_ref(trans, root, eb, 0); 5352 BUG_ON(ret); 5353 } 5354 /* make block locked assertion in clean_tree_block happy */ 5355 if (!path->locks[level] && 5356 btrfs_header_generation(eb) == trans->transid) { 5357 btrfs_tree_lock(eb); 5358 btrfs_set_lock_blocking(eb); 5359 path->locks[level] = 1; 5360 } 5361 clean_tree_block(trans, root, eb); 5362 } 5363 5364 if (eb == root->node) { 5365 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5366 parent = eb->start; 5367 else 5368 BUG_ON(root->root_key.objectid != 5369 btrfs_header_owner(eb)); 5370 } else { 5371 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5372 parent = path->nodes[level + 1]->start; 5373 else 5374 BUG_ON(root->root_key.objectid != 5375 btrfs_header_owner(path->nodes[level + 1])); 5376 } 5377 5378 ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, 5379 root->root_key.objectid, level, 0); 5380 BUG_ON(ret); 5381 out: 5382 wc->refs[level] = 0; 5383 wc->flags[level] = 0; 5384 return ret; 5385 } 5386 5387 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 5388 struct btrfs_root *root, 5389 struct btrfs_path *path, 5390 struct walk_control *wc) 5391 { 5392 int level = wc->level; 5393 int lookup_info = 1; 5394 int ret; 5395 5396 while (level >= 0) { 5397 if (path->slots[level] >= 5398 btrfs_header_nritems(path->nodes[level])) 5399 break; 5400 5401 ret = walk_down_proc(trans, root, path, wc, lookup_info); 5402 if (ret > 0) 5403 break; 5404 5405 if (level == 0) 5406 break; 5407 5408 ret = do_walk_down(trans, root, path, wc, &lookup_info); 5409 if (ret > 0) { 5410 path->slots[level]++; 5411 continue; 5412 } 5413 level = wc->level; 5414 } 5415 return 0; 5416 } 5417 5418 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 5419 struct btrfs_root *root, 5420 struct btrfs_path *path, 5421 struct walk_control *wc, int max_level) 5422 { 5423 int level = wc->level; 5424 int ret; 5425 5426 path->slots[level] = btrfs_header_nritems(path->nodes[level]); 5427 while (level < max_level && path->nodes[level]) { 5428 wc->level = level; 5429 if (path->slots[level] + 1 < 5430 btrfs_header_nritems(path->nodes[level])) { 5431 path->slots[level]++; 5432 return 0; 5433 } else { 5434 ret = walk_up_proc(trans, root, path, wc); 5435 if (ret > 0) 5436 return 0; 5437 5438 if (path->locks[level]) { 5439 btrfs_tree_unlock(path->nodes[level]); 5440 path->locks[level] = 0; 5441 } 5442 free_extent_buffer(path->nodes[level]); 5443 path->nodes[level] = NULL; 5444 level++; 5445 } 5446 } 5447 return 1; 5448 } 5449 5450 /* 5451 * drop a subvolume tree. 5452 * 5453 * this function traverses the tree freeing any blocks that only 5454 * referenced by the tree. 5455 * 5456 * when a shared tree block is found. this function decreases its 5457 * reference count by one. if update_ref is true, this function 5458 * also make sure backrefs for the shared block and all lower level 5459 * blocks are properly updated. 5460 */ 5461 int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) 5462 { 5463 struct btrfs_path *path; 5464 struct btrfs_trans_handle *trans; 5465 struct btrfs_root *tree_root = root->fs_info->tree_root; 5466 struct btrfs_root_item *root_item = &root->root_item; 5467 struct walk_control *wc; 5468 struct btrfs_key key; 5469 int err = 0; 5470 int ret; 5471 int level; 5472 5473 path = btrfs_alloc_path(); 5474 BUG_ON(!path); 5475 5476 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5477 BUG_ON(!wc); 5478 5479 trans = btrfs_start_transaction(tree_root, 1); 5480 5481 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 5482 level = btrfs_header_level(root->node); 5483 path->nodes[level] = btrfs_lock_root_node(root); 5484 btrfs_set_lock_blocking(path->nodes[level]); 5485 path->slots[level] = 0; 5486 path->locks[level] = 1; 5487 memset(&wc->update_progress, 0, 5488 sizeof(wc->update_progress)); 5489 } else { 5490 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 5491 memcpy(&wc->update_progress, &key, 5492 sizeof(wc->update_progress)); 5493 5494 level = root_item->drop_level; 5495 BUG_ON(level == 0); 5496 path->lowest_level = level; 5497 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5498 path->lowest_level = 0; 5499 if (ret < 0) { 5500 err = ret; 5501 goto out; 5502 } 5503 WARN_ON(ret > 0); 5504 5505 /* 5506 * unlock our path, this is safe because only this 5507 * function is allowed to delete this snapshot 5508 */ 5509 btrfs_unlock_up_safe(path, 0); 5510 5511 level = btrfs_header_level(root->node); 5512 while (1) { 5513 btrfs_tree_lock(path->nodes[level]); 5514 btrfs_set_lock_blocking(path->nodes[level]); 5515 5516 ret = btrfs_lookup_extent_info(trans, root, 5517 path->nodes[level]->start, 5518 path->nodes[level]->len, 5519 &wc->refs[level], 5520 &wc->flags[level]); 5521 BUG_ON(ret); 5522 BUG_ON(wc->refs[level] == 0); 5523 5524 if (level == root_item->drop_level) 5525 break; 5526 5527 btrfs_tree_unlock(path->nodes[level]); 5528 WARN_ON(wc->refs[level] != 1); 5529 level--; 5530 } 5531 } 5532 5533 wc->level = level; 5534 wc->shared_level = -1; 5535 wc->stage = DROP_REFERENCE; 5536 wc->update_ref = update_ref; 5537 wc->keep_locks = 0; 5538 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 5539 5540 while (1) { 5541 ret = walk_down_tree(trans, root, path, wc); 5542 if (ret < 0) { 5543 err = ret; 5544 break; 5545 } 5546 5547 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); 5548 if (ret < 0) { 5549 err = ret; 5550 break; 5551 } 5552 5553 if (ret > 0) { 5554 BUG_ON(wc->stage != DROP_REFERENCE); 5555 break; 5556 } 5557 5558 if (wc->stage == DROP_REFERENCE) { 5559 level = wc->level; 5560 btrfs_node_key(path->nodes[level], 5561 &root_item->drop_progress, 5562 path->slots[level]); 5563 root_item->drop_level = level; 5564 } 5565 5566 BUG_ON(wc->level == 0); 5567 if (trans->transaction->in_commit || 5568 trans->transaction->delayed_refs.flushing) { 5569 ret = btrfs_update_root(trans, tree_root, 5570 &root->root_key, 5571 root_item); 5572 BUG_ON(ret); 5573 5574 btrfs_end_transaction(trans, tree_root); 5575 trans = btrfs_start_transaction(tree_root, 1); 5576 } else { 5577 unsigned long update; 5578 update = trans->delayed_ref_updates; 5579 trans->delayed_ref_updates = 0; 5580 if (update) 5581 btrfs_run_delayed_refs(trans, tree_root, 5582 update); 5583 } 5584 } 5585 btrfs_release_path(root, path); 5586 BUG_ON(err); 5587 5588 ret = btrfs_del_root(trans, tree_root, &root->root_key); 5589 BUG_ON(ret); 5590 5591 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 5592 ret = btrfs_find_last_root(tree_root, root->root_key.objectid, 5593 NULL, NULL); 5594 BUG_ON(ret < 0); 5595 if (ret > 0) { 5596 ret = btrfs_del_orphan_item(trans, tree_root, 5597 root->root_key.objectid); 5598 BUG_ON(ret); 5599 } 5600 } 5601 5602 if (root->in_radix) { 5603 btrfs_free_fs_root(tree_root->fs_info, root); 5604 } else { 5605 free_extent_buffer(root->node); 5606 free_extent_buffer(root->commit_root); 5607 kfree(root); 5608 } 5609 out: 5610 btrfs_end_transaction(trans, tree_root); 5611 kfree(wc); 5612 btrfs_free_path(path); 5613 return err; 5614 } 5615 5616 /* 5617 * drop subtree rooted at tree block 'node'. 5618 * 5619 * NOTE: this function will unlock and release tree block 'node' 5620 */ 5621 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 5622 struct btrfs_root *root, 5623 struct extent_buffer *node, 5624 struct extent_buffer *parent) 5625 { 5626 struct btrfs_path *path; 5627 struct walk_control *wc; 5628 int level; 5629 int parent_level; 5630 int ret = 0; 5631 int wret; 5632 5633 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 5634 5635 path = btrfs_alloc_path(); 5636 BUG_ON(!path); 5637 5638 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5639 BUG_ON(!wc); 5640 5641 btrfs_assert_tree_locked(parent); 5642 parent_level = btrfs_header_level(parent); 5643 extent_buffer_get(parent); 5644 path->nodes[parent_level] = parent; 5645 path->slots[parent_level] = btrfs_header_nritems(parent); 5646 5647 btrfs_assert_tree_locked(node); 5648 level = btrfs_header_level(node); 5649 path->nodes[level] = node; 5650 path->slots[level] = 0; 5651 path->locks[level] = 1; 5652 5653 wc->refs[parent_level] = 1; 5654 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; 5655 wc->level = level; 5656 wc->shared_level = -1; 5657 wc->stage = DROP_REFERENCE; 5658 wc->update_ref = 0; 5659 wc->keep_locks = 1; 5660 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 5661 5662 while (1) { 5663 wret = walk_down_tree(trans, root, path, wc); 5664 if (wret < 0) { 5665 ret = wret; 5666 break; 5667 } 5668 5669 wret = walk_up_tree(trans, root, path, wc, parent_level); 5670 if (wret < 0) 5671 ret = wret; 5672 if (wret != 0) 5673 break; 5674 } 5675 5676 kfree(wc); 5677 btrfs_free_path(path); 5678 return ret; 5679 } 5680 5681 #if 0 5682 static unsigned long calc_ra(unsigned long start, unsigned long last, 5683 unsigned long nr) 5684 { 5685 return min(last, start + nr - 1); 5686 } 5687 5688 static noinline int relocate_inode_pages(struct inode *inode, u64 start, 5689 u64 len) 5690 { 5691 u64 page_start; 5692 u64 page_end; 5693 unsigned long first_index; 5694 unsigned long last_index; 5695 unsigned long i; 5696 struct page *page; 5697 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5698 struct file_ra_state *ra; 5699 struct btrfs_ordered_extent *ordered; 5700 unsigned int total_read = 0; 5701 unsigned int total_dirty = 0; 5702 int ret = 0; 5703 5704 ra = kzalloc(sizeof(*ra), GFP_NOFS); 5705 5706 mutex_lock(&inode->i_mutex); 5707 first_index = start >> PAGE_CACHE_SHIFT; 5708 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 5709 5710 /* make sure the dirty trick played by the caller work */ 5711 ret = invalidate_inode_pages2_range(inode->i_mapping, 5712 first_index, last_index); 5713 if (ret) 5714 goto out_unlock; 5715 5716 file_ra_state_init(ra, inode->i_mapping); 5717 5718 for (i = first_index ; i <= last_index; i++) { 5719 if (total_read % ra->ra_pages == 0) { 5720 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 5721 calc_ra(i, last_index, ra->ra_pages)); 5722 } 5723 total_read++; 5724 again: 5725 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) 5726 BUG_ON(1); 5727 page = grab_cache_page(inode->i_mapping, i); 5728 if (!page) { 5729 ret = -ENOMEM; 5730 goto out_unlock; 5731 } 5732 if (!PageUptodate(page)) { 5733 btrfs_readpage(NULL, page); 5734 lock_page(page); 5735 if (!PageUptodate(page)) { 5736 unlock_page(page); 5737 page_cache_release(page); 5738 ret = -EIO; 5739 goto out_unlock; 5740 } 5741 } 5742 wait_on_page_writeback(page); 5743 5744 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 5745 page_end = page_start + PAGE_CACHE_SIZE - 1; 5746 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 5747 5748 ordered = btrfs_lookup_ordered_extent(inode, page_start); 5749 if (ordered) { 5750 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5751 unlock_page(page); 5752 page_cache_release(page); 5753 btrfs_start_ordered_extent(inode, ordered, 1); 5754 btrfs_put_ordered_extent(ordered); 5755 goto again; 5756 } 5757 set_page_extent_mapped(page); 5758 5759 if (i == first_index) 5760 set_extent_bits(io_tree, page_start, page_end, 5761 EXTENT_BOUNDARY, GFP_NOFS); 5762 btrfs_set_extent_delalloc(inode, page_start, page_end); 5763 5764 set_page_dirty(page); 5765 total_dirty++; 5766 5767 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5768 unlock_page(page); 5769 page_cache_release(page); 5770 } 5771 5772 out_unlock: 5773 kfree(ra); 5774 mutex_unlock(&inode->i_mutex); 5775 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); 5776 return ret; 5777 } 5778 5779 static noinline int relocate_data_extent(struct inode *reloc_inode, 5780 struct btrfs_key *extent_key, 5781 u64 offset) 5782 { 5783 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 5784 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; 5785 struct extent_map *em; 5786 u64 start = extent_key->objectid - offset; 5787 u64 end = start + extent_key->offset - 1; 5788 5789 em = alloc_extent_map(GFP_NOFS); 5790 BUG_ON(!em || IS_ERR(em)); 5791 5792 em->start = start; 5793 em->len = extent_key->offset; 5794 em->block_len = extent_key->offset; 5795 em->block_start = extent_key->objectid; 5796 em->bdev = root->fs_info->fs_devices->latest_bdev; 5797 set_bit(EXTENT_FLAG_PINNED, &em->flags); 5798 5799 /* setup extent map to cheat btrfs_readpage */ 5800 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5801 while (1) { 5802 int ret; 5803 write_lock(&em_tree->lock); 5804 ret = add_extent_mapping(em_tree, em); 5805 write_unlock(&em_tree->lock); 5806 if (ret != -EEXIST) { 5807 free_extent_map(em); 5808 break; 5809 } 5810 btrfs_drop_extent_cache(reloc_inode, start, end, 0); 5811 } 5812 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5813 5814 return relocate_inode_pages(reloc_inode, start, extent_key->offset); 5815 } 5816 5817 struct btrfs_ref_path { 5818 u64 extent_start; 5819 u64 nodes[BTRFS_MAX_LEVEL]; 5820 u64 root_objectid; 5821 u64 root_generation; 5822 u64 owner_objectid; 5823 u32 num_refs; 5824 int lowest_level; 5825 int current_level; 5826 int shared_level; 5827 5828 struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; 5829 u64 new_nodes[BTRFS_MAX_LEVEL]; 5830 }; 5831 5832 struct disk_extent { 5833 u64 ram_bytes; 5834 u64 disk_bytenr; 5835 u64 disk_num_bytes; 5836 u64 offset; 5837 u64 num_bytes; 5838 u8 compression; 5839 u8 encryption; 5840 u16 other_encoding; 5841 }; 5842 5843 static int is_cowonly_root(u64 root_objectid) 5844 { 5845 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 5846 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 5847 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 5848 root_objectid == BTRFS_DEV_TREE_OBJECTID || 5849 root_objectid == BTRFS_TREE_LOG_OBJECTID || 5850 root_objectid == BTRFS_CSUM_TREE_OBJECTID) 5851 return 1; 5852 return 0; 5853 } 5854 5855 static noinline int __next_ref_path(struct btrfs_trans_handle *trans, 5856 struct btrfs_root *extent_root, 5857 struct btrfs_ref_path *ref_path, 5858 int first_time) 5859 { 5860 struct extent_buffer *leaf; 5861 struct btrfs_path *path; 5862 struct btrfs_extent_ref *ref; 5863 struct btrfs_key key; 5864 struct btrfs_key found_key; 5865 u64 bytenr; 5866 u32 nritems; 5867 int level; 5868 int ret = 1; 5869 5870 path = btrfs_alloc_path(); 5871 if (!path) 5872 return -ENOMEM; 5873 5874 if (first_time) { 5875 ref_path->lowest_level = -1; 5876 ref_path->current_level = -1; 5877 ref_path->shared_level = -1; 5878 goto walk_up; 5879 } 5880 walk_down: 5881 level = ref_path->current_level - 1; 5882 while (level >= -1) { 5883 u64 parent; 5884 if (level < ref_path->lowest_level) 5885 break; 5886 5887 if (level >= 0) 5888 bytenr = ref_path->nodes[level]; 5889 else 5890 bytenr = ref_path->extent_start; 5891 BUG_ON(bytenr == 0); 5892 5893 parent = ref_path->nodes[level + 1]; 5894 ref_path->nodes[level + 1] = 0; 5895 ref_path->current_level = level; 5896 BUG_ON(parent == 0); 5897 5898 key.objectid = bytenr; 5899 key.offset = parent + 1; 5900 key.type = BTRFS_EXTENT_REF_KEY; 5901 5902 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5903 if (ret < 0) 5904 goto out; 5905 BUG_ON(ret == 0); 5906 5907 leaf = path->nodes[0]; 5908 nritems = btrfs_header_nritems(leaf); 5909 if (path->slots[0] >= nritems) { 5910 ret = btrfs_next_leaf(extent_root, path); 5911 if (ret < 0) 5912 goto out; 5913 if (ret > 0) 5914 goto next; 5915 leaf = path->nodes[0]; 5916 } 5917 5918 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5919 if (found_key.objectid == bytenr && 5920 found_key.type == BTRFS_EXTENT_REF_KEY) { 5921 if (level < ref_path->shared_level) 5922 ref_path->shared_level = level; 5923 goto found; 5924 } 5925 next: 5926 level--; 5927 btrfs_release_path(extent_root, path); 5928 cond_resched(); 5929 } 5930 /* reached lowest level */ 5931 ret = 1; 5932 goto out; 5933 walk_up: 5934 level = ref_path->current_level; 5935 while (level < BTRFS_MAX_LEVEL - 1) { 5936 u64 ref_objectid; 5937 5938 if (level >= 0) 5939 bytenr = ref_path->nodes[level]; 5940 else 5941 bytenr = ref_path->extent_start; 5942 5943 BUG_ON(bytenr == 0); 5944 5945 key.objectid = bytenr; 5946 key.offset = 0; 5947 key.type = BTRFS_EXTENT_REF_KEY; 5948 5949 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5950 if (ret < 0) 5951 goto out; 5952 5953 leaf = path->nodes[0]; 5954 nritems = btrfs_header_nritems(leaf); 5955 if (path->slots[0] >= nritems) { 5956 ret = btrfs_next_leaf(extent_root, path); 5957 if (ret < 0) 5958 goto out; 5959 if (ret > 0) { 5960 /* the extent was freed by someone */ 5961 if (ref_path->lowest_level == level) 5962 goto out; 5963 btrfs_release_path(extent_root, path); 5964 goto walk_down; 5965 } 5966 leaf = path->nodes[0]; 5967 } 5968 5969 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5970 if (found_key.objectid != bytenr || 5971 found_key.type != BTRFS_EXTENT_REF_KEY) { 5972 /* the extent was freed by someone */ 5973 if (ref_path->lowest_level == level) { 5974 ret = 1; 5975 goto out; 5976 } 5977 btrfs_release_path(extent_root, path); 5978 goto walk_down; 5979 } 5980 found: 5981 ref = btrfs_item_ptr(leaf, path->slots[0], 5982 struct btrfs_extent_ref); 5983 ref_objectid = btrfs_ref_objectid(leaf, ref); 5984 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { 5985 if (first_time) { 5986 level = (int)ref_objectid; 5987 BUG_ON(level >= BTRFS_MAX_LEVEL); 5988 ref_path->lowest_level = level; 5989 ref_path->current_level = level; 5990 ref_path->nodes[level] = bytenr; 5991 } else { 5992 WARN_ON(ref_objectid != level); 5993 } 5994 } else { 5995 WARN_ON(level != -1); 5996 } 5997 first_time = 0; 5998 5999 if (ref_path->lowest_level == level) { 6000 ref_path->owner_objectid = ref_objectid; 6001 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); 6002 } 6003 6004 /* 6005 * the block is tree root or the block isn't in reference 6006 * counted tree. 6007 */ 6008 if (found_key.objectid == found_key.offset || 6009 is_cowonly_root(btrfs_ref_root(leaf, ref))) { 6010 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 6011 ref_path->root_generation = 6012 btrfs_ref_generation(leaf, ref); 6013 if (level < 0) { 6014 /* special reference from the tree log */ 6015 ref_path->nodes[0] = found_key.offset; 6016 ref_path->current_level = 0; 6017 } 6018 ret = 0; 6019 goto out; 6020 } 6021 6022 level++; 6023 BUG_ON(ref_path->nodes[level] != 0); 6024 ref_path->nodes[level] = found_key.offset; 6025 ref_path->current_level = level; 6026 6027 /* 6028 * the reference was created in the running transaction, 6029 * no need to continue walking up. 6030 */ 6031 if (btrfs_ref_generation(leaf, ref) == trans->transid) { 6032 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 6033 ref_path->root_generation = 6034 btrfs_ref_generation(leaf, ref); 6035 ret = 0; 6036 goto out; 6037 } 6038 6039 btrfs_release_path(extent_root, path); 6040 cond_resched(); 6041 } 6042 /* reached max tree level, but no tree root found. */ 6043 BUG(); 6044 out: 6045 btrfs_free_path(path); 6046 return ret; 6047 } 6048 6049 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, 6050 struct btrfs_root *extent_root, 6051 struct btrfs_ref_path *ref_path, 6052 u64 extent_start) 6053 { 6054 memset(ref_path, 0, sizeof(*ref_path)); 6055 ref_path->extent_start = extent_start; 6056 6057 return __next_ref_path(trans, extent_root, ref_path, 1); 6058 } 6059 6060 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, 6061 struct btrfs_root *extent_root, 6062 struct btrfs_ref_path *ref_path) 6063 { 6064 return __next_ref_path(trans, extent_root, ref_path, 0); 6065 } 6066 6067 static noinline int get_new_locations(struct inode *reloc_inode, 6068 struct btrfs_key *extent_key, 6069 u64 offset, int no_fragment, 6070 struct disk_extent **extents, 6071 int *nr_extents) 6072 { 6073 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 6074 struct btrfs_path *path; 6075 struct btrfs_file_extent_item *fi; 6076 struct extent_buffer *leaf; 6077 struct disk_extent *exts = *extents; 6078 struct btrfs_key found_key; 6079 u64 cur_pos; 6080 u64 last_byte; 6081 u32 nritems; 6082 int nr = 0; 6083 int max = *nr_extents; 6084 int ret; 6085 6086 WARN_ON(!no_fragment && *extents); 6087 if (!exts) { 6088 max = 1; 6089 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); 6090 if (!exts) 6091 return -ENOMEM; 6092 } 6093 6094 path = btrfs_alloc_path(); 6095 BUG_ON(!path); 6096 6097 cur_pos = extent_key->objectid - offset; 6098 last_byte = extent_key->objectid + extent_key->offset; 6099 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 6100 cur_pos, 0); 6101 if (ret < 0) 6102 goto out; 6103 if (ret > 0) { 6104 ret = -ENOENT; 6105 goto out; 6106 } 6107 6108 while (1) { 6109 leaf = path->nodes[0]; 6110 nritems = btrfs_header_nritems(leaf); 6111 if (path->slots[0] >= nritems) { 6112 ret = btrfs_next_leaf(root, path); 6113 if (ret < 0) 6114 goto out; 6115 if (ret > 0) 6116 break; 6117 leaf = path->nodes[0]; 6118 } 6119 6120 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 6121 if (found_key.offset != cur_pos || 6122 found_key.type != BTRFS_EXTENT_DATA_KEY || 6123 found_key.objectid != reloc_inode->i_ino) 6124 break; 6125 6126 fi = btrfs_item_ptr(leaf, path->slots[0], 6127 struct btrfs_file_extent_item); 6128 if (btrfs_file_extent_type(leaf, fi) != 6129 BTRFS_FILE_EXTENT_REG || 6130 btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 6131 break; 6132 6133 if (nr == max) { 6134 struct disk_extent *old = exts; 6135 max *= 2; 6136 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); 6137 memcpy(exts, old, sizeof(*exts) * nr); 6138 if (old != *extents) 6139 kfree(old); 6140 } 6141 6142 exts[nr].disk_bytenr = 6143 btrfs_file_extent_disk_bytenr(leaf, fi); 6144 exts[nr].disk_num_bytes = 6145 btrfs_file_extent_disk_num_bytes(leaf, fi); 6146 exts[nr].offset = btrfs_file_extent_offset(leaf, fi); 6147 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 6148 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 6149 exts[nr].compression = btrfs_file_extent_compression(leaf, fi); 6150 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); 6151 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, 6152 fi); 6153 BUG_ON(exts[nr].offset > 0); 6154 BUG_ON(exts[nr].compression || exts[nr].encryption); 6155 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); 6156 6157 cur_pos += exts[nr].num_bytes; 6158 nr++; 6159 6160 if (cur_pos + offset >= last_byte) 6161 break; 6162 6163 if (no_fragment) { 6164 ret = 1; 6165 goto out; 6166 } 6167 path->slots[0]++; 6168 } 6169 6170 BUG_ON(cur_pos + offset > last_byte); 6171 if (cur_pos + offset < last_byte) { 6172 ret = -ENOENT; 6173 goto out; 6174 } 6175 ret = 0; 6176 out: 6177 btrfs_free_path(path); 6178 if (ret) { 6179 if (exts != *extents) 6180 kfree(exts); 6181 } else { 6182 *extents = exts; 6183 *nr_extents = nr; 6184 } 6185 return ret; 6186 } 6187 6188 static noinline int replace_one_extent(struct btrfs_trans_handle *trans, 6189 struct btrfs_root *root, 6190 struct btrfs_path *path, 6191 struct btrfs_key *extent_key, 6192 struct btrfs_key *leaf_key, 6193 struct btrfs_ref_path *ref_path, 6194 struct disk_extent *new_extents, 6195 int nr_extents) 6196 { 6197 struct extent_buffer *leaf; 6198 struct btrfs_file_extent_item *fi; 6199 struct inode *inode = NULL; 6200 struct btrfs_key key; 6201 u64 lock_start = 0; 6202 u64 lock_end = 0; 6203 u64 num_bytes; 6204 u64 ext_offset; 6205 u64 search_end = (u64)-1; 6206 u32 nritems; 6207 int nr_scaned = 0; 6208 int extent_locked = 0; 6209 int extent_type; 6210 int ret; 6211 6212 memcpy(&key, leaf_key, sizeof(key)); 6213 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 6214 if (key.objectid < ref_path->owner_objectid || 6215 (key.objectid == ref_path->owner_objectid && 6216 key.type < BTRFS_EXTENT_DATA_KEY)) { 6217 key.objectid = ref_path->owner_objectid; 6218 key.type = BTRFS_EXTENT_DATA_KEY; 6219 key.offset = 0; 6220 } 6221 } 6222 6223 while (1) { 6224 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 6225 if (ret < 0) 6226 goto out; 6227 6228 leaf = path->nodes[0]; 6229 nritems = btrfs_header_nritems(leaf); 6230 next: 6231 if (extent_locked && ret > 0) { 6232 /* 6233 * the file extent item was modified by someone 6234 * before the extent got locked. 6235 */ 6236 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6237 lock_end, GFP_NOFS); 6238 extent_locked = 0; 6239 } 6240 6241 if (path->slots[0] >= nritems) { 6242 if (++nr_scaned > 2) 6243 break; 6244 6245 BUG_ON(extent_locked); 6246 ret = btrfs_next_leaf(root, path); 6247 if (ret < 0) 6248 goto out; 6249 if (ret > 0) 6250 break; 6251 leaf = path->nodes[0]; 6252 nritems = btrfs_header_nritems(leaf); 6253 } 6254 6255 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 6256 6257 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 6258 if ((key.objectid > ref_path->owner_objectid) || 6259 (key.objectid == ref_path->owner_objectid && 6260 key.type > BTRFS_EXTENT_DATA_KEY) || 6261 key.offset >= search_end) 6262 break; 6263 } 6264 6265 if (inode && key.objectid != inode->i_ino) { 6266 BUG_ON(extent_locked); 6267 btrfs_release_path(root, path); 6268 mutex_unlock(&inode->i_mutex); 6269 iput(inode); 6270 inode = NULL; 6271 continue; 6272 } 6273 6274 if (key.type != BTRFS_EXTENT_DATA_KEY) { 6275 path->slots[0]++; 6276 ret = 1; 6277 goto next; 6278 } 6279 fi = btrfs_item_ptr(leaf, path->slots[0], 6280 struct btrfs_file_extent_item); 6281 extent_type = btrfs_file_extent_type(leaf, fi); 6282 if ((extent_type != BTRFS_FILE_EXTENT_REG && 6283 extent_type != BTRFS_FILE_EXTENT_PREALLOC) || 6284 (btrfs_file_extent_disk_bytenr(leaf, fi) != 6285 extent_key->objectid)) { 6286 path->slots[0]++; 6287 ret = 1; 6288 goto next; 6289 } 6290 6291 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 6292 ext_offset = btrfs_file_extent_offset(leaf, fi); 6293 6294 if (search_end == (u64)-1) { 6295 search_end = key.offset - ext_offset + 6296 btrfs_file_extent_ram_bytes(leaf, fi); 6297 } 6298 6299 if (!extent_locked) { 6300 lock_start = key.offset; 6301 lock_end = lock_start + num_bytes - 1; 6302 } else { 6303 if (lock_start > key.offset || 6304 lock_end + 1 < key.offset + num_bytes) { 6305 unlock_extent(&BTRFS_I(inode)->io_tree, 6306 lock_start, lock_end, GFP_NOFS); 6307 extent_locked = 0; 6308 } 6309 } 6310 6311 if (!inode) { 6312 btrfs_release_path(root, path); 6313 6314 inode = btrfs_iget_locked(root->fs_info->sb, 6315 key.objectid, root); 6316 if (inode->i_state & I_NEW) { 6317 BTRFS_I(inode)->root = root; 6318 BTRFS_I(inode)->location.objectid = 6319 key.objectid; 6320 BTRFS_I(inode)->location.type = 6321 BTRFS_INODE_ITEM_KEY; 6322 BTRFS_I(inode)->location.offset = 0; 6323 btrfs_read_locked_inode(inode); 6324 unlock_new_inode(inode); 6325 } 6326 /* 6327 * some code call btrfs_commit_transaction while 6328 * holding the i_mutex, so we can't use mutex_lock 6329 * here. 6330 */ 6331 if (is_bad_inode(inode) || 6332 !mutex_trylock(&inode->i_mutex)) { 6333 iput(inode); 6334 inode = NULL; 6335 key.offset = (u64)-1; 6336 goto skip; 6337 } 6338 } 6339 6340 if (!extent_locked) { 6341 struct btrfs_ordered_extent *ordered; 6342 6343 btrfs_release_path(root, path); 6344 6345 lock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6346 lock_end, GFP_NOFS); 6347 ordered = btrfs_lookup_first_ordered_extent(inode, 6348 lock_end); 6349 if (ordered && 6350 ordered->file_offset <= lock_end && 6351 ordered->file_offset + ordered->len > lock_start) { 6352 unlock_extent(&BTRFS_I(inode)->io_tree, 6353 lock_start, lock_end, GFP_NOFS); 6354 btrfs_start_ordered_extent(inode, ordered, 1); 6355 btrfs_put_ordered_extent(ordered); 6356 key.offset += num_bytes; 6357 goto skip; 6358 } 6359 if (ordered) 6360 btrfs_put_ordered_extent(ordered); 6361 6362 extent_locked = 1; 6363 continue; 6364 } 6365 6366 if (nr_extents == 1) { 6367 /* update extent pointer in place */ 6368 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6369 new_extents[0].disk_bytenr); 6370 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6371 new_extents[0].disk_num_bytes); 6372 btrfs_mark_buffer_dirty(leaf); 6373 6374 btrfs_drop_extent_cache(inode, key.offset, 6375 key.offset + num_bytes - 1, 0); 6376 6377 ret = btrfs_inc_extent_ref(trans, root, 6378 new_extents[0].disk_bytenr, 6379 new_extents[0].disk_num_bytes, 6380 leaf->start, 6381 root->root_key.objectid, 6382 trans->transid, 6383 key.objectid); 6384 BUG_ON(ret); 6385 6386 ret = btrfs_free_extent(trans, root, 6387 extent_key->objectid, 6388 extent_key->offset, 6389 leaf->start, 6390 btrfs_header_owner(leaf), 6391 btrfs_header_generation(leaf), 6392 key.objectid, 0); 6393 BUG_ON(ret); 6394 6395 btrfs_release_path(root, path); 6396 key.offset += num_bytes; 6397 } else { 6398 BUG_ON(1); 6399 #if 0 6400 u64 alloc_hint; 6401 u64 extent_len; 6402 int i; 6403 /* 6404 * drop old extent pointer at first, then insert the 6405 * new pointers one bye one 6406 */ 6407 btrfs_release_path(root, path); 6408 ret = btrfs_drop_extents(trans, root, inode, key.offset, 6409 key.offset + num_bytes, 6410 key.offset, &alloc_hint); 6411 BUG_ON(ret); 6412 6413 for (i = 0; i < nr_extents; i++) { 6414 if (ext_offset >= new_extents[i].num_bytes) { 6415 ext_offset -= new_extents[i].num_bytes; 6416 continue; 6417 } 6418 extent_len = min(new_extents[i].num_bytes - 6419 ext_offset, num_bytes); 6420 6421 ret = btrfs_insert_empty_item(trans, root, 6422 path, &key, 6423 sizeof(*fi)); 6424 BUG_ON(ret); 6425 6426 leaf = path->nodes[0]; 6427 fi = btrfs_item_ptr(leaf, path->slots[0], 6428 struct btrfs_file_extent_item); 6429 btrfs_set_file_extent_generation(leaf, fi, 6430 trans->transid); 6431 btrfs_set_file_extent_type(leaf, fi, 6432 BTRFS_FILE_EXTENT_REG); 6433 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6434 new_extents[i].disk_bytenr); 6435 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6436 new_extents[i].disk_num_bytes); 6437 btrfs_set_file_extent_ram_bytes(leaf, fi, 6438 new_extents[i].ram_bytes); 6439 6440 btrfs_set_file_extent_compression(leaf, fi, 6441 new_extents[i].compression); 6442 btrfs_set_file_extent_encryption(leaf, fi, 6443 new_extents[i].encryption); 6444 btrfs_set_file_extent_other_encoding(leaf, fi, 6445 new_extents[i].other_encoding); 6446 6447 btrfs_set_file_extent_num_bytes(leaf, fi, 6448 extent_len); 6449 ext_offset += new_extents[i].offset; 6450 btrfs_set_file_extent_offset(leaf, fi, 6451 ext_offset); 6452 btrfs_mark_buffer_dirty(leaf); 6453 6454 btrfs_drop_extent_cache(inode, key.offset, 6455 key.offset + extent_len - 1, 0); 6456 6457 ret = btrfs_inc_extent_ref(trans, root, 6458 new_extents[i].disk_bytenr, 6459 new_extents[i].disk_num_bytes, 6460 leaf->start, 6461 root->root_key.objectid, 6462 trans->transid, key.objectid); 6463 BUG_ON(ret); 6464 btrfs_release_path(root, path); 6465 6466 inode_add_bytes(inode, extent_len); 6467 6468 ext_offset = 0; 6469 num_bytes -= extent_len; 6470 key.offset += extent_len; 6471 6472 if (num_bytes == 0) 6473 break; 6474 } 6475 BUG_ON(i >= nr_extents); 6476 #endif 6477 } 6478 6479 if (extent_locked) { 6480 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6481 lock_end, GFP_NOFS); 6482 extent_locked = 0; 6483 } 6484 skip: 6485 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && 6486 key.offset >= search_end) 6487 break; 6488 6489 cond_resched(); 6490 } 6491 ret = 0; 6492 out: 6493 btrfs_release_path(root, path); 6494 if (inode) { 6495 mutex_unlock(&inode->i_mutex); 6496 if (extent_locked) { 6497 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6498 lock_end, GFP_NOFS); 6499 } 6500 iput(inode); 6501 } 6502 return ret; 6503 } 6504 6505 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, 6506 struct btrfs_root *root, 6507 struct extent_buffer *buf, u64 orig_start) 6508 { 6509 int level; 6510 int ret; 6511 6512 BUG_ON(btrfs_header_generation(buf) != trans->transid); 6513 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 6514 6515 level = btrfs_header_level(buf); 6516 if (level == 0) { 6517 struct btrfs_leaf_ref *ref; 6518 struct btrfs_leaf_ref *orig_ref; 6519 6520 orig_ref = btrfs_lookup_leaf_ref(root, orig_start); 6521 if (!orig_ref) 6522 return -ENOENT; 6523 6524 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); 6525 if (!ref) { 6526 btrfs_free_leaf_ref(root, orig_ref); 6527 return -ENOMEM; 6528 } 6529 6530 ref->nritems = orig_ref->nritems; 6531 memcpy(ref->extents, orig_ref->extents, 6532 sizeof(ref->extents[0]) * ref->nritems); 6533 6534 btrfs_free_leaf_ref(root, orig_ref); 6535 6536 ref->root_gen = trans->transid; 6537 ref->bytenr = buf->start; 6538 ref->owner = btrfs_header_owner(buf); 6539 ref->generation = btrfs_header_generation(buf); 6540 6541 ret = btrfs_add_leaf_ref(root, ref, 0); 6542 WARN_ON(ret); 6543 btrfs_free_leaf_ref(root, ref); 6544 } 6545 return 0; 6546 } 6547 6548 static noinline int invalidate_extent_cache(struct btrfs_root *root, 6549 struct extent_buffer *leaf, 6550 struct btrfs_block_group_cache *group, 6551 struct btrfs_root *target_root) 6552 { 6553 struct btrfs_key key; 6554 struct inode *inode = NULL; 6555 struct btrfs_file_extent_item *fi; 6556 u64 num_bytes; 6557 u64 skip_objectid = 0; 6558 u32 nritems; 6559 u32 i; 6560 6561 nritems = btrfs_header_nritems(leaf); 6562 for (i = 0; i < nritems; i++) { 6563 btrfs_item_key_to_cpu(leaf, &key, i); 6564 if (key.objectid == skip_objectid || 6565 key.type != BTRFS_EXTENT_DATA_KEY) 6566 continue; 6567 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 6568 if (btrfs_file_extent_type(leaf, fi) == 6569 BTRFS_FILE_EXTENT_INLINE) 6570 continue; 6571 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 6572 continue; 6573 if (!inode || inode->i_ino != key.objectid) { 6574 iput(inode); 6575 inode = btrfs_ilookup(target_root->fs_info->sb, 6576 key.objectid, target_root, 1); 6577 } 6578 if (!inode) { 6579 skip_objectid = key.objectid; 6580 continue; 6581 } 6582 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 6583 6584 lock_extent(&BTRFS_I(inode)->io_tree, key.offset, 6585 key.offset + num_bytes - 1, GFP_NOFS); 6586 btrfs_drop_extent_cache(inode, key.offset, 6587 key.offset + num_bytes - 1, 1); 6588 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, 6589 key.offset + num_bytes - 1, GFP_NOFS); 6590 cond_resched(); 6591 } 6592 iput(inode); 6593 return 0; 6594 } 6595 6596 static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, 6597 struct btrfs_root *root, 6598 struct extent_buffer *leaf, 6599 struct btrfs_block_group_cache *group, 6600 struct inode *reloc_inode) 6601 { 6602 struct btrfs_key key; 6603 struct btrfs_key extent_key; 6604 struct btrfs_file_extent_item *fi; 6605 struct btrfs_leaf_ref *ref; 6606 struct disk_extent *new_extent; 6607 u64 bytenr; 6608 u64 num_bytes; 6609 u32 nritems; 6610 u32 i; 6611 int ext_index; 6612 int nr_extent; 6613 int ret; 6614 6615 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); 6616 BUG_ON(!new_extent); 6617 6618 ref = btrfs_lookup_leaf_ref(root, leaf->start); 6619 BUG_ON(!ref); 6620 6621 ext_index = -1; 6622 nritems = btrfs_header_nritems(leaf); 6623 for (i = 0; i < nritems; i++) { 6624 btrfs_item_key_to_cpu(leaf, &key, i); 6625 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 6626 continue; 6627 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 6628 if (btrfs_file_extent_type(leaf, fi) == 6629 BTRFS_FILE_EXTENT_INLINE) 6630 continue; 6631 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 6632 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 6633 if (bytenr == 0) 6634 continue; 6635 6636 ext_index++; 6637 if (bytenr >= group->key.objectid + group->key.offset || 6638 bytenr + num_bytes <= group->key.objectid) 6639 continue; 6640 6641 extent_key.objectid = bytenr; 6642 extent_key.offset = num_bytes; 6643 extent_key.type = BTRFS_EXTENT_ITEM_KEY; 6644 nr_extent = 1; 6645 ret = get_new_locations(reloc_inode, &extent_key, 6646 group->key.objectid, 1, 6647 &new_extent, &nr_extent); 6648 if (ret > 0) 6649 continue; 6650 BUG_ON(ret < 0); 6651 6652 BUG_ON(ref->extents[ext_index].bytenr != bytenr); 6653 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); 6654 ref->extents[ext_index].bytenr = new_extent->disk_bytenr; 6655 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; 6656 6657 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6658 new_extent->disk_bytenr); 6659 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6660 new_extent->disk_num_bytes); 6661 btrfs_mark_buffer_dirty(leaf); 6662 6663 ret = btrfs_inc_extent_ref(trans, root, 6664 new_extent->disk_bytenr, 6665 new_extent->disk_num_bytes, 6666 leaf->start, 6667 root->root_key.objectid, 6668 trans->transid, key.objectid); 6669 BUG_ON(ret); 6670 6671 ret = btrfs_free_extent(trans, root, 6672 bytenr, num_bytes, leaf->start, 6673 btrfs_header_owner(leaf), 6674 btrfs_header_generation(leaf), 6675 key.objectid, 0); 6676 BUG_ON(ret); 6677 cond_resched(); 6678 } 6679 kfree(new_extent); 6680 BUG_ON(ext_index + 1 != ref->nritems); 6681 btrfs_free_leaf_ref(root, ref); 6682 return 0; 6683 } 6684 6685 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, 6686 struct btrfs_root *root) 6687 { 6688 struct btrfs_root *reloc_root; 6689 int ret; 6690 6691 if (root->reloc_root) { 6692 reloc_root = root->reloc_root; 6693 root->reloc_root = NULL; 6694 list_add(&reloc_root->dead_list, 6695 &root->fs_info->dead_reloc_roots); 6696 6697 btrfs_set_root_bytenr(&reloc_root->root_item, 6698 reloc_root->node->start); 6699 btrfs_set_root_level(&root->root_item, 6700 btrfs_header_level(reloc_root->node)); 6701 memset(&reloc_root->root_item.drop_progress, 0, 6702 sizeof(struct btrfs_disk_key)); 6703 reloc_root->root_item.drop_level = 0; 6704 6705 ret = btrfs_update_root(trans, root->fs_info->tree_root, 6706 &reloc_root->root_key, 6707 &reloc_root->root_item); 6708 BUG_ON(ret); 6709 } 6710 return 0; 6711 } 6712 6713 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) 6714 { 6715 struct btrfs_trans_handle *trans; 6716 struct btrfs_root *reloc_root; 6717 struct btrfs_root *prev_root = NULL; 6718 struct list_head dead_roots; 6719 int ret; 6720 unsigned long nr; 6721 6722 INIT_LIST_HEAD(&dead_roots); 6723 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); 6724 6725 while (!list_empty(&dead_roots)) { 6726 reloc_root = list_entry(dead_roots.prev, 6727 struct btrfs_root, dead_list); 6728 list_del_init(&reloc_root->dead_list); 6729 6730 BUG_ON(reloc_root->commit_root != NULL); 6731 while (1) { 6732 trans = btrfs_join_transaction(root, 1); 6733 BUG_ON(!trans); 6734 6735 mutex_lock(&root->fs_info->drop_mutex); 6736 ret = btrfs_drop_snapshot(trans, reloc_root); 6737 if (ret != -EAGAIN) 6738 break; 6739 mutex_unlock(&root->fs_info->drop_mutex); 6740 6741 nr = trans->blocks_used; 6742 ret = btrfs_end_transaction(trans, root); 6743 BUG_ON(ret); 6744 btrfs_btree_balance_dirty(root, nr); 6745 } 6746 6747 free_extent_buffer(reloc_root->node); 6748 6749 ret = btrfs_del_root(trans, root->fs_info->tree_root, 6750 &reloc_root->root_key); 6751 BUG_ON(ret); 6752 mutex_unlock(&root->fs_info->drop_mutex); 6753 6754 nr = trans->blocks_used; 6755 ret = btrfs_end_transaction(trans, root); 6756 BUG_ON(ret); 6757 btrfs_btree_balance_dirty(root, nr); 6758 6759 kfree(prev_root); 6760 prev_root = reloc_root; 6761 } 6762 if (prev_root) { 6763 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); 6764 kfree(prev_root); 6765 } 6766 return 0; 6767 } 6768 6769 int btrfs_add_dead_reloc_root(struct btrfs_root *root) 6770 { 6771 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); 6772 return 0; 6773 } 6774 6775 int btrfs_cleanup_reloc_trees(struct btrfs_root *root) 6776 { 6777 struct btrfs_root *reloc_root; 6778 struct btrfs_trans_handle *trans; 6779 struct btrfs_key location; 6780 int found; 6781 int ret; 6782 6783 mutex_lock(&root->fs_info->tree_reloc_mutex); 6784 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); 6785 BUG_ON(ret); 6786 found = !list_empty(&root->fs_info->dead_reloc_roots); 6787 mutex_unlock(&root->fs_info->tree_reloc_mutex); 6788 6789 if (found) { 6790 trans = btrfs_start_transaction(root, 1); 6791 BUG_ON(!trans); 6792 ret = btrfs_commit_transaction(trans, root); 6793 BUG_ON(ret); 6794 } 6795 6796 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 6797 location.offset = (u64)-1; 6798 location.type = BTRFS_ROOT_ITEM_KEY; 6799 6800 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 6801 BUG_ON(!reloc_root); 6802 btrfs_orphan_cleanup(reloc_root); 6803 return 0; 6804 } 6805 6806 static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, 6807 struct btrfs_root *root) 6808 { 6809 struct btrfs_root *reloc_root; 6810 struct extent_buffer *eb; 6811 struct btrfs_root_item *root_item; 6812 struct btrfs_key root_key; 6813 int ret; 6814 6815 BUG_ON(!root->ref_cows); 6816 if (root->reloc_root) 6817 return 0; 6818 6819 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 6820 BUG_ON(!root_item); 6821 6822 ret = btrfs_copy_root(trans, root, root->commit_root, 6823 &eb, BTRFS_TREE_RELOC_OBJECTID); 6824 BUG_ON(ret); 6825 6826 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; 6827 root_key.offset = root->root_key.objectid; 6828 root_key.type = BTRFS_ROOT_ITEM_KEY; 6829 6830 memcpy(root_item, &root->root_item, sizeof(root_item)); 6831 btrfs_set_root_refs(root_item, 0); 6832 btrfs_set_root_bytenr(root_item, eb->start); 6833 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 6834 btrfs_set_root_generation(root_item, trans->transid); 6835 6836 btrfs_tree_unlock(eb); 6837 free_extent_buffer(eb); 6838 6839 ret = btrfs_insert_root(trans, root->fs_info->tree_root, 6840 &root_key, root_item); 6841 BUG_ON(ret); 6842 kfree(root_item); 6843 6844 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 6845 &root_key); 6846 BUG_ON(!reloc_root); 6847 reloc_root->last_trans = trans->transid; 6848 reloc_root->commit_root = NULL; 6849 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; 6850 6851 root->reloc_root = reloc_root; 6852 return 0; 6853 } 6854 6855 /* 6856 * Core function of space balance. 6857 * 6858 * The idea is using reloc trees to relocate tree blocks in reference 6859 * counted roots. There is one reloc tree for each subvol, and all 6860 * reloc trees share same root key objectid. Reloc trees are snapshots 6861 * of the latest committed roots of subvols (root->commit_root). 6862 * 6863 * To relocate a tree block referenced by a subvol, there are two steps. 6864 * COW the block through subvol's reloc tree, then update block pointer 6865 * in the subvol to point to the new block. Since all reloc trees share 6866 * same root key objectid, doing special handing for tree blocks owned 6867 * by them is easy. Once a tree block has been COWed in one reloc tree, 6868 * we can use the resulting new block directly when the same block is 6869 * required to COW again through other reloc trees. By this way, relocated 6870 * tree blocks are shared between reloc trees, so they are also shared 6871 * between subvols. 6872 */ 6873 static noinline int relocate_one_path(struct btrfs_trans_handle *trans, 6874 struct btrfs_root *root, 6875 struct btrfs_path *path, 6876 struct btrfs_key *first_key, 6877 struct btrfs_ref_path *ref_path, 6878 struct btrfs_block_group_cache *group, 6879 struct inode *reloc_inode) 6880 { 6881 struct btrfs_root *reloc_root; 6882 struct extent_buffer *eb = NULL; 6883 struct btrfs_key *keys; 6884 u64 *nodes; 6885 int level; 6886 int shared_level; 6887 int lowest_level = 0; 6888 int ret; 6889 6890 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 6891 lowest_level = ref_path->owner_objectid; 6892 6893 if (!root->ref_cows) { 6894 path->lowest_level = lowest_level; 6895 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); 6896 BUG_ON(ret < 0); 6897 path->lowest_level = 0; 6898 btrfs_release_path(root, path); 6899 return 0; 6900 } 6901 6902 mutex_lock(&root->fs_info->tree_reloc_mutex); 6903 ret = init_reloc_tree(trans, root); 6904 BUG_ON(ret); 6905 reloc_root = root->reloc_root; 6906 6907 shared_level = ref_path->shared_level; 6908 ref_path->shared_level = BTRFS_MAX_LEVEL - 1; 6909 6910 keys = ref_path->node_keys; 6911 nodes = ref_path->new_nodes; 6912 memset(&keys[shared_level + 1], 0, 6913 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1)); 6914 memset(&nodes[shared_level + 1], 0, 6915 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1)); 6916 6917 if (nodes[lowest_level] == 0) { 6918 path->lowest_level = lowest_level; 6919 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 6920 0, 1); 6921 BUG_ON(ret); 6922 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) { 6923 eb = path->nodes[level]; 6924 if (!eb || eb == reloc_root->node) 6925 break; 6926 nodes[level] = eb->start; 6927 if (level == 0) 6928 btrfs_item_key_to_cpu(eb, &keys[level], 0); 6929 else 6930 btrfs_node_key_to_cpu(eb, &keys[level], 0); 6931 } 6932 if (nodes[0] && 6933 ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6934 eb = path->nodes[0]; 6935 ret = replace_extents_in_leaf(trans, reloc_root, eb, 6936 group, reloc_inode); 6937 BUG_ON(ret); 6938 } 6939 btrfs_release_path(reloc_root, path); 6940 } else { 6941 ret = btrfs_merge_path(trans, reloc_root, keys, nodes, 6942 lowest_level); 6943 BUG_ON(ret); 6944 } 6945 6946 /* 6947 * replace tree blocks in the fs tree with tree blocks in 6948 * the reloc tree. 6949 */ 6950 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); 6951 BUG_ON(ret < 0); 6952 6953 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6954 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 6955 0, 0); 6956 BUG_ON(ret); 6957 extent_buffer_get(path->nodes[0]); 6958 eb = path->nodes[0]; 6959 btrfs_release_path(reloc_root, path); 6960 ret = invalidate_extent_cache(reloc_root, eb, group, root); 6961 BUG_ON(ret); 6962 free_extent_buffer(eb); 6963 } 6964 6965 mutex_unlock(&root->fs_info->tree_reloc_mutex); 6966 path->lowest_level = 0; 6967 return 0; 6968 } 6969 6970 static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, 6971 struct btrfs_root *root, 6972 struct btrfs_path *path, 6973 struct btrfs_key *first_key, 6974 struct btrfs_ref_path *ref_path) 6975 { 6976 int ret; 6977 6978 ret = relocate_one_path(trans, root, path, first_key, 6979 ref_path, NULL, NULL); 6980 BUG_ON(ret); 6981 6982 return 0; 6983 } 6984 6985 static noinline int del_extent_zero(struct btrfs_trans_handle *trans, 6986 struct btrfs_root *extent_root, 6987 struct btrfs_path *path, 6988 struct btrfs_key *extent_key) 6989 { 6990 int ret; 6991 6992 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); 6993 if (ret) 6994 goto out; 6995 ret = btrfs_del_item(trans, extent_root, path); 6996 out: 6997 btrfs_release_path(extent_root, path); 6998 return ret; 6999 } 7000 7001 static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info, 7002 struct btrfs_ref_path *ref_path) 7003 { 7004 struct btrfs_key root_key; 7005 7006 root_key.objectid = ref_path->root_objectid; 7007 root_key.type = BTRFS_ROOT_ITEM_KEY; 7008 if (is_cowonly_root(ref_path->root_objectid)) 7009 root_key.offset = 0; 7010 else 7011 root_key.offset = (u64)-1; 7012 7013 return btrfs_read_fs_root_no_name(fs_info, &root_key); 7014 } 7015 7016 static noinline int relocate_one_extent(struct btrfs_root *extent_root, 7017 struct btrfs_path *path, 7018 struct btrfs_key *extent_key, 7019 struct btrfs_block_group_cache *group, 7020 struct inode *reloc_inode, int pass) 7021 { 7022 struct btrfs_trans_handle *trans; 7023 struct btrfs_root *found_root; 7024 struct btrfs_ref_path *ref_path = NULL; 7025 struct disk_extent *new_extents = NULL; 7026 int nr_extents = 0; 7027 int loops; 7028 int ret; 7029 int level; 7030 struct btrfs_key first_key; 7031 u64 prev_block = 0; 7032 7033 7034 trans = btrfs_start_transaction(extent_root, 1); 7035 BUG_ON(!trans); 7036 7037 if (extent_key->objectid == 0) { 7038 ret = del_extent_zero(trans, extent_root, path, extent_key); 7039 goto out; 7040 } 7041 7042 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); 7043 if (!ref_path) { 7044 ret = -ENOMEM; 7045 goto out; 7046 } 7047 7048 for (loops = 0; ; loops++) { 7049 if (loops == 0) { 7050 ret = btrfs_first_ref_path(trans, extent_root, ref_path, 7051 extent_key->objectid); 7052 } else { 7053 ret = btrfs_next_ref_path(trans, extent_root, ref_path); 7054 } 7055 if (ret < 0) 7056 goto out; 7057 if (ret > 0) 7058 break; 7059 7060 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || 7061 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) 7062 continue; 7063 7064 found_root = read_ref_root(extent_root->fs_info, ref_path); 7065 BUG_ON(!found_root); 7066 /* 7067 * for reference counted tree, only process reference paths 7068 * rooted at the latest committed root. 7069 */ 7070 if (found_root->ref_cows && 7071 ref_path->root_generation != found_root->root_key.offset) 7072 continue; 7073 7074 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 7075 if (pass == 0) { 7076 /* 7077 * copy data extents to new locations 7078 */ 7079 u64 group_start = group->key.objectid; 7080 ret = relocate_data_extent(reloc_inode, 7081 extent_key, 7082 group_start); 7083 if (ret < 0) 7084 goto out; 7085 break; 7086 } 7087 level = 0; 7088 } else { 7089 level = ref_path->owner_objectid; 7090 } 7091 7092 if (prev_block != ref_path->nodes[level]) { 7093 struct extent_buffer *eb; 7094 u64 block_start = ref_path->nodes[level]; 7095 u64 block_size = btrfs_level_size(found_root, level); 7096 7097 eb = read_tree_block(found_root, block_start, 7098 block_size, 0); 7099 btrfs_tree_lock(eb); 7100 BUG_ON(level != btrfs_header_level(eb)); 7101 7102 if (level == 0) 7103 btrfs_item_key_to_cpu(eb, &first_key, 0); 7104 else 7105 btrfs_node_key_to_cpu(eb, &first_key, 0); 7106 7107 btrfs_tree_unlock(eb); 7108 free_extent_buffer(eb); 7109 prev_block = block_start; 7110 } 7111 7112 mutex_lock(&extent_root->fs_info->trans_mutex); 7113 btrfs_record_root_in_trans(found_root); 7114 mutex_unlock(&extent_root->fs_info->trans_mutex); 7115 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 7116 /* 7117 * try to update data extent references while 7118 * keeping metadata shared between snapshots. 7119 */ 7120 if (pass == 1) { 7121 ret = relocate_one_path(trans, found_root, 7122 path, &first_key, ref_path, 7123 group, reloc_inode); 7124 if (ret < 0) 7125 goto out; 7126 continue; 7127 } 7128 /* 7129 * use fallback method to process the remaining 7130 * references. 7131 */ 7132 if (!new_extents) { 7133 u64 group_start = group->key.objectid; 7134 new_extents = kmalloc(sizeof(*new_extents), 7135 GFP_NOFS); 7136 nr_extents = 1; 7137 ret = get_new_locations(reloc_inode, 7138 extent_key, 7139 group_start, 1, 7140 &new_extents, 7141 &nr_extents); 7142 if (ret) 7143 goto out; 7144 } 7145 ret = replace_one_extent(trans, found_root, 7146 path, extent_key, 7147 &first_key, ref_path, 7148 new_extents, nr_extents); 7149 } else { 7150 ret = relocate_tree_block(trans, found_root, path, 7151 &first_key, ref_path); 7152 } 7153 if (ret < 0) 7154 goto out; 7155 } 7156 ret = 0; 7157 out: 7158 btrfs_end_transaction(trans, extent_root); 7159 kfree(new_extents); 7160 kfree(ref_path); 7161 return ret; 7162 } 7163 #endif 7164 7165 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 7166 { 7167 u64 num_devices; 7168 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 7169 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 7170 7171 num_devices = root->fs_info->fs_devices->rw_devices; 7172 if (num_devices == 1) { 7173 stripped |= BTRFS_BLOCK_GROUP_DUP; 7174 stripped = flags & ~stripped; 7175 7176 /* turn raid0 into single device chunks */ 7177 if (flags & BTRFS_BLOCK_GROUP_RAID0) 7178 return stripped; 7179 7180 /* turn mirroring into duplication */ 7181 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 7182 BTRFS_BLOCK_GROUP_RAID10)) 7183 return stripped | BTRFS_BLOCK_GROUP_DUP; 7184 return flags; 7185 } else { 7186 /* they already had raid on here, just return */ 7187 if (flags & stripped) 7188 return flags; 7189 7190 stripped |= BTRFS_BLOCK_GROUP_DUP; 7191 stripped = flags & ~stripped; 7192 7193 /* switch duplicated blocks with raid1 */ 7194 if (flags & BTRFS_BLOCK_GROUP_DUP) 7195 return stripped | BTRFS_BLOCK_GROUP_RAID1; 7196 7197 /* turn single device chunks into raid0 */ 7198 return stripped | BTRFS_BLOCK_GROUP_RAID0; 7199 } 7200 return flags; 7201 } 7202 7203 static int __alloc_chunk_for_shrink(struct btrfs_root *root, 7204 struct btrfs_block_group_cache *shrink_block_group, 7205 int force) 7206 { 7207 struct btrfs_trans_handle *trans; 7208 u64 new_alloc_flags; 7209 u64 calc; 7210 7211 spin_lock(&shrink_block_group->lock); 7212 if (btrfs_block_group_used(&shrink_block_group->item) + 7213 shrink_block_group->reserved > 0) { 7214 spin_unlock(&shrink_block_group->lock); 7215 7216 trans = btrfs_start_transaction(root, 1); 7217 spin_lock(&shrink_block_group->lock); 7218 7219 new_alloc_flags = update_block_group_flags(root, 7220 shrink_block_group->flags); 7221 if (new_alloc_flags != shrink_block_group->flags) { 7222 calc = 7223 btrfs_block_group_used(&shrink_block_group->item); 7224 } else { 7225 calc = shrink_block_group->key.offset; 7226 } 7227 spin_unlock(&shrink_block_group->lock); 7228 7229 do_chunk_alloc(trans, root->fs_info->extent_root, 7230 calc + 2 * 1024 * 1024, new_alloc_flags, force); 7231 7232 btrfs_end_transaction(trans, root); 7233 } else 7234 spin_unlock(&shrink_block_group->lock); 7235 return 0; 7236 } 7237 7238 7239 int btrfs_prepare_block_group_relocation(struct btrfs_root *root, 7240 struct btrfs_block_group_cache *group) 7241 7242 { 7243 __alloc_chunk_for_shrink(root, group, 1); 7244 set_block_group_readonly(group); 7245 return 0; 7246 } 7247 7248 /* 7249 * checks to see if its even possible to relocate this block group. 7250 * 7251 * @return - -1 if it's not a good idea to relocate this block group, 0 if its 7252 * ok to go ahead and try. 7253 */ 7254 int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) 7255 { 7256 struct btrfs_block_group_cache *block_group; 7257 struct btrfs_space_info *space_info; 7258 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 7259 struct btrfs_device *device; 7260 int full = 0; 7261 int ret = 0; 7262 7263 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 7264 7265 /* odd, couldn't find the block group, leave it alone */ 7266 if (!block_group) 7267 return -1; 7268 7269 /* no bytes used, we're good */ 7270 if (!btrfs_block_group_used(&block_group->item)) 7271 goto out; 7272 7273 space_info = block_group->space_info; 7274 spin_lock(&space_info->lock); 7275 7276 full = space_info->full; 7277 7278 /* 7279 * if this is the last block group we have in this space, we can't 7280 * relocate it unless we're able to allocate a new chunk below. 7281 * 7282 * Otherwise, we need to make sure we have room in the space to handle 7283 * all of the extents from this block group. If we can, we're good 7284 */ 7285 if ((space_info->total_bytes != block_group->key.offset) && 7286 (space_info->bytes_used + space_info->bytes_reserved + 7287 space_info->bytes_pinned + space_info->bytes_readonly + 7288 btrfs_block_group_used(&block_group->item) < 7289 space_info->total_bytes)) { 7290 spin_unlock(&space_info->lock); 7291 goto out; 7292 } 7293 spin_unlock(&space_info->lock); 7294 7295 /* 7296 * ok we don't have enough space, but maybe we have free space on our 7297 * devices to allocate new chunks for relocation, so loop through our 7298 * alloc devices and guess if we have enough space. However, if we 7299 * were marked as full, then we know there aren't enough chunks, and we 7300 * can just return. 7301 */ 7302 ret = -1; 7303 if (full) 7304 goto out; 7305 7306 mutex_lock(&root->fs_info->chunk_mutex); 7307 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 7308 u64 min_free = btrfs_block_group_used(&block_group->item); 7309 u64 dev_offset, max_avail; 7310 7311 /* 7312 * check to make sure we can actually find a chunk with enough 7313 * space to fit our block group in. 7314 */ 7315 if (device->total_bytes > device->bytes_used + min_free) { 7316 ret = find_free_dev_extent(NULL, device, min_free, 7317 &dev_offset, &max_avail); 7318 if (!ret) 7319 break; 7320 ret = -1; 7321 } 7322 } 7323 mutex_unlock(&root->fs_info->chunk_mutex); 7324 out: 7325 btrfs_put_block_group(block_group); 7326 return ret; 7327 } 7328 7329 static int find_first_block_group(struct btrfs_root *root, 7330 struct btrfs_path *path, struct btrfs_key *key) 7331 { 7332 int ret = 0; 7333 struct btrfs_key found_key; 7334 struct extent_buffer *leaf; 7335 int slot; 7336 7337 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 7338 if (ret < 0) 7339 goto out; 7340 7341 while (1) { 7342 slot = path->slots[0]; 7343 leaf = path->nodes[0]; 7344 if (slot >= btrfs_header_nritems(leaf)) { 7345 ret = btrfs_next_leaf(root, path); 7346 if (ret == 0) 7347 continue; 7348 if (ret < 0) 7349 goto out; 7350 break; 7351 } 7352 btrfs_item_key_to_cpu(leaf, &found_key, slot); 7353 7354 if (found_key.objectid >= key->objectid && 7355 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 7356 ret = 0; 7357 goto out; 7358 } 7359 path->slots[0]++; 7360 } 7361 ret = -ENOENT; 7362 out: 7363 return ret; 7364 } 7365 7366 int btrfs_free_block_groups(struct btrfs_fs_info *info) 7367 { 7368 struct btrfs_block_group_cache *block_group; 7369 struct btrfs_space_info *space_info; 7370 struct btrfs_caching_control *caching_ctl; 7371 struct rb_node *n; 7372 7373 down_write(&info->extent_commit_sem); 7374 while (!list_empty(&info->caching_block_groups)) { 7375 caching_ctl = list_entry(info->caching_block_groups.next, 7376 struct btrfs_caching_control, list); 7377 list_del(&caching_ctl->list); 7378 put_caching_control(caching_ctl); 7379 } 7380 up_write(&info->extent_commit_sem); 7381 7382 spin_lock(&info->block_group_cache_lock); 7383 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 7384 block_group = rb_entry(n, struct btrfs_block_group_cache, 7385 cache_node); 7386 rb_erase(&block_group->cache_node, 7387 &info->block_group_cache_tree); 7388 spin_unlock(&info->block_group_cache_lock); 7389 7390 down_write(&block_group->space_info->groups_sem); 7391 list_del(&block_group->list); 7392 up_write(&block_group->space_info->groups_sem); 7393 7394 if (block_group->cached == BTRFS_CACHE_STARTED) 7395 wait_block_group_cache_done(block_group); 7396 7397 btrfs_remove_free_space_cache(block_group); 7398 7399 WARN_ON(atomic_read(&block_group->count) != 1); 7400 kfree(block_group); 7401 7402 spin_lock(&info->block_group_cache_lock); 7403 } 7404 spin_unlock(&info->block_group_cache_lock); 7405 7406 /* now that all the block groups are freed, go through and 7407 * free all the space_info structs. This is only called during 7408 * the final stages of unmount, and so we know nobody is 7409 * using them. We call synchronize_rcu() once before we start, 7410 * just to be on the safe side. 7411 */ 7412 synchronize_rcu(); 7413 7414 while(!list_empty(&info->space_info)) { 7415 space_info = list_entry(info->space_info.next, 7416 struct btrfs_space_info, 7417 list); 7418 7419 list_del(&space_info->list); 7420 kfree(space_info); 7421 } 7422 return 0; 7423 } 7424 7425 int btrfs_read_block_groups(struct btrfs_root *root) 7426 { 7427 struct btrfs_path *path; 7428 int ret; 7429 struct btrfs_block_group_cache *cache; 7430 struct btrfs_fs_info *info = root->fs_info; 7431 struct btrfs_space_info *space_info; 7432 struct btrfs_key key; 7433 struct btrfs_key found_key; 7434 struct extent_buffer *leaf; 7435 7436 root = info->extent_root; 7437 key.objectid = 0; 7438 key.offset = 0; 7439 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 7440 path = btrfs_alloc_path(); 7441 if (!path) 7442 return -ENOMEM; 7443 7444 while (1) { 7445 ret = find_first_block_group(root, path, &key); 7446 if (ret > 0) { 7447 ret = 0; 7448 goto error; 7449 } 7450 if (ret != 0) 7451 goto error; 7452 7453 leaf = path->nodes[0]; 7454 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 7455 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7456 if (!cache) { 7457 ret = -ENOMEM; 7458 break; 7459 } 7460 7461 atomic_set(&cache->count, 1); 7462 spin_lock_init(&cache->lock); 7463 spin_lock_init(&cache->tree_lock); 7464 cache->fs_info = info; 7465 INIT_LIST_HEAD(&cache->list); 7466 INIT_LIST_HEAD(&cache->cluster_list); 7467 7468 /* 7469 * we only want to have 32k of ram per block group for keeping 7470 * track of free space, and if we pass 1/2 of that we want to 7471 * start converting things over to using bitmaps 7472 */ 7473 cache->extents_thresh = ((1024 * 32) / 2) / 7474 sizeof(struct btrfs_free_space); 7475 7476 read_extent_buffer(leaf, &cache->item, 7477 btrfs_item_ptr_offset(leaf, path->slots[0]), 7478 sizeof(cache->item)); 7479 memcpy(&cache->key, &found_key, sizeof(found_key)); 7480 7481 key.objectid = found_key.objectid + found_key.offset; 7482 btrfs_release_path(root, path); 7483 cache->flags = btrfs_block_group_flags(&cache->item); 7484 cache->sectorsize = root->sectorsize; 7485 7486 /* 7487 * check for two cases, either we are full, and therefore 7488 * don't need to bother with the caching work since we won't 7489 * find any space, or we are empty, and we can just add all 7490 * the space in and be done with it. This saves us _alot_ of 7491 * time, particularly in the full case. 7492 */ 7493 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 7494 exclude_super_stripes(root, cache); 7495 cache->last_byte_to_unpin = (u64)-1; 7496 cache->cached = BTRFS_CACHE_FINISHED; 7497 free_excluded_extents(root, cache); 7498 } else if (btrfs_block_group_used(&cache->item) == 0) { 7499 exclude_super_stripes(root, cache); 7500 cache->last_byte_to_unpin = (u64)-1; 7501 cache->cached = BTRFS_CACHE_FINISHED; 7502 add_new_free_space(cache, root->fs_info, 7503 found_key.objectid, 7504 found_key.objectid + 7505 found_key.offset); 7506 free_excluded_extents(root, cache); 7507 } 7508 7509 ret = update_space_info(info, cache->flags, found_key.offset, 7510 btrfs_block_group_used(&cache->item), 7511 &space_info); 7512 BUG_ON(ret); 7513 cache->space_info = space_info; 7514 spin_lock(&cache->space_info->lock); 7515 cache->space_info->bytes_super += cache->bytes_super; 7516 spin_unlock(&cache->space_info->lock); 7517 7518 down_write(&space_info->groups_sem); 7519 list_add_tail(&cache->list, &space_info->block_groups); 7520 up_write(&space_info->groups_sem); 7521 7522 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7523 BUG_ON(ret); 7524 7525 set_avail_alloc_bits(root->fs_info, cache->flags); 7526 if (btrfs_chunk_readonly(root, cache->key.objectid)) 7527 set_block_group_readonly(cache); 7528 } 7529 ret = 0; 7530 error: 7531 btrfs_free_path(path); 7532 return ret; 7533 } 7534 7535 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 7536 struct btrfs_root *root, u64 bytes_used, 7537 u64 type, u64 chunk_objectid, u64 chunk_offset, 7538 u64 size) 7539 { 7540 int ret; 7541 struct btrfs_root *extent_root; 7542 struct btrfs_block_group_cache *cache; 7543 7544 extent_root = root->fs_info->extent_root; 7545 7546 root->fs_info->last_trans_log_full_commit = trans->transid; 7547 7548 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7549 if (!cache) 7550 return -ENOMEM; 7551 7552 cache->key.objectid = chunk_offset; 7553 cache->key.offset = size; 7554 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7555 cache->sectorsize = root->sectorsize; 7556 7557 /* 7558 * we only want to have 32k of ram per block group for keeping track 7559 * of free space, and if we pass 1/2 of that we want to start 7560 * converting things over to using bitmaps 7561 */ 7562 cache->extents_thresh = ((1024 * 32) / 2) / 7563 sizeof(struct btrfs_free_space); 7564 atomic_set(&cache->count, 1); 7565 spin_lock_init(&cache->lock); 7566 spin_lock_init(&cache->tree_lock); 7567 INIT_LIST_HEAD(&cache->list); 7568 INIT_LIST_HEAD(&cache->cluster_list); 7569 7570 btrfs_set_block_group_used(&cache->item, bytes_used); 7571 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7572 cache->flags = type; 7573 btrfs_set_block_group_flags(&cache->item, type); 7574 7575 cache->last_byte_to_unpin = (u64)-1; 7576 cache->cached = BTRFS_CACHE_FINISHED; 7577 exclude_super_stripes(root, cache); 7578 7579 add_new_free_space(cache, root->fs_info, chunk_offset, 7580 chunk_offset + size); 7581 7582 free_excluded_extents(root, cache); 7583 7584 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7585 &cache->space_info); 7586 BUG_ON(ret); 7587 7588 spin_lock(&cache->space_info->lock); 7589 cache->space_info->bytes_super += cache->bytes_super; 7590 spin_unlock(&cache->space_info->lock); 7591 7592 down_write(&cache->space_info->groups_sem); 7593 list_add_tail(&cache->list, &cache->space_info->block_groups); 7594 up_write(&cache->space_info->groups_sem); 7595 7596 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7597 BUG_ON(ret); 7598 7599 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 7600 sizeof(cache->item)); 7601 BUG_ON(ret); 7602 7603 set_avail_alloc_bits(extent_root->fs_info, type); 7604 7605 return 0; 7606 } 7607 7608 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 7609 struct btrfs_root *root, u64 group_start) 7610 { 7611 struct btrfs_path *path; 7612 struct btrfs_block_group_cache *block_group; 7613 struct btrfs_free_cluster *cluster; 7614 struct btrfs_key key; 7615 int ret; 7616 7617 root = root->fs_info->extent_root; 7618 7619 block_group = btrfs_lookup_block_group(root->fs_info, group_start); 7620 BUG_ON(!block_group); 7621 BUG_ON(!block_group->ro); 7622 7623 memcpy(&key, &block_group->key, sizeof(key)); 7624 7625 /* make sure this block group isn't part of an allocation cluster */ 7626 cluster = &root->fs_info->data_alloc_cluster; 7627 spin_lock(&cluster->refill_lock); 7628 btrfs_return_cluster_to_free_space(block_group, cluster); 7629 spin_unlock(&cluster->refill_lock); 7630 7631 /* 7632 * make sure this block group isn't part of a metadata 7633 * allocation cluster 7634 */ 7635 cluster = &root->fs_info->meta_alloc_cluster; 7636 spin_lock(&cluster->refill_lock); 7637 btrfs_return_cluster_to_free_space(block_group, cluster); 7638 spin_unlock(&cluster->refill_lock); 7639 7640 path = btrfs_alloc_path(); 7641 BUG_ON(!path); 7642 7643 spin_lock(&root->fs_info->block_group_cache_lock); 7644 rb_erase(&block_group->cache_node, 7645 &root->fs_info->block_group_cache_tree); 7646 spin_unlock(&root->fs_info->block_group_cache_lock); 7647 7648 down_write(&block_group->space_info->groups_sem); 7649 /* 7650 * we must use list_del_init so people can check to see if they 7651 * are still on the list after taking the semaphore 7652 */ 7653 list_del_init(&block_group->list); 7654 up_write(&block_group->space_info->groups_sem); 7655 7656 if (block_group->cached == BTRFS_CACHE_STARTED) 7657 wait_block_group_cache_done(block_group); 7658 7659 btrfs_remove_free_space_cache(block_group); 7660 7661 spin_lock(&block_group->space_info->lock); 7662 block_group->space_info->total_bytes -= block_group->key.offset; 7663 block_group->space_info->bytes_readonly -= block_group->key.offset; 7664 spin_unlock(&block_group->space_info->lock); 7665 7666 btrfs_clear_space_info_full(root->fs_info); 7667 7668 btrfs_put_block_group(block_group); 7669 btrfs_put_block_group(block_group); 7670 7671 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 7672 if (ret > 0) 7673 ret = -EIO; 7674 if (ret < 0) 7675 goto out; 7676 7677 ret = btrfs_del_item(trans, root, path); 7678 out: 7679 btrfs_free_path(path); 7680 return ret; 7681 } 7682