1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 #include <linux/sched.h> 19 #include <linux/pagemap.h> 20 #include <linux/writeback.h> 21 #include <linux/blkdev.h> 22 #include <linux/version.h> 23 #include "compat.h" 24 #include "hash.h" 25 #include "crc32c.h" 26 #include "ctree.h" 27 #include "disk-io.h" 28 #include "print-tree.h" 29 #include "transaction.h" 30 #include "volumes.h" 31 #include "locking.h" 32 #include "ref-cache.h" 33 #include "compat.h" 34 35 #define PENDING_EXTENT_INSERT 0 36 #define PENDING_EXTENT_DELETE 1 37 #define PENDING_BACKREF_UPDATE 2 38 39 struct pending_extent_op { 40 int type; 41 u64 bytenr; 42 u64 num_bytes; 43 u64 parent; 44 u64 orig_parent; 45 u64 generation; 46 u64 orig_generation; 47 int level; 48 struct list_head list; 49 int del; 50 }; 51 52 static int finish_current_insert(struct btrfs_trans_handle *trans, 53 struct btrfs_root *extent_root, int all); 54 static int del_pending_extents(struct btrfs_trans_handle *trans, 55 struct btrfs_root *extent_root, int all); 56 static int pin_down_bytes(struct btrfs_trans_handle *trans, 57 struct btrfs_root *root, 58 u64 bytenr, u64 num_bytes, int is_data); 59 static int update_block_group(struct btrfs_trans_handle *trans, 60 struct btrfs_root *root, 61 u64 bytenr, u64 num_bytes, int alloc, 62 int mark_free); 63 64 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 65 { 66 return (cache->flags & bits) == bits; 67 } 68 69 /* 70 * this adds the block group to the fs_info rb tree for the block group 71 * cache 72 */ 73 static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, 74 struct btrfs_block_group_cache *block_group) 75 { 76 struct rb_node **p; 77 struct rb_node *parent = NULL; 78 struct btrfs_block_group_cache *cache; 79 80 spin_lock(&info->block_group_cache_lock); 81 p = &info->block_group_cache_tree.rb_node; 82 83 while (*p) { 84 parent = *p; 85 cache = rb_entry(parent, struct btrfs_block_group_cache, 86 cache_node); 87 if (block_group->key.objectid < cache->key.objectid) { 88 p = &(*p)->rb_left; 89 } else if (block_group->key.objectid > cache->key.objectid) { 90 p = &(*p)->rb_right; 91 } else { 92 spin_unlock(&info->block_group_cache_lock); 93 return -EEXIST; 94 } 95 } 96 97 rb_link_node(&block_group->cache_node, parent, p); 98 rb_insert_color(&block_group->cache_node, 99 &info->block_group_cache_tree); 100 spin_unlock(&info->block_group_cache_lock); 101 102 return 0; 103 } 104 105 /* 106 * This will return the block group at or after bytenr if contains is 0, else 107 * it will return the block group that contains the bytenr 108 */ 109 static struct btrfs_block_group_cache * 110 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, 111 int contains) 112 { 113 struct btrfs_block_group_cache *cache, *ret = NULL; 114 struct rb_node *n; 115 u64 end, start; 116 117 spin_lock(&info->block_group_cache_lock); 118 n = info->block_group_cache_tree.rb_node; 119 120 while (n) { 121 cache = rb_entry(n, struct btrfs_block_group_cache, 122 cache_node); 123 end = cache->key.objectid + cache->key.offset - 1; 124 start = cache->key.objectid; 125 126 if (bytenr < start) { 127 if (!contains && (!ret || start < ret->key.objectid)) 128 ret = cache; 129 n = n->rb_left; 130 } else if (bytenr > start) { 131 if (contains && bytenr <= end) { 132 ret = cache; 133 break; 134 } 135 n = n->rb_right; 136 } else { 137 ret = cache; 138 break; 139 } 140 } 141 if (ret) 142 atomic_inc(&ret->count); 143 spin_unlock(&info->block_group_cache_lock); 144 145 return ret; 146 } 147 148 /* 149 * this is only called by cache_block_group, since we could have freed extents 150 * we need to check the pinned_extents for any extents that can't be used yet 151 * since their free space will be released as soon as the transaction commits. 152 */ 153 static int add_new_free_space(struct btrfs_block_group_cache *block_group, 154 struct btrfs_fs_info *info, u64 start, u64 end) 155 { 156 u64 extent_start, extent_end, size; 157 int ret; 158 159 mutex_lock(&info->pinned_mutex); 160 while (start < end) { 161 ret = find_first_extent_bit(&info->pinned_extents, start, 162 &extent_start, &extent_end, 163 EXTENT_DIRTY); 164 if (ret) 165 break; 166 167 if (extent_start == start) { 168 start = extent_end + 1; 169 } else if (extent_start > start && extent_start < end) { 170 size = extent_start - start; 171 ret = btrfs_add_free_space(block_group, start, 172 size); 173 BUG_ON(ret); 174 start = extent_end + 1; 175 } else { 176 break; 177 } 178 } 179 180 if (start < end) { 181 size = end - start; 182 ret = btrfs_add_free_space(block_group, start, size); 183 BUG_ON(ret); 184 } 185 mutex_unlock(&info->pinned_mutex); 186 187 return 0; 188 } 189 190 static int remove_sb_from_cache(struct btrfs_root *root, 191 struct btrfs_block_group_cache *cache) 192 { 193 u64 bytenr; 194 u64 *logical; 195 int stripe_len; 196 int i, nr, ret; 197 198 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 199 bytenr = btrfs_sb_offset(i); 200 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 201 cache->key.objectid, bytenr, 0, 202 &logical, &nr, &stripe_len); 203 BUG_ON(ret); 204 while (nr--) { 205 btrfs_remove_free_space(cache, logical[nr], 206 stripe_len); 207 } 208 kfree(logical); 209 } 210 return 0; 211 } 212 213 static int cache_block_group(struct btrfs_root *root, 214 struct btrfs_block_group_cache *block_group) 215 { 216 struct btrfs_path *path; 217 int ret = 0; 218 struct btrfs_key key; 219 struct extent_buffer *leaf; 220 int slot; 221 u64 last; 222 223 if (!block_group) 224 return 0; 225 226 root = root->fs_info->extent_root; 227 228 if (block_group->cached) 229 return 0; 230 231 path = btrfs_alloc_path(); 232 if (!path) 233 return -ENOMEM; 234 235 path->reada = 2; 236 /* 237 * we get into deadlocks with paths held by callers of this function. 238 * since the alloc_mutex is protecting things right now, just 239 * skip the locking here 240 */ 241 path->skip_locking = 1; 242 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 243 key.objectid = last; 244 key.offset = 0; 245 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 246 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 247 if (ret < 0) 248 goto err; 249 250 while (1) { 251 leaf = path->nodes[0]; 252 slot = path->slots[0]; 253 if (slot >= btrfs_header_nritems(leaf)) { 254 ret = btrfs_next_leaf(root, path); 255 if (ret < 0) 256 goto err; 257 if (ret == 0) 258 continue; 259 else 260 break; 261 } 262 btrfs_item_key_to_cpu(leaf, &key, slot); 263 if (key.objectid < block_group->key.objectid) 264 goto next; 265 266 if (key.objectid >= block_group->key.objectid + 267 block_group->key.offset) 268 break; 269 270 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 271 add_new_free_space(block_group, root->fs_info, last, 272 key.objectid); 273 274 last = key.objectid + key.offset; 275 } 276 next: 277 path->slots[0]++; 278 } 279 280 add_new_free_space(block_group, root->fs_info, last, 281 block_group->key.objectid + 282 block_group->key.offset); 283 284 remove_sb_from_cache(root, block_group); 285 block_group->cached = 1; 286 ret = 0; 287 err: 288 btrfs_free_path(path); 289 return ret; 290 } 291 292 /* 293 * return the block group that starts at or after bytenr 294 */ 295 static struct btrfs_block_group_cache * 296 btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) 297 { 298 struct btrfs_block_group_cache *cache; 299 300 cache = block_group_cache_tree_search(info, bytenr, 0); 301 302 return cache; 303 } 304 305 /* 306 * return the block group that contains teh given bytenr 307 */ 308 struct btrfs_block_group_cache *btrfs_lookup_block_group( 309 struct btrfs_fs_info *info, 310 u64 bytenr) 311 { 312 struct btrfs_block_group_cache *cache; 313 314 cache = block_group_cache_tree_search(info, bytenr, 1); 315 316 return cache; 317 } 318 319 static inline void put_block_group(struct btrfs_block_group_cache *cache) 320 { 321 if (atomic_dec_and_test(&cache->count)) 322 kfree(cache); 323 } 324 325 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 326 u64 flags) 327 { 328 struct list_head *head = &info->space_info; 329 struct list_head *cur; 330 struct btrfs_space_info *found; 331 list_for_each(cur, head) { 332 found = list_entry(cur, struct btrfs_space_info, list); 333 if (found->flags == flags) 334 return found; 335 } 336 return NULL; 337 } 338 339 static u64 div_factor(u64 num, int factor) 340 { 341 if (factor == 10) 342 return num; 343 num *= factor; 344 do_div(num, 10); 345 return num; 346 } 347 348 u64 btrfs_find_block_group(struct btrfs_root *root, 349 u64 search_start, u64 search_hint, int owner) 350 { 351 struct btrfs_block_group_cache *cache; 352 u64 used; 353 u64 last = max(search_hint, search_start); 354 u64 group_start = 0; 355 int full_search = 0; 356 int factor = 9; 357 int wrapped = 0; 358 again: 359 while (1) { 360 cache = btrfs_lookup_first_block_group(root->fs_info, last); 361 if (!cache) 362 break; 363 364 spin_lock(&cache->lock); 365 last = cache->key.objectid + cache->key.offset; 366 used = btrfs_block_group_used(&cache->item); 367 368 if ((full_search || !cache->ro) && 369 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) { 370 if (used + cache->pinned + cache->reserved < 371 div_factor(cache->key.offset, factor)) { 372 group_start = cache->key.objectid; 373 spin_unlock(&cache->lock); 374 put_block_group(cache); 375 goto found; 376 } 377 } 378 spin_unlock(&cache->lock); 379 put_block_group(cache); 380 cond_resched(); 381 } 382 if (!wrapped) { 383 last = search_start; 384 wrapped = 1; 385 goto again; 386 } 387 if (!full_search && factor < 10) { 388 last = search_start; 389 full_search = 1; 390 factor = 10; 391 goto again; 392 } 393 found: 394 return group_start; 395 } 396 397 /* simple helper to search for an existing extent at a given offset */ 398 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) 399 { 400 int ret; 401 struct btrfs_key key; 402 struct btrfs_path *path; 403 404 path = btrfs_alloc_path(); 405 BUG_ON(!path); 406 key.objectid = start; 407 key.offset = len; 408 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 409 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 410 0, 0); 411 btrfs_free_path(path); 412 return ret; 413 } 414 415 /* 416 * Back reference rules. Back refs have three main goals: 417 * 418 * 1) differentiate between all holders of references to an extent so that 419 * when a reference is dropped we can make sure it was a valid reference 420 * before freeing the extent. 421 * 422 * 2) Provide enough information to quickly find the holders of an extent 423 * if we notice a given block is corrupted or bad. 424 * 425 * 3) Make it easy to migrate blocks for FS shrinking or storage pool 426 * maintenance. This is actually the same as #2, but with a slightly 427 * different use case. 428 * 429 * File extents can be referenced by: 430 * 431 * - multiple snapshots, subvolumes, or different generations in one subvol 432 * - different files inside a single subvolume 433 * - different offsets inside a file (bookend extents in file.c) 434 * 435 * The extent ref structure has fields for: 436 * 437 * - Objectid of the subvolume root 438 * - Generation number of the tree holding the reference 439 * - objectid of the file holding the reference 440 * - number of references holding by parent node (alway 1 for tree blocks) 441 * 442 * Btree leaf may hold multiple references to a file extent. In most cases, 443 * these references are from same file and the corresponding offsets inside 444 * the file are close together. 445 * 446 * When a file extent is allocated the fields are filled in: 447 * (root_key.objectid, trans->transid, inode objectid, 1) 448 * 449 * When a leaf is cow'd new references are added for every file extent found 450 * in the leaf. It looks similar to the create case, but trans->transid will 451 * be different when the block is cow'd. 452 * 453 * (root_key.objectid, trans->transid, inode objectid, 454 * number of references in the leaf) 455 * 456 * When a file extent is removed either during snapshot deletion or 457 * file truncation, we find the corresponding back reference and check 458 * the following fields: 459 * 460 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), 461 * inode objectid) 462 * 463 * Btree extents can be referenced by: 464 * 465 * - Different subvolumes 466 * - Different generations of the same subvolume 467 * 468 * When a tree block is created, back references are inserted: 469 * 470 * (root->root_key.objectid, trans->transid, level, 1) 471 * 472 * When a tree block is cow'd, new back references are added for all the 473 * blocks it points to. If the tree block isn't in reference counted root, 474 * the old back references are removed. These new back references are of 475 * the form (trans->transid will have increased since creation): 476 * 477 * (root->root_key.objectid, trans->transid, level, 1) 478 * 479 * When a backref is in deleting, the following fields are checked: 480 * 481 * if backref was for a tree root: 482 * (btrfs_header_owner(itself), btrfs_header_generation(itself), level) 483 * else 484 * (btrfs_header_owner(parent), btrfs_header_generation(parent), level) 485 * 486 * Back Reference Key composing: 487 * 488 * The key objectid corresponds to the first byte in the extent, the key 489 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first 490 * byte of parent extent. If a extent is tree root, the key offset is set 491 * to the key objectid. 492 */ 493 494 static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans, 495 struct btrfs_root *root, 496 struct btrfs_path *path, 497 u64 bytenr, u64 parent, 498 u64 ref_root, u64 ref_generation, 499 u64 owner_objectid, int del) 500 { 501 struct btrfs_key key; 502 struct btrfs_extent_ref *ref; 503 struct extent_buffer *leaf; 504 u64 ref_objectid; 505 int ret; 506 507 key.objectid = bytenr; 508 key.type = BTRFS_EXTENT_REF_KEY; 509 key.offset = parent; 510 511 ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1); 512 if (ret < 0) 513 goto out; 514 if (ret > 0) { 515 ret = -ENOENT; 516 goto out; 517 } 518 519 leaf = path->nodes[0]; 520 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 521 ref_objectid = btrfs_ref_objectid(leaf, ref); 522 if (btrfs_ref_root(leaf, ref) != ref_root || 523 btrfs_ref_generation(leaf, ref) != ref_generation || 524 (ref_objectid != owner_objectid && 525 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { 526 ret = -EIO; 527 WARN_ON(1); 528 goto out; 529 } 530 ret = 0; 531 out: 532 return ret; 533 } 534 535 /* 536 * updates all the backrefs that are pending on update_list for the 537 * extent_root 538 */ 539 static noinline int update_backrefs(struct btrfs_trans_handle *trans, 540 struct btrfs_root *extent_root, 541 struct btrfs_path *path, 542 struct list_head *update_list) 543 { 544 struct btrfs_key key; 545 struct btrfs_extent_ref *ref; 546 struct btrfs_fs_info *info = extent_root->fs_info; 547 struct pending_extent_op *op; 548 struct extent_buffer *leaf; 549 int ret = 0; 550 struct list_head *cur = update_list->next; 551 u64 ref_objectid; 552 u64 ref_root = extent_root->root_key.objectid; 553 554 op = list_entry(cur, struct pending_extent_op, list); 555 556 search: 557 key.objectid = op->bytenr; 558 key.type = BTRFS_EXTENT_REF_KEY; 559 key.offset = op->orig_parent; 560 561 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1); 562 BUG_ON(ret); 563 564 leaf = path->nodes[0]; 565 566 loop: 567 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 568 569 ref_objectid = btrfs_ref_objectid(leaf, ref); 570 571 if (btrfs_ref_root(leaf, ref) != ref_root || 572 btrfs_ref_generation(leaf, ref) != op->orig_generation || 573 (ref_objectid != op->level && 574 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { 575 printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, " 576 "root %llu, owner %u\n", 577 (unsigned long long)op->bytenr, 578 (unsigned long long)op->orig_parent, 579 (unsigned long long)ref_root, op->level); 580 btrfs_print_leaf(extent_root, leaf); 581 BUG(); 582 } 583 584 key.objectid = op->bytenr; 585 key.offset = op->parent; 586 key.type = BTRFS_EXTENT_REF_KEY; 587 ret = btrfs_set_item_key_safe(trans, extent_root, path, &key); 588 BUG_ON(ret); 589 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 590 btrfs_set_ref_generation(leaf, ref, op->generation); 591 592 cur = cur->next; 593 594 list_del_init(&op->list); 595 unlock_extent(&info->extent_ins, op->bytenr, 596 op->bytenr + op->num_bytes - 1, GFP_NOFS); 597 kfree(op); 598 599 if (cur == update_list) { 600 btrfs_mark_buffer_dirty(path->nodes[0]); 601 btrfs_release_path(extent_root, path); 602 goto out; 603 } 604 605 op = list_entry(cur, struct pending_extent_op, list); 606 607 path->slots[0]++; 608 while (path->slots[0] < btrfs_header_nritems(leaf)) { 609 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 610 if (key.objectid == op->bytenr && 611 key.type == BTRFS_EXTENT_REF_KEY) 612 goto loop; 613 path->slots[0]++; 614 } 615 616 btrfs_mark_buffer_dirty(path->nodes[0]); 617 btrfs_release_path(extent_root, path); 618 goto search; 619 620 out: 621 return 0; 622 } 623 624 static noinline int insert_extents(struct btrfs_trans_handle *trans, 625 struct btrfs_root *extent_root, 626 struct btrfs_path *path, 627 struct list_head *insert_list, int nr) 628 { 629 struct btrfs_key *keys; 630 u32 *data_size; 631 struct pending_extent_op *op; 632 struct extent_buffer *leaf; 633 struct list_head *cur = insert_list->next; 634 struct btrfs_fs_info *info = extent_root->fs_info; 635 u64 ref_root = extent_root->root_key.objectid; 636 int i = 0, last = 0, ret; 637 int total = nr * 2; 638 639 if (!nr) 640 return 0; 641 642 keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS); 643 if (!keys) 644 return -ENOMEM; 645 646 data_size = kzalloc(total * sizeof(u32), GFP_NOFS); 647 if (!data_size) { 648 kfree(keys); 649 return -ENOMEM; 650 } 651 652 list_for_each_entry(op, insert_list, list) { 653 keys[i].objectid = op->bytenr; 654 keys[i].offset = op->num_bytes; 655 keys[i].type = BTRFS_EXTENT_ITEM_KEY; 656 data_size[i] = sizeof(struct btrfs_extent_item); 657 i++; 658 659 keys[i].objectid = op->bytenr; 660 keys[i].offset = op->parent; 661 keys[i].type = BTRFS_EXTENT_REF_KEY; 662 data_size[i] = sizeof(struct btrfs_extent_ref); 663 i++; 664 } 665 666 op = list_entry(cur, struct pending_extent_op, list); 667 i = 0; 668 while (i < total) { 669 int c; 670 ret = btrfs_insert_some_items(trans, extent_root, path, 671 keys+i, data_size+i, total-i); 672 BUG_ON(ret < 0); 673 674 if (last && ret > 1) 675 BUG(); 676 677 leaf = path->nodes[0]; 678 for (c = 0; c < ret; c++) { 679 int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY; 680 681 /* 682 * if the first item we inserted was a backref, then 683 * the EXTENT_ITEM will be the odd c's, else it will 684 * be the even c's 685 */ 686 if ((ref_first && (c % 2)) || 687 (!ref_first && !(c % 2))) { 688 struct btrfs_extent_item *itm; 689 690 itm = btrfs_item_ptr(leaf, path->slots[0] + c, 691 struct btrfs_extent_item); 692 btrfs_set_extent_refs(path->nodes[0], itm, 1); 693 op->del++; 694 } else { 695 struct btrfs_extent_ref *ref; 696 697 ref = btrfs_item_ptr(leaf, path->slots[0] + c, 698 struct btrfs_extent_ref); 699 btrfs_set_ref_root(leaf, ref, ref_root); 700 btrfs_set_ref_generation(leaf, ref, 701 op->generation); 702 btrfs_set_ref_objectid(leaf, ref, op->level); 703 btrfs_set_ref_num_refs(leaf, ref, 1); 704 op->del++; 705 } 706 707 /* 708 * using del to see when its ok to free up the 709 * pending_extent_op. In the case where we insert the 710 * last item on the list in order to help do batching 711 * we need to not free the extent op until we actually 712 * insert the extent_item 713 */ 714 if (op->del == 2) { 715 unlock_extent(&info->extent_ins, op->bytenr, 716 op->bytenr + op->num_bytes - 1, 717 GFP_NOFS); 718 cur = cur->next; 719 list_del_init(&op->list); 720 kfree(op); 721 if (cur != insert_list) 722 op = list_entry(cur, 723 struct pending_extent_op, 724 list); 725 } 726 } 727 btrfs_mark_buffer_dirty(leaf); 728 btrfs_release_path(extent_root, path); 729 730 /* 731 * Ok backref's and items usually go right next to eachother, 732 * but if we could only insert 1 item that means that we 733 * inserted on the end of a leaf, and we have no idea what may 734 * be on the next leaf so we just play it safe. In order to 735 * try and help this case we insert the last thing on our 736 * insert list so hopefully it will end up being the last 737 * thing on the leaf and everything else will be before it, 738 * which will let us insert a whole bunch of items at the same 739 * time. 740 */ 741 if (ret == 1 && !last && (i + ret < total)) { 742 /* 743 * last: where we will pick up the next time around 744 * i: our current key to insert, will be total - 1 745 * cur: the current op we are screwing with 746 * op: duh 747 */ 748 last = i + ret; 749 i = total - 1; 750 cur = insert_list->prev; 751 op = list_entry(cur, struct pending_extent_op, list); 752 } else if (last) { 753 /* 754 * ok we successfully inserted the last item on the 755 * list, lets reset everything 756 * 757 * i: our current key to insert, so where we left off 758 * last time 759 * last: done with this 760 * cur: the op we are messing with 761 * op: duh 762 * total: since we inserted the last key, we need to 763 * decrement total so we dont overflow 764 */ 765 i = last; 766 last = 0; 767 total--; 768 if (i < total) { 769 cur = insert_list->next; 770 op = list_entry(cur, struct pending_extent_op, 771 list); 772 } 773 } else { 774 i += ret; 775 } 776 777 cond_resched(); 778 } 779 ret = 0; 780 kfree(keys); 781 kfree(data_size); 782 return ret; 783 } 784 785 static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, 786 struct btrfs_root *root, 787 struct btrfs_path *path, 788 u64 bytenr, u64 parent, 789 u64 ref_root, u64 ref_generation, 790 u64 owner_objectid) 791 { 792 struct btrfs_key key; 793 struct extent_buffer *leaf; 794 struct btrfs_extent_ref *ref; 795 u32 num_refs; 796 int ret; 797 798 key.objectid = bytenr; 799 key.type = BTRFS_EXTENT_REF_KEY; 800 key.offset = parent; 801 802 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref)); 803 if (ret == 0) { 804 leaf = path->nodes[0]; 805 ref = btrfs_item_ptr(leaf, path->slots[0], 806 struct btrfs_extent_ref); 807 btrfs_set_ref_root(leaf, ref, ref_root); 808 btrfs_set_ref_generation(leaf, ref, ref_generation); 809 btrfs_set_ref_objectid(leaf, ref, owner_objectid); 810 btrfs_set_ref_num_refs(leaf, ref, 1); 811 } else if (ret == -EEXIST) { 812 u64 existing_owner; 813 BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); 814 leaf = path->nodes[0]; 815 ref = btrfs_item_ptr(leaf, path->slots[0], 816 struct btrfs_extent_ref); 817 if (btrfs_ref_root(leaf, ref) != ref_root || 818 btrfs_ref_generation(leaf, ref) != ref_generation) { 819 ret = -EIO; 820 WARN_ON(1); 821 goto out; 822 } 823 824 num_refs = btrfs_ref_num_refs(leaf, ref); 825 BUG_ON(num_refs == 0); 826 btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); 827 828 existing_owner = btrfs_ref_objectid(leaf, ref); 829 if (existing_owner != owner_objectid && 830 existing_owner != BTRFS_MULTIPLE_OBJECTIDS) { 831 btrfs_set_ref_objectid(leaf, ref, 832 BTRFS_MULTIPLE_OBJECTIDS); 833 } 834 ret = 0; 835 } else { 836 goto out; 837 } 838 btrfs_mark_buffer_dirty(path->nodes[0]); 839 out: 840 btrfs_release_path(root, path); 841 return ret; 842 } 843 844 static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, 845 struct btrfs_root *root, 846 struct btrfs_path *path) 847 { 848 struct extent_buffer *leaf; 849 struct btrfs_extent_ref *ref; 850 u32 num_refs; 851 int ret = 0; 852 853 leaf = path->nodes[0]; 854 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 855 num_refs = btrfs_ref_num_refs(leaf, ref); 856 BUG_ON(num_refs == 0); 857 num_refs -= 1; 858 if (num_refs == 0) { 859 ret = btrfs_del_item(trans, root, path); 860 } else { 861 btrfs_set_ref_num_refs(leaf, ref, num_refs); 862 btrfs_mark_buffer_dirty(leaf); 863 } 864 btrfs_release_path(root, path); 865 return ret; 866 } 867 868 #ifdef BIO_RW_DISCARD 869 static void btrfs_issue_discard(struct block_device *bdev, 870 u64 start, u64 len) 871 { 872 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); 873 } 874 #endif 875 876 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 877 u64 num_bytes) 878 { 879 #ifdef BIO_RW_DISCARD 880 int ret; 881 u64 map_length = num_bytes; 882 struct btrfs_multi_bio *multi = NULL; 883 884 /* Tell the block device(s) that the sectors can be discarded */ 885 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, 886 bytenr, &map_length, &multi, 0); 887 if (!ret) { 888 struct btrfs_bio_stripe *stripe = multi->stripes; 889 int i; 890 891 if (map_length > num_bytes) 892 map_length = num_bytes; 893 894 for (i = 0; i < multi->num_stripes; i++, stripe++) { 895 btrfs_issue_discard(stripe->dev->bdev, 896 stripe->physical, 897 map_length); 898 } 899 kfree(multi); 900 } 901 902 return ret; 903 #else 904 return 0; 905 #endif 906 } 907 908 static noinline int free_extents(struct btrfs_trans_handle *trans, 909 struct btrfs_root *extent_root, 910 struct list_head *del_list) 911 { 912 struct btrfs_fs_info *info = extent_root->fs_info; 913 struct btrfs_path *path; 914 struct btrfs_key key, found_key; 915 struct extent_buffer *leaf; 916 struct list_head *cur; 917 struct pending_extent_op *op; 918 struct btrfs_extent_item *ei; 919 int ret, num_to_del, extent_slot = 0, found_extent = 0; 920 u32 refs; 921 u64 bytes_freed = 0; 922 923 path = btrfs_alloc_path(); 924 if (!path) 925 return -ENOMEM; 926 path->reada = 1; 927 928 search: 929 /* search for the backref for the current ref we want to delete */ 930 cur = del_list->next; 931 op = list_entry(cur, struct pending_extent_op, list); 932 ret = lookup_extent_backref(trans, extent_root, path, op->bytenr, 933 op->orig_parent, 934 extent_root->root_key.objectid, 935 op->orig_generation, op->level, 1); 936 if (ret) { 937 printk(KERN_ERR "btrfs unable to find backref byte nr %llu " 938 "root %llu gen %llu owner %u\n", 939 (unsigned long long)op->bytenr, 940 (unsigned long long)extent_root->root_key.objectid, 941 (unsigned long long)op->orig_generation, op->level); 942 btrfs_print_leaf(extent_root, path->nodes[0]); 943 WARN_ON(1); 944 goto out; 945 } 946 947 extent_slot = path->slots[0]; 948 num_to_del = 1; 949 found_extent = 0; 950 951 /* 952 * if we aren't the first item on the leaf we can move back one and see 953 * if our ref is right next to our extent item 954 */ 955 if (likely(extent_slot)) { 956 extent_slot--; 957 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 958 extent_slot); 959 if (found_key.objectid == op->bytenr && 960 found_key.type == BTRFS_EXTENT_ITEM_KEY && 961 found_key.offset == op->num_bytes) { 962 num_to_del++; 963 found_extent = 1; 964 } 965 } 966 967 /* 968 * if we didn't find the extent we need to delete the backref and then 969 * search for the extent item key so we can update its ref count 970 */ 971 if (!found_extent) { 972 key.objectid = op->bytenr; 973 key.type = BTRFS_EXTENT_ITEM_KEY; 974 key.offset = op->num_bytes; 975 976 ret = remove_extent_backref(trans, extent_root, path); 977 BUG_ON(ret); 978 btrfs_release_path(extent_root, path); 979 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); 980 BUG_ON(ret); 981 extent_slot = path->slots[0]; 982 } 983 984 /* this is where we update the ref count for the extent */ 985 leaf = path->nodes[0]; 986 ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); 987 refs = btrfs_extent_refs(leaf, ei); 988 BUG_ON(refs == 0); 989 refs--; 990 btrfs_set_extent_refs(leaf, ei, refs); 991 992 btrfs_mark_buffer_dirty(leaf); 993 994 /* 995 * This extent needs deleting. The reason cur_slot is extent_slot + 996 * num_to_del is because extent_slot points to the slot where the extent 997 * is, and if the backref was not right next to the extent we will be 998 * deleting at least 1 item, and will want to start searching at the 999 * slot directly next to extent_slot. However if we did find the 1000 * backref next to the extent item them we will be deleting at least 2 1001 * items and will want to start searching directly after the ref slot 1002 */ 1003 if (!refs) { 1004 struct list_head *pos, *n, *end; 1005 int cur_slot = extent_slot+num_to_del; 1006 u64 super_used; 1007 u64 root_used; 1008 1009 path->slots[0] = extent_slot; 1010 bytes_freed = op->num_bytes; 1011 1012 mutex_lock(&info->pinned_mutex); 1013 ret = pin_down_bytes(trans, extent_root, op->bytenr, 1014 op->num_bytes, op->level >= 1015 BTRFS_FIRST_FREE_OBJECTID); 1016 mutex_unlock(&info->pinned_mutex); 1017 BUG_ON(ret < 0); 1018 op->del = ret; 1019 1020 /* 1021 * we need to see if we can delete multiple things at once, so 1022 * start looping through the list of extents we are wanting to 1023 * delete and see if their extent/backref's are right next to 1024 * eachother and the extents only have 1 ref 1025 */ 1026 for (pos = cur->next; pos != del_list; pos = pos->next) { 1027 struct pending_extent_op *tmp; 1028 1029 tmp = list_entry(pos, struct pending_extent_op, list); 1030 1031 /* we only want to delete extent+ref at this stage */ 1032 if (cur_slot >= btrfs_header_nritems(leaf) - 1) 1033 break; 1034 1035 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot); 1036 if (found_key.objectid != tmp->bytenr || 1037 found_key.type != BTRFS_EXTENT_ITEM_KEY || 1038 found_key.offset != tmp->num_bytes) 1039 break; 1040 1041 /* check to make sure this extent only has one ref */ 1042 ei = btrfs_item_ptr(leaf, cur_slot, 1043 struct btrfs_extent_item); 1044 if (btrfs_extent_refs(leaf, ei) != 1) 1045 break; 1046 1047 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1); 1048 if (found_key.objectid != tmp->bytenr || 1049 found_key.type != BTRFS_EXTENT_REF_KEY || 1050 found_key.offset != tmp->orig_parent) 1051 break; 1052 1053 /* 1054 * the ref is right next to the extent, we can set the 1055 * ref count to 0 since we will delete them both now 1056 */ 1057 btrfs_set_extent_refs(leaf, ei, 0); 1058 1059 /* pin down the bytes for this extent */ 1060 mutex_lock(&info->pinned_mutex); 1061 ret = pin_down_bytes(trans, extent_root, tmp->bytenr, 1062 tmp->num_bytes, tmp->level >= 1063 BTRFS_FIRST_FREE_OBJECTID); 1064 mutex_unlock(&info->pinned_mutex); 1065 BUG_ON(ret < 0); 1066 1067 /* 1068 * use the del field to tell if we need to go ahead and 1069 * free up the extent when we delete the item or not. 1070 */ 1071 tmp->del = ret; 1072 bytes_freed += tmp->num_bytes; 1073 1074 num_to_del += 2; 1075 cur_slot += 2; 1076 } 1077 end = pos; 1078 1079 /* update the free space counters */ 1080 spin_lock(&info->delalloc_lock); 1081 super_used = btrfs_super_bytes_used(&info->super_copy); 1082 btrfs_set_super_bytes_used(&info->super_copy, 1083 super_used - bytes_freed); 1084 1085 root_used = btrfs_root_used(&extent_root->root_item); 1086 btrfs_set_root_used(&extent_root->root_item, 1087 root_used - bytes_freed); 1088 spin_unlock(&info->delalloc_lock); 1089 1090 /* delete the items */ 1091 ret = btrfs_del_items(trans, extent_root, path, 1092 path->slots[0], num_to_del); 1093 BUG_ON(ret); 1094 1095 /* 1096 * loop through the extents we deleted and do the cleanup work 1097 * on them 1098 */ 1099 for (pos = cur, n = pos->next; pos != end; 1100 pos = n, n = pos->next) { 1101 struct pending_extent_op *tmp; 1102 tmp = list_entry(pos, struct pending_extent_op, list); 1103 1104 /* 1105 * remember tmp->del tells us wether or not we pinned 1106 * down the extent 1107 */ 1108 ret = update_block_group(trans, extent_root, 1109 tmp->bytenr, tmp->num_bytes, 0, 1110 tmp->del); 1111 BUG_ON(ret); 1112 1113 list_del_init(&tmp->list); 1114 unlock_extent(&info->extent_ins, tmp->bytenr, 1115 tmp->bytenr + tmp->num_bytes - 1, 1116 GFP_NOFS); 1117 kfree(tmp); 1118 } 1119 } else if (refs && found_extent) { 1120 /* 1121 * the ref and extent were right next to eachother, but the 1122 * extent still has a ref, so just free the backref and keep 1123 * going 1124 */ 1125 ret = remove_extent_backref(trans, extent_root, path); 1126 BUG_ON(ret); 1127 1128 list_del_init(&op->list); 1129 unlock_extent(&info->extent_ins, op->bytenr, 1130 op->bytenr + op->num_bytes - 1, GFP_NOFS); 1131 kfree(op); 1132 } else { 1133 /* 1134 * the extent has multiple refs and the backref we were looking 1135 * for was not right next to it, so just unlock and go next, 1136 * we're good to go 1137 */ 1138 list_del_init(&op->list); 1139 unlock_extent(&info->extent_ins, op->bytenr, 1140 op->bytenr + op->num_bytes - 1, GFP_NOFS); 1141 kfree(op); 1142 } 1143 1144 btrfs_release_path(extent_root, path); 1145 if (!list_empty(del_list)) 1146 goto search; 1147 1148 out: 1149 btrfs_free_path(path); 1150 return ret; 1151 } 1152 1153 static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 1154 struct btrfs_root *root, u64 bytenr, 1155 u64 orig_parent, u64 parent, 1156 u64 orig_root, u64 ref_root, 1157 u64 orig_generation, u64 ref_generation, 1158 u64 owner_objectid) 1159 { 1160 int ret; 1161 struct btrfs_root *extent_root = root->fs_info->extent_root; 1162 struct btrfs_path *path; 1163 1164 if (root == root->fs_info->extent_root) { 1165 struct pending_extent_op *extent_op; 1166 u64 num_bytes; 1167 1168 BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL); 1169 num_bytes = btrfs_level_size(root, (int)owner_objectid); 1170 mutex_lock(&root->fs_info->extent_ins_mutex); 1171 if (test_range_bit(&root->fs_info->extent_ins, bytenr, 1172 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { 1173 u64 priv; 1174 ret = get_state_private(&root->fs_info->extent_ins, 1175 bytenr, &priv); 1176 BUG_ON(ret); 1177 extent_op = (struct pending_extent_op *) 1178 (unsigned long)priv; 1179 BUG_ON(extent_op->parent != orig_parent); 1180 BUG_ON(extent_op->generation != orig_generation); 1181 1182 extent_op->parent = parent; 1183 extent_op->generation = ref_generation; 1184 } else { 1185 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 1186 BUG_ON(!extent_op); 1187 1188 extent_op->type = PENDING_BACKREF_UPDATE; 1189 extent_op->bytenr = bytenr; 1190 extent_op->num_bytes = num_bytes; 1191 extent_op->parent = parent; 1192 extent_op->orig_parent = orig_parent; 1193 extent_op->generation = ref_generation; 1194 extent_op->orig_generation = orig_generation; 1195 extent_op->level = (int)owner_objectid; 1196 INIT_LIST_HEAD(&extent_op->list); 1197 extent_op->del = 0; 1198 1199 set_extent_bits(&root->fs_info->extent_ins, 1200 bytenr, bytenr + num_bytes - 1, 1201 EXTENT_WRITEBACK, GFP_NOFS); 1202 set_state_private(&root->fs_info->extent_ins, 1203 bytenr, (unsigned long)extent_op); 1204 } 1205 mutex_unlock(&root->fs_info->extent_ins_mutex); 1206 return 0; 1207 } 1208 1209 path = btrfs_alloc_path(); 1210 if (!path) 1211 return -ENOMEM; 1212 ret = lookup_extent_backref(trans, extent_root, path, 1213 bytenr, orig_parent, orig_root, 1214 orig_generation, owner_objectid, 1); 1215 if (ret) 1216 goto out; 1217 ret = remove_extent_backref(trans, extent_root, path); 1218 if (ret) 1219 goto out; 1220 ret = insert_extent_backref(trans, extent_root, path, bytenr, 1221 parent, ref_root, ref_generation, 1222 owner_objectid); 1223 BUG_ON(ret); 1224 finish_current_insert(trans, extent_root, 0); 1225 del_pending_extents(trans, extent_root, 0); 1226 out: 1227 btrfs_free_path(path); 1228 return ret; 1229 } 1230 1231 int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 1232 struct btrfs_root *root, u64 bytenr, 1233 u64 orig_parent, u64 parent, 1234 u64 ref_root, u64 ref_generation, 1235 u64 owner_objectid) 1236 { 1237 int ret; 1238 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 1239 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 1240 return 0; 1241 ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, 1242 parent, ref_root, ref_root, 1243 ref_generation, ref_generation, 1244 owner_objectid); 1245 return ret; 1246 } 1247 1248 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1249 struct btrfs_root *root, u64 bytenr, 1250 u64 orig_parent, u64 parent, 1251 u64 orig_root, u64 ref_root, 1252 u64 orig_generation, u64 ref_generation, 1253 u64 owner_objectid) 1254 { 1255 struct btrfs_path *path; 1256 int ret; 1257 struct btrfs_key key; 1258 struct extent_buffer *l; 1259 struct btrfs_extent_item *item; 1260 u32 refs; 1261 1262 path = btrfs_alloc_path(); 1263 if (!path) 1264 return -ENOMEM; 1265 1266 path->reada = 1; 1267 key.objectid = bytenr; 1268 key.type = BTRFS_EXTENT_ITEM_KEY; 1269 key.offset = (u64)-1; 1270 1271 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 1272 0, 1); 1273 if (ret < 0) 1274 return ret; 1275 BUG_ON(ret == 0 || path->slots[0] == 0); 1276 1277 path->slots[0]--; 1278 l = path->nodes[0]; 1279 1280 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 1281 if (key.objectid != bytenr) { 1282 btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]); 1283 printk(KERN_ERR "btrfs wanted %llu found %llu\n", 1284 (unsigned long long)bytenr, 1285 (unsigned long long)key.objectid); 1286 BUG(); 1287 } 1288 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); 1289 1290 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 1291 refs = btrfs_extent_refs(l, item); 1292 btrfs_set_extent_refs(l, item, refs + 1); 1293 btrfs_mark_buffer_dirty(path->nodes[0]); 1294 1295 btrfs_release_path(root->fs_info->extent_root, path); 1296 1297 path->reada = 1; 1298 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1299 path, bytenr, parent, 1300 ref_root, ref_generation, 1301 owner_objectid); 1302 BUG_ON(ret); 1303 finish_current_insert(trans, root->fs_info->extent_root, 0); 1304 del_pending_extents(trans, root->fs_info->extent_root, 0); 1305 1306 btrfs_free_path(path); 1307 return 0; 1308 } 1309 1310 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1311 struct btrfs_root *root, 1312 u64 bytenr, u64 num_bytes, u64 parent, 1313 u64 ref_root, u64 ref_generation, 1314 u64 owner_objectid) 1315 { 1316 int ret; 1317 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 1318 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 1319 return 0; 1320 ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, 1321 0, ref_root, 0, ref_generation, 1322 owner_objectid); 1323 return ret; 1324 } 1325 1326 int btrfs_extent_post_op(struct btrfs_trans_handle *trans, 1327 struct btrfs_root *root) 1328 { 1329 finish_current_insert(trans, root->fs_info->extent_root, 1); 1330 del_pending_extents(trans, root->fs_info->extent_root, 1); 1331 return 0; 1332 } 1333 1334 int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 1335 struct btrfs_root *root, u64 bytenr, 1336 u64 num_bytes, u32 *refs) 1337 { 1338 struct btrfs_path *path; 1339 int ret; 1340 struct btrfs_key key; 1341 struct extent_buffer *l; 1342 struct btrfs_extent_item *item; 1343 1344 WARN_ON(num_bytes < root->sectorsize); 1345 path = btrfs_alloc_path(); 1346 path->reada = 1; 1347 key.objectid = bytenr; 1348 key.offset = num_bytes; 1349 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 1350 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 1351 0, 0); 1352 if (ret < 0) 1353 goto out; 1354 if (ret != 0) { 1355 btrfs_print_leaf(root, path->nodes[0]); 1356 printk(KERN_INFO "btrfs failed to find block number %llu\n", 1357 (unsigned long long)bytenr); 1358 BUG(); 1359 } 1360 l = path->nodes[0]; 1361 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 1362 *refs = btrfs_extent_refs(l, item); 1363 out: 1364 btrfs_free_path(path); 1365 return 0; 1366 } 1367 1368 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1369 struct btrfs_root *root, u64 objectid, u64 bytenr) 1370 { 1371 struct btrfs_root *extent_root = root->fs_info->extent_root; 1372 struct btrfs_path *path; 1373 struct extent_buffer *leaf; 1374 struct btrfs_extent_ref *ref_item; 1375 struct btrfs_key key; 1376 struct btrfs_key found_key; 1377 u64 ref_root; 1378 u64 last_snapshot; 1379 u32 nritems; 1380 int ret; 1381 1382 key.objectid = bytenr; 1383 key.offset = (u64)-1; 1384 key.type = BTRFS_EXTENT_ITEM_KEY; 1385 1386 path = btrfs_alloc_path(); 1387 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 1388 if (ret < 0) 1389 goto out; 1390 BUG_ON(ret == 0); 1391 1392 ret = -ENOENT; 1393 if (path->slots[0] == 0) 1394 goto out; 1395 1396 path->slots[0]--; 1397 leaf = path->nodes[0]; 1398 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1399 1400 if (found_key.objectid != bytenr || 1401 found_key.type != BTRFS_EXTENT_ITEM_KEY) 1402 goto out; 1403 1404 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 1405 while (1) { 1406 leaf = path->nodes[0]; 1407 nritems = btrfs_header_nritems(leaf); 1408 if (path->slots[0] >= nritems) { 1409 ret = btrfs_next_leaf(extent_root, path); 1410 if (ret < 0) 1411 goto out; 1412 if (ret == 0) 1413 continue; 1414 break; 1415 } 1416 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1417 if (found_key.objectid != bytenr) 1418 break; 1419 1420 if (found_key.type != BTRFS_EXTENT_REF_KEY) { 1421 path->slots[0]++; 1422 continue; 1423 } 1424 1425 ref_item = btrfs_item_ptr(leaf, path->slots[0], 1426 struct btrfs_extent_ref); 1427 ref_root = btrfs_ref_root(leaf, ref_item); 1428 if ((ref_root != root->root_key.objectid && 1429 ref_root != BTRFS_TREE_LOG_OBJECTID) || 1430 objectid != btrfs_ref_objectid(leaf, ref_item)) { 1431 ret = 1; 1432 goto out; 1433 } 1434 if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) { 1435 ret = 1; 1436 goto out; 1437 } 1438 1439 path->slots[0]++; 1440 } 1441 ret = 0; 1442 out: 1443 btrfs_free_path(path); 1444 return ret; 1445 } 1446 1447 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1448 struct extent_buffer *buf, u32 nr_extents) 1449 { 1450 struct btrfs_key key; 1451 struct btrfs_file_extent_item *fi; 1452 u64 root_gen; 1453 u32 nritems; 1454 int i; 1455 int level; 1456 int ret = 0; 1457 int shared = 0; 1458 1459 if (!root->ref_cows) 1460 return 0; 1461 1462 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 1463 shared = 0; 1464 root_gen = root->root_key.offset; 1465 } else { 1466 shared = 1; 1467 root_gen = trans->transid - 1; 1468 } 1469 1470 level = btrfs_header_level(buf); 1471 nritems = btrfs_header_nritems(buf); 1472 1473 if (level == 0) { 1474 struct btrfs_leaf_ref *ref; 1475 struct btrfs_extent_info *info; 1476 1477 ref = btrfs_alloc_leaf_ref(root, nr_extents); 1478 if (!ref) { 1479 ret = -ENOMEM; 1480 goto out; 1481 } 1482 1483 ref->root_gen = root_gen; 1484 ref->bytenr = buf->start; 1485 ref->owner = btrfs_header_owner(buf); 1486 ref->generation = btrfs_header_generation(buf); 1487 ref->nritems = nr_extents; 1488 info = ref->extents; 1489 1490 for (i = 0; nr_extents > 0 && i < nritems; i++) { 1491 u64 disk_bytenr; 1492 btrfs_item_key_to_cpu(buf, &key, i); 1493 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 1494 continue; 1495 fi = btrfs_item_ptr(buf, i, 1496 struct btrfs_file_extent_item); 1497 if (btrfs_file_extent_type(buf, fi) == 1498 BTRFS_FILE_EXTENT_INLINE) 1499 continue; 1500 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 1501 if (disk_bytenr == 0) 1502 continue; 1503 1504 info->bytenr = disk_bytenr; 1505 info->num_bytes = 1506 btrfs_file_extent_disk_num_bytes(buf, fi); 1507 info->objectid = key.objectid; 1508 info->offset = key.offset; 1509 info++; 1510 } 1511 1512 ret = btrfs_add_leaf_ref(root, ref, shared); 1513 if (ret == -EEXIST && shared) { 1514 struct btrfs_leaf_ref *old; 1515 old = btrfs_lookup_leaf_ref(root, ref->bytenr); 1516 BUG_ON(!old); 1517 btrfs_remove_leaf_ref(root, old); 1518 btrfs_free_leaf_ref(root, old); 1519 ret = btrfs_add_leaf_ref(root, ref, shared); 1520 } 1521 WARN_ON(ret); 1522 btrfs_free_leaf_ref(root, ref); 1523 } 1524 out: 1525 return ret; 1526 } 1527 1528 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1529 struct extent_buffer *orig_buf, struct extent_buffer *buf, 1530 u32 *nr_extents) 1531 { 1532 u64 bytenr; 1533 u64 ref_root; 1534 u64 orig_root; 1535 u64 ref_generation; 1536 u64 orig_generation; 1537 u32 nritems; 1538 u32 nr_file_extents = 0; 1539 struct btrfs_key key; 1540 struct btrfs_file_extent_item *fi; 1541 int i; 1542 int level; 1543 int ret = 0; 1544 int faili = 0; 1545 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 1546 u64, u64, u64, u64, u64, u64, u64, u64); 1547 1548 ref_root = btrfs_header_owner(buf); 1549 ref_generation = btrfs_header_generation(buf); 1550 orig_root = btrfs_header_owner(orig_buf); 1551 orig_generation = btrfs_header_generation(orig_buf); 1552 1553 nritems = btrfs_header_nritems(buf); 1554 level = btrfs_header_level(buf); 1555 1556 if (root->ref_cows) { 1557 process_func = __btrfs_inc_extent_ref; 1558 } else { 1559 if (level == 0 && 1560 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 1561 goto out; 1562 if (level != 0 && 1563 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) 1564 goto out; 1565 process_func = __btrfs_update_extent_ref; 1566 } 1567 1568 for (i = 0; i < nritems; i++) { 1569 cond_resched(); 1570 if (level == 0) { 1571 btrfs_item_key_to_cpu(buf, &key, i); 1572 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 1573 continue; 1574 fi = btrfs_item_ptr(buf, i, 1575 struct btrfs_file_extent_item); 1576 if (btrfs_file_extent_type(buf, fi) == 1577 BTRFS_FILE_EXTENT_INLINE) 1578 continue; 1579 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 1580 if (bytenr == 0) 1581 continue; 1582 1583 nr_file_extents++; 1584 1585 ret = process_func(trans, root, bytenr, 1586 orig_buf->start, buf->start, 1587 orig_root, ref_root, 1588 orig_generation, ref_generation, 1589 key.objectid); 1590 1591 if (ret) { 1592 faili = i; 1593 WARN_ON(1); 1594 goto fail; 1595 } 1596 } else { 1597 bytenr = btrfs_node_blockptr(buf, i); 1598 ret = process_func(trans, root, bytenr, 1599 orig_buf->start, buf->start, 1600 orig_root, ref_root, 1601 orig_generation, ref_generation, 1602 level - 1); 1603 if (ret) { 1604 faili = i; 1605 WARN_ON(1); 1606 goto fail; 1607 } 1608 } 1609 } 1610 out: 1611 if (nr_extents) { 1612 if (level == 0) 1613 *nr_extents = nr_file_extents; 1614 else 1615 *nr_extents = nritems; 1616 } 1617 return 0; 1618 fail: 1619 WARN_ON(1); 1620 return ret; 1621 } 1622 1623 int btrfs_update_ref(struct btrfs_trans_handle *trans, 1624 struct btrfs_root *root, struct extent_buffer *orig_buf, 1625 struct extent_buffer *buf, int start_slot, int nr) 1626 1627 { 1628 u64 bytenr; 1629 u64 ref_root; 1630 u64 orig_root; 1631 u64 ref_generation; 1632 u64 orig_generation; 1633 struct btrfs_key key; 1634 struct btrfs_file_extent_item *fi; 1635 int i; 1636 int ret; 1637 int slot; 1638 int level; 1639 1640 BUG_ON(start_slot < 0); 1641 BUG_ON(start_slot + nr > btrfs_header_nritems(buf)); 1642 1643 ref_root = btrfs_header_owner(buf); 1644 ref_generation = btrfs_header_generation(buf); 1645 orig_root = btrfs_header_owner(orig_buf); 1646 orig_generation = btrfs_header_generation(orig_buf); 1647 level = btrfs_header_level(buf); 1648 1649 if (!root->ref_cows) { 1650 if (level == 0 && 1651 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 1652 return 0; 1653 if (level != 0 && 1654 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) 1655 return 0; 1656 } 1657 1658 for (i = 0, slot = start_slot; i < nr; i++, slot++) { 1659 cond_resched(); 1660 if (level == 0) { 1661 btrfs_item_key_to_cpu(buf, &key, slot); 1662 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 1663 continue; 1664 fi = btrfs_item_ptr(buf, slot, 1665 struct btrfs_file_extent_item); 1666 if (btrfs_file_extent_type(buf, fi) == 1667 BTRFS_FILE_EXTENT_INLINE) 1668 continue; 1669 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 1670 if (bytenr == 0) 1671 continue; 1672 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1673 orig_buf->start, buf->start, 1674 orig_root, ref_root, 1675 orig_generation, ref_generation, 1676 key.objectid); 1677 if (ret) 1678 goto fail; 1679 } else { 1680 bytenr = btrfs_node_blockptr(buf, slot); 1681 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1682 orig_buf->start, buf->start, 1683 orig_root, ref_root, 1684 orig_generation, ref_generation, 1685 level - 1); 1686 if (ret) 1687 goto fail; 1688 } 1689 } 1690 return 0; 1691 fail: 1692 WARN_ON(1); 1693 return -1; 1694 } 1695 1696 static int write_one_cache_group(struct btrfs_trans_handle *trans, 1697 struct btrfs_root *root, 1698 struct btrfs_path *path, 1699 struct btrfs_block_group_cache *cache) 1700 { 1701 int ret; 1702 int pending_ret; 1703 struct btrfs_root *extent_root = root->fs_info->extent_root; 1704 unsigned long bi; 1705 struct extent_buffer *leaf; 1706 1707 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 1708 if (ret < 0) 1709 goto fail; 1710 BUG_ON(ret); 1711 1712 leaf = path->nodes[0]; 1713 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 1714 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 1715 btrfs_mark_buffer_dirty(leaf); 1716 btrfs_release_path(extent_root, path); 1717 fail: 1718 finish_current_insert(trans, extent_root, 0); 1719 pending_ret = del_pending_extents(trans, extent_root, 0); 1720 if (ret) 1721 return ret; 1722 if (pending_ret) 1723 return pending_ret; 1724 return 0; 1725 1726 } 1727 1728 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 1729 struct btrfs_root *root) 1730 { 1731 struct btrfs_block_group_cache *cache, *entry; 1732 struct rb_node *n; 1733 int err = 0; 1734 int werr = 0; 1735 struct btrfs_path *path; 1736 u64 last = 0; 1737 1738 path = btrfs_alloc_path(); 1739 if (!path) 1740 return -ENOMEM; 1741 1742 while (1) { 1743 cache = NULL; 1744 spin_lock(&root->fs_info->block_group_cache_lock); 1745 for (n = rb_first(&root->fs_info->block_group_cache_tree); 1746 n; n = rb_next(n)) { 1747 entry = rb_entry(n, struct btrfs_block_group_cache, 1748 cache_node); 1749 if (entry->dirty) { 1750 cache = entry; 1751 break; 1752 } 1753 } 1754 spin_unlock(&root->fs_info->block_group_cache_lock); 1755 1756 if (!cache) 1757 break; 1758 1759 cache->dirty = 0; 1760 last += cache->key.offset; 1761 1762 err = write_one_cache_group(trans, root, 1763 path, cache); 1764 /* 1765 * if we fail to write the cache group, we want 1766 * to keep it marked dirty in hopes that a later 1767 * write will work 1768 */ 1769 if (err) { 1770 werr = err; 1771 continue; 1772 } 1773 } 1774 btrfs_free_path(path); 1775 return werr; 1776 } 1777 1778 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 1779 { 1780 struct btrfs_block_group_cache *block_group; 1781 int readonly = 0; 1782 1783 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 1784 if (!block_group || block_group->ro) 1785 readonly = 1; 1786 if (block_group) 1787 put_block_group(block_group); 1788 return readonly; 1789 } 1790 1791 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 1792 u64 total_bytes, u64 bytes_used, 1793 struct btrfs_space_info **space_info) 1794 { 1795 struct btrfs_space_info *found; 1796 1797 found = __find_space_info(info, flags); 1798 if (found) { 1799 spin_lock(&found->lock); 1800 found->total_bytes += total_bytes; 1801 found->bytes_used += bytes_used; 1802 found->full = 0; 1803 spin_unlock(&found->lock); 1804 *space_info = found; 1805 return 0; 1806 } 1807 found = kzalloc(sizeof(*found), GFP_NOFS); 1808 if (!found) 1809 return -ENOMEM; 1810 1811 list_add(&found->list, &info->space_info); 1812 INIT_LIST_HEAD(&found->block_groups); 1813 init_rwsem(&found->groups_sem); 1814 spin_lock_init(&found->lock); 1815 found->flags = flags; 1816 found->total_bytes = total_bytes; 1817 found->bytes_used = bytes_used; 1818 found->bytes_pinned = 0; 1819 found->bytes_reserved = 0; 1820 found->bytes_readonly = 0; 1821 found->full = 0; 1822 found->force_alloc = 0; 1823 *space_info = found; 1824 return 0; 1825 } 1826 1827 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 1828 { 1829 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 1830 BTRFS_BLOCK_GROUP_RAID1 | 1831 BTRFS_BLOCK_GROUP_RAID10 | 1832 BTRFS_BLOCK_GROUP_DUP); 1833 if (extra_flags) { 1834 if (flags & BTRFS_BLOCK_GROUP_DATA) 1835 fs_info->avail_data_alloc_bits |= extra_flags; 1836 if (flags & BTRFS_BLOCK_GROUP_METADATA) 1837 fs_info->avail_metadata_alloc_bits |= extra_flags; 1838 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 1839 fs_info->avail_system_alloc_bits |= extra_flags; 1840 } 1841 } 1842 1843 static void set_block_group_readonly(struct btrfs_block_group_cache *cache) 1844 { 1845 spin_lock(&cache->space_info->lock); 1846 spin_lock(&cache->lock); 1847 if (!cache->ro) { 1848 cache->space_info->bytes_readonly += cache->key.offset - 1849 btrfs_block_group_used(&cache->item); 1850 cache->ro = 1; 1851 } 1852 spin_unlock(&cache->lock); 1853 spin_unlock(&cache->space_info->lock); 1854 } 1855 1856 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 1857 { 1858 u64 num_devices = root->fs_info->fs_devices->rw_devices; 1859 1860 if (num_devices == 1) 1861 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 1862 if (num_devices < 4) 1863 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 1864 1865 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 1866 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 1867 BTRFS_BLOCK_GROUP_RAID10))) { 1868 flags &= ~BTRFS_BLOCK_GROUP_DUP; 1869 } 1870 1871 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 1872 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 1873 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 1874 } 1875 1876 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 1877 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 1878 (flags & BTRFS_BLOCK_GROUP_RAID10) | 1879 (flags & BTRFS_BLOCK_GROUP_DUP))) 1880 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 1881 return flags; 1882 } 1883 1884 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 1885 struct btrfs_root *extent_root, u64 alloc_bytes, 1886 u64 flags, int force) 1887 { 1888 struct btrfs_space_info *space_info; 1889 u64 thresh; 1890 int ret = 0; 1891 1892 mutex_lock(&extent_root->fs_info->chunk_mutex); 1893 1894 flags = btrfs_reduce_alloc_profile(extent_root, flags); 1895 1896 space_info = __find_space_info(extent_root->fs_info, flags); 1897 if (!space_info) { 1898 ret = update_space_info(extent_root->fs_info, flags, 1899 0, 0, &space_info); 1900 BUG_ON(ret); 1901 } 1902 BUG_ON(!space_info); 1903 1904 spin_lock(&space_info->lock); 1905 if (space_info->force_alloc) { 1906 force = 1; 1907 space_info->force_alloc = 0; 1908 } 1909 if (space_info->full) { 1910 spin_unlock(&space_info->lock); 1911 goto out; 1912 } 1913 1914 thresh = space_info->total_bytes - space_info->bytes_readonly; 1915 thresh = div_factor(thresh, 6); 1916 if (!force && 1917 (space_info->bytes_used + space_info->bytes_pinned + 1918 space_info->bytes_reserved + alloc_bytes) < thresh) { 1919 spin_unlock(&space_info->lock); 1920 goto out; 1921 } 1922 spin_unlock(&space_info->lock); 1923 1924 ret = btrfs_alloc_chunk(trans, extent_root, flags); 1925 if (ret) 1926 space_info->full = 1; 1927 out: 1928 mutex_unlock(&extent_root->fs_info->chunk_mutex); 1929 return ret; 1930 } 1931 1932 static int update_block_group(struct btrfs_trans_handle *trans, 1933 struct btrfs_root *root, 1934 u64 bytenr, u64 num_bytes, int alloc, 1935 int mark_free) 1936 { 1937 struct btrfs_block_group_cache *cache; 1938 struct btrfs_fs_info *info = root->fs_info; 1939 u64 total = num_bytes; 1940 u64 old_val; 1941 u64 byte_in_group; 1942 1943 while (total) { 1944 cache = btrfs_lookup_block_group(info, bytenr); 1945 if (!cache) 1946 return -1; 1947 byte_in_group = bytenr - cache->key.objectid; 1948 WARN_ON(byte_in_group > cache->key.offset); 1949 1950 spin_lock(&cache->space_info->lock); 1951 spin_lock(&cache->lock); 1952 cache->dirty = 1; 1953 old_val = btrfs_block_group_used(&cache->item); 1954 num_bytes = min(total, cache->key.offset - byte_in_group); 1955 if (alloc) { 1956 old_val += num_bytes; 1957 cache->space_info->bytes_used += num_bytes; 1958 if (cache->ro) 1959 cache->space_info->bytes_readonly -= num_bytes; 1960 btrfs_set_block_group_used(&cache->item, old_val); 1961 spin_unlock(&cache->lock); 1962 spin_unlock(&cache->space_info->lock); 1963 } else { 1964 old_val -= num_bytes; 1965 cache->space_info->bytes_used -= num_bytes; 1966 if (cache->ro) 1967 cache->space_info->bytes_readonly += num_bytes; 1968 btrfs_set_block_group_used(&cache->item, old_val); 1969 spin_unlock(&cache->lock); 1970 spin_unlock(&cache->space_info->lock); 1971 if (mark_free) { 1972 int ret; 1973 1974 ret = btrfs_discard_extent(root, bytenr, 1975 num_bytes); 1976 WARN_ON(ret); 1977 1978 ret = btrfs_add_free_space(cache, bytenr, 1979 num_bytes); 1980 WARN_ON(ret); 1981 } 1982 } 1983 put_block_group(cache); 1984 total -= num_bytes; 1985 bytenr += num_bytes; 1986 } 1987 return 0; 1988 } 1989 1990 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 1991 { 1992 struct btrfs_block_group_cache *cache; 1993 u64 bytenr; 1994 1995 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 1996 if (!cache) 1997 return 0; 1998 1999 bytenr = cache->key.objectid; 2000 put_block_group(cache); 2001 2002 return bytenr; 2003 } 2004 2005 int btrfs_update_pinned_extents(struct btrfs_root *root, 2006 u64 bytenr, u64 num, int pin) 2007 { 2008 u64 len; 2009 struct btrfs_block_group_cache *cache; 2010 struct btrfs_fs_info *fs_info = root->fs_info; 2011 2012 WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex)); 2013 if (pin) { 2014 set_extent_dirty(&fs_info->pinned_extents, 2015 bytenr, bytenr + num - 1, GFP_NOFS); 2016 } else { 2017 clear_extent_dirty(&fs_info->pinned_extents, 2018 bytenr, bytenr + num - 1, GFP_NOFS); 2019 } 2020 while (num > 0) { 2021 cache = btrfs_lookup_block_group(fs_info, bytenr); 2022 BUG_ON(!cache); 2023 len = min(num, cache->key.offset - 2024 (bytenr - cache->key.objectid)); 2025 if (pin) { 2026 spin_lock(&cache->space_info->lock); 2027 spin_lock(&cache->lock); 2028 cache->pinned += len; 2029 cache->space_info->bytes_pinned += len; 2030 spin_unlock(&cache->lock); 2031 spin_unlock(&cache->space_info->lock); 2032 fs_info->total_pinned += len; 2033 } else { 2034 spin_lock(&cache->space_info->lock); 2035 spin_lock(&cache->lock); 2036 cache->pinned -= len; 2037 cache->space_info->bytes_pinned -= len; 2038 spin_unlock(&cache->lock); 2039 spin_unlock(&cache->space_info->lock); 2040 fs_info->total_pinned -= len; 2041 if (cache->cached) 2042 btrfs_add_free_space(cache, bytenr, len); 2043 } 2044 put_block_group(cache); 2045 bytenr += len; 2046 num -= len; 2047 } 2048 return 0; 2049 } 2050 2051 static int update_reserved_extents(struct btrfs_root *root, 2052 u64 bytenr, u64 num, int reserve) 2053 { 2054 u64 len; 2055 struct btrfs_block_group_cache *cache; 2056 struct btrfs_fs_info *fs_info = root->fs_info; 2057 2058 while (num > 0) { 2059 cache = btrfs_lookup_block_group(fs_info, bytenr); 2060 BUG_ON(!cache); 2061 len = min(num, cache->key.offset - 2062 (bytenr - cache->key.objectid)); 2063 2064 spin_lock(&cache->space_info->lock); 2065 spin_lock(&cache->lock); 2066 if (reserve) { 2067 cache->reserved += len; 2068 cache->space_info->bytes_reserved += len; 2069 } else { 2070 cache->reserved -= len; 2071 cache->space_info->bytes_reserved -= len; 2072 } 2073 spin_unlock(&cache->lock); 2074 spin_unlock(&cache->space_info->lock); 2075 put_block_group(cache); 2076 bytenr += len; 2077 num -= len; 2078 } 2079 return 0; 2080 } 2081 2082 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 2083 { 2084 u64 last = 0; 2085 u64 start; 2086 u64 end; 2087 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; 2088 int ret; 2089 2090 mutex_lock(&root->fs_info->pinned_mutex); 2091 while (1) { 2092 ret = find_first_extent_bit(pinned_extents, last, 2093 &start, &end, EXTENT_DIRTY); 2094 if (ret) 2095 break; 2096 set_extent_dirty(copy, start, end, GFP_NOFS); 2097 last = end + 1; 2098 } 2099 mutex_unlock(&root->fs_info->pinned_mutex); 2100 return 0; 2101 } 2102 2103 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 2104 struct btrfs_root *root, 2105 struct extent_io_tree *unpin) 2106 { 2107 u64 start; 2108 u64 end; 2109 int ret; 2110 2111 mutex_lock(&root->fs_info->pinned_mutex); 2112 while (1) { 2113 ret = find_first_extent_bit(unpin, 0, &start, &end, 2114 EXTENT_DIRTY); 2115 if (ret) 2116 break; 2117 2118 ret = btrfs_discard_extent(root, start, end + 1 - start); 2119 2120 btrfs_update_pinned_extents(root, start, end + 1 - start, 0); 2121 clear_extent_dirty(unpin, start, end, GFP_NOFS); 2122 2123 if (need_resched()) { 2124 mutex_unlock(&root->fs_info->pinned_mutex); 2125 cond_resched(); 2126 mutex_lock(&root->fs_info->pinned_mutex); 2127 } 2128 } 2129 mutex_unlock(&root->fs_info->pinned_mutex); 2130 return ret; 2131 } 2132 2133 static int finish_current_insert(struct btrfs_trans_handle *trans, 2134 struct btrfs_root *extent_root, int all) 2135 { 2136 u64 start; 2137 u64 end; 2138 u64 priv; 2139 u64 search = 0; 2140 u64 skipped = 0; 2141 struct btrfs_fs_info *info = extent_root->fs_info; 2142 struct btrfs_path *path; 2143 struct pending_extent_op *extent_op, *tmp; 2144 struct list_head insert_list, update_list; 2145 int ret; 2146 int num_inserts = 0, max_inserts; 2147 2148 path = btrfs_alloc_path(); 2149 INIT_LIST_HEAD(&insert_list); 2150 INIT_LIST_HEAD(&update_list); 2151 2152 max_inserts = extent_root->leafsize / 2153 (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) + 2154 sizeof(struct btrfs_extent_ref) + 2155 sizeof(struct btrfs_extent_item)); 2156 again: 2157 mutex_lock(&info->extent_ins_mutex); 2158 while (1) { 2159 ret = find_first_extent_bit(&info->extent_ins, search, &start, 2160 &end, EXTENT_WRITEBACK); 2161 if (ret) { 2162 if (skipped && all && !num_inserts) { 2163 skipped = 0; 2164 search = 0; 2165 continue; 2166 } 2167 mutex_unlock(&info->extent_ins_mutex); 2168 break; 2169 } 2170 2171 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); 2172 if (!ret) { 2173 skipped = 1; 2174 search = end + 1; 2175 if (need_resched()) { 2176 mutex_unlock(&info->extent_ins_mutex); 2177 cond_resched(); 2178 mutex_lock(&info->extent_ins_mutex); 2179 } 2180 continue; 2181 } 2182 2183 ret = get_state_private(&info->extent_ins, start, &priv); 2184 BUG_ON(ret); 2185 extent_op = (struct pending_extent_op *)(unsigned long) priv; 2186 2187 if (extent_op->type == PENDING_EXTENT_INSERT) { 2188 num_inserts++; 2189 list_add_tail(&extent_op->list, &insert_list); 2190 search = end + 1; 2191 if (num_inserts == max_inserts) { 2192 mutex_unlock(&info->extent_ins_mutex); 2193 break; 2194 } 2195 } else if (extent_op->type == PENDING_BACKREF_UPDATE) { 2196 list_add_tail(&extent_op->list, &update_list); 2197 search = end + 1; 2198 } else { 2199 BUG(); 2200 } 2201 } 2202 2203 /* 2204 * process the update list, clear the writeback bit for it, and if 2205 * somebody marked this thing for deletion then just unlock it and be 2206 * done, the free_extents will handle it 2207 */ 2208 mutex_lock(&info->extent_ins_mutex); 2209 list_for_each_entry_safe(extent_op, tmp, &update_list, list) { 2210 clear_extent_bits(&info->extent_ins, extent_op->bytenr, 2211 extent_op->bytenr + extent_op->num_bytes - 1, 2212 EXTENT_WRITEBACK, GFP_NOFS); 2213 if (extent_op->del) { 2214 list_del_init(&extent_op->list); 2215 unlock_extent(&info->extent_ins, extent_op->bytenr, 2216 extent_op->bytenr + extent_op->num_bytes 2217 - 1, GFP_NOFS); 2218 kfree(extent_op); 2219 } 2220 } 2221 mutex_unlock(&info->extent_ins_mutex); 2222 2223 /* 2224 * still have things left on the update list, go ahead an update 2225 * everything 2226 */ 2227 if (!list_empty(&update_list)) { 2228 ret = update_backrefs(trans, extent_root, path, &update_list); 2229 BUG_ON(ret); 2230 } 2231 2232 /* 2233 * if no inserts need to be done, but we skipped some extents and we 2234 * need to make sure everything is cleaned then reset everything and 2235 * go back to the beginning 2236 */ 2237 if (!num_inserts && all && skipped) { 2238 search = 0; 2239 skipped = 0; 2240 INIT_LIST_HEAD(&update_list); 2241 INIT_LIST_HEAD(&insert_list); 2242 goto again; 2243 } else if (!num_inserts) { 2244 goto out; 2245 } 2246 2247 /* 2248 * process the insert extents list. Again if we are deleting this 2249 * extent, then just unlock it, pin down the bytes if need be, and be 2250 * done with it. Saves us from having to actually insert the extent 2251 * into the tree and then subsequently come along and delete it 2252 */ 2253 mutex_lock(&info->extent_ins_mutex); 2254 list_for_each_entry_safe(extent_op, tmp, &insert_list, list) { 2255 clear_extent_bits(&info->extent_ins, extent_op->bytenr, 2256 extent_op->bytenr + extent_op->num_bytes - 1, 2257 EXTENT_WRITEBACK, GFP_NOFS); 2258 if (extent_op->del) { 2259 u64 used; 2260 list_del_init(&extent_op->list); 2261 unlock_extent(&info->extent_ins, extent_op->bytenr, 2262 extent_op->bytenr + extent_op->num_bytes 2263 - 1, GFP_NOFS); 2264 2265 mutex_lock(&extent_root->fs_info->pinned_mutex); 2266 ret = pin_down_bytes(trans, extent_root, 2267 extent_op->bytenr, 2268 extent_op->num_bytes, 0); 2269 mutex_unlock(&extent_root->fs_info->pinned_mutex); 2270 2271 spin_lock(&info->delalloc_lock); 2272 used = btrfs_super_bytes_used(&info->super_copy); 2273 btrfs_set_super_bytes_used(&info->super_copy, 2274 used - extent_op->num_bytes); 2275 used = btrfs_root_used(&extent_root->root_item); 2276 btrfs_set_root_used(&extent_root->root_item, 2277 used - extent_op->num_bytes); 2278 spin_unlock(&info->delalloc_lock); 2279 2280 ret = update_block_group(trans, extent_root, 2281 extent_op->bytenr, 2282 extent_op->num_bytes, 2283 0, ret > 0); 2284 BUG_ON(ret); 2285 kfree(extent_op); 2286 num_inserts--; 2287 } 2288 } 2289 mutex_unlock(&info->extent_ins_mutex); 2290 2291 ret = insert_extents(trans, extent_root, path, &insert_list, 2292 num_inserts); 2293 BUG_ON(ret); 2294 2295 /* 2296 * if we broke out of the loop in order to insert stuff because we hit 2297 * the maximum number of inserts at a time we can handle, then loop 2298 * back and pick up where we left off 2299 */ 2300 if (num_inserts == max_inserts) { 2301 INIT_LIST_HEAD(&insert_list); 2302 INIT_LIST_HEAD(&update_list); 2303 num_inserts = 0; 2304 goto again; 2305 } 2306 2307 /* 2308 * again, if we need to make absolutely sure there are no more pending 2309 * extent operations left and we know that we skipped some, go back to 2310 * the beginning and do it all again 2311 */ 2312 if (all && skipped) { 2313 INIT_LIST_HEAD(&insert_list); 2314 INIT_LIST_HEAD(&update_list); 2315 search = 0; 2316 skipped = 0; 2317 num_inserts = 0; 2318 goto again; 2319 } 2320 out: 2321 btrfs_free_path(path); 2322 return 0; 2323 } 2324 2325 static int pin_down_bytes(struct btrfs_trans_handle *trans, 2326 struct btrfs_root *root, 2327 u64 bytenr, u64 num_bytes, int is_data) 2328 { 2329 int err = 0; 2330 struct extent_buffer *buf; 2331 2332 if (is_data) 2333 goto pinit; 2334 2335 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 2336 if (!buf) 2337 goto pinit; 2338 2339 /* we can reuse a block if it hasn't been written 2340 * and it is from this transaction. We can't 2341 * reuse anything from the tree log root because 2342 * it has tiny sub-transactions. 2343 */ 2344 if (btrfs_buffer_uptodate(buf, 0) && 2345 btrfs_try_tree_lock(buf)) { 2346 u64 header_owner = btrfs_header_owner(buf); 2347 u64 header_transid = btrfs_header_generation(buf); 2348 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 2349 header_owner != BTRFS_TREE_RELOC_OBJECTID && 2350 header_transid == trans->transid && 2351 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 2352 clean_tree_block(NULL, root, buf); 2353 btrfs_tree_unlock(buf); 2354 free_extent_buffer(buf); 2355 return 1; 2356 } 2357 btrfs_tree_unlock(buf); 2358 } 2359 free_extent_buffer(buf); 2360 pinit: 2361 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 2362 2363 BUG_ON(err < 0); 2364 return 0; 2365 } 2366 2367 /* 2368 * remove an extent from the root, returns 0 on success 2369 */ 2370 static int __free_extent(struct btrfs_trans_handle *trans, 2371 struct btrfs_root *root, 2372 u64 bytenr, u64 num_bytes, u64 parent, 2373 u64 root_objectid, u64 ref_generation, 2374 u64 owner_objectid, int pin, int mark_free) 2375 { 2376 struct btrfs_path *path; 2377 struct btrfs_key key; 2378 struct btrfs_fs_info *info = root->fs_info; 2379 struct btrfs_root *extent_root = info->extent_root; 2380 struct extent_buffer *leaf; 2381 int ret; 2382 int extent_slot = 0; 2383 int found_extent = 0; 2384 int num_to_del = 1; 2385 struct btrfs_extent_item *ei; 2386 u32 refs; 2387 2388 key.objectid = bytenr; 2389 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 2390 key.offset = num_bytes; 2391 path = btrfs_alloc_path(); 2392 if (!path) 2393 return -ENOMEM; 2394 2395 path->reada = 1; 2396 ret = lookup_extent_backref(trans, extent_root, path, 2397 bytenr, parent, root_objectid, 2398 ref_generation, owner_objectid, 1); 2399 if (ret == 0) { 2400 struct btrfs_key found_key; 2401 extent_slot = path->slots[0]; 2402 while (extent_slot > 0) { 2403 extent_slot--; 2404 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2405 extent_slot); 2406 if (found_key.objectid != bytenr) 2407 break; 2408 if (found_key.type == BTRFS_EXTENT_ITEM_KEY && 2409 found_key.offset == num_bytes) { 2410 found_extent = 1; 2411 break; 2412 } 2413 if (path->slots[0] - extent_slot > 5) 2414 break; 2415 } 2416 if (!found_extent) { 2417 ret = remove_extent_backref(trans, extent_root, path); 2418 BUG_ON(ret); 2419 btrfs_release_path(extent_root, path); 2420 ret = btrfs_search_slot(trans, extent_root, 2421 &key, path, -1, 1); 2422 if (ret) { 2423 printk(KERN_ERR "umm, got %d back from search" 2424 ", was looking for %llu\n", ret, 2425 (unsigned long long)bytenr); 2426 btrfs_print_leaf(extent_root, path->nodes[0]); 2427 } 2428 BUG_ON(ret); 2429 extent_slot = path->slots[0]; 2430 } 2431 } else { 2432 btrfs_print_leaf(extent_root, path->nodes[0]); 2433 WARN_ON(1); 2434 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 2435 "root %llu gen %llu owner %llu\n", 2436 (unsigned long long)bytenr, 2437 (unsigned long long)root_objectid, 2438 (unsigned long long)ref_generation, 2439 (unsigned long long)owner_objectid); 2440 } 2441 2442 leaf = path->nodes[0]; 2443 ei = btrfs_item_ptr(leaf, extent_slot, 2444 struct btrfs_extent_item); 2445 refs = btrfs_extent_refs(leaf, ei); 2446 BUG_ON(refs == 0); 2447 refs -= 1; 2448 btrfs_set_extent_refs(leaf, ei, refs); 2449 2450 btrfs_mark_buffer_dirty(leaf); 2451 2452 if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { 2453 struct btrfs_extent_ref *ref; 2454 ref = btrfs_item_ptr(leaf, path->slots[0], 2455 struct btrfs_extent_ref); 2456 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); 2457 /* if the back ref and the extent are next to each other 2458 * they get deleted below in one shot 2459 */ 2460 path->slots[0] = extent_slot; 2461 num_to_del = 2; 2462 } else if (found_extent) { 2463 /* otherwise delete the extent back ref */ 2464 ret = remove_extent_backref(trans, extent_root, path); 2465 BUG_ON(ret); 2466 /* if refs are 0, we need to setup the path for deletion */ 2467 if (refs == 0) { 2468 btrfs_release_path(extent_root, path); 2469 ret = btrfs_search_slot(trans, extent_root, &key, path, 2470 -1, 1); 2471 BUG_ON(ret); 2472 } 2473 } 2474 2475 if (refs == 0) { 2476 u64 super_used; 2477 u64 root_used; 2478 2479 if (pin) { 2480 mutex_lock(&root->fs_info->pinned_mutex); 2481 ret = pin_down_bytes(trans, root, bytenr, num_bytes, 2482 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID); 2483 mutex_unlock(&root->fs_info->pinned_mutex); 2484 if (ret > 0) 2485 mark_free = 1; 2486 BUG_ON(ret < 0); 2487 } 2488 /* block accounting for super block */ 2489 spin_lock(&info->delalloc_lock); 2490 super_used = btrfs_super_bytes_used(&info->super_copy); 2491 btrfs_set_super_bytes_used(&info->super_copy, 2492 super_used - num_bytes); 2493 2494 /* block accounting for root item */ 2495 root_used = btrfs_root_used(&root->root_item); 2496 btrfs_set_root_used(&root->root_item, 2497 root_used - num_bytes); 2498 spin_unlock(&info->delalloc_lock); 2499 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 2500 num_to_del); 2501 BUG_ON(ret); 2502 btrfs_release_path(extent_root, path); 2503 2504 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 2505 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 2506 BUG_ON(ret); 2507 } 2508 2509 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 2510 mark_free); 2511 BUG_ON(ret); 2512 } 2513 btrfs_free_path(path); 2514 finish_current_insert(trans, extent_root, 0); 2515 return ret; 2516 } 2517 2518 /* 2519 * find all the blocks marked as pending in the radix tree and remove 2520 * them from the extent map 2521 */ 2522 static int del_pending_extents(struct btrfs_trans_handle *trans, 2523 struct btrfs_root *extent_root, int all) 2524 { 2525 int ret; 2526 int err = 0; 2527 u64 start; 2528 u64 end; 2529 u64 priv; 2530 u64 search = 0; 2531 int nr = 0, skipped = 0; 2532 struct extent_io_tree *pending_del; 2533 struct extent_io_tree *extent_ins; 2534 struct pending_extent_op *extent_op; 2535 struct btrfs_fs_info *info = extent_root->fs_info; 2536 struct list_head delete_list; 2537 2538 INIT_LIST_HEAD(&delete_list); 2539 extent_ins = &extent_root->fs_info->extent_ins; 2540 pending_del = &extent_root->fs_info->pending_del; 2541 2542 again: 2543 mutex_lock(&info->extent_ins_mutex); 2544 while (1) { 2545 ret = find_first_extent_bit(pending_del, search, &start, &end, 2546 EXTENT_WRITEBACK); 2547 if (ret) { 2548 if (all && skipped && !nr) { 2549 search = 0; 2550 continue; 2551 } 2552 mutex_unlock(&info->extent_ins_mutex); 2553 break; 2554 } 2555 2556 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS); 2557 if (!ret) { 2558 search = end+1; 2559 skipped = 1; 2560 2561 if (need_resched()) { 2562 mutex_unlock(&info->extent_ins_mutex); 2563 cond_resched(); 2564 mutex_lock(&info->extent_ins_mutex); 2565 } 2566 2567 continue; 2568 } 2569 BUG_ON(ret < 0); 2570 2571 ret = get_state_private(pending_del, start, &priv); 2572 BUG_ON(ret); 2573 extent_op = (struct pending_extent_op *)(unsigned long)priv; 2574 2575 clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK, 2576 GFP_NOFS); 2577 if (!test_range_bit(extent_ins, start, end, 2578 EXTENT_WRITEBACK, 0)) { 2579 list_add_tail(&extent_op->list, &delete_list); 2580 nr++; 2581 } else { 2582 kfree(extent_op); 2583 2584 ret = get_state_private(&info->extent_ins, start, 2585 &priv); 2586 BUG_ON(ret); 2587 extent_op = (struct pending_extent_op *) 2588 (unsigned long)priv; 2589 2590 clear_extent_bits(&info->extent_ins, start, end, 2591 EXTENT_WRITEBACK, GFP_NOFS); 2592 2593 if (extent_op->type == PENDING_BACKREF_UPDATE) { 2594 list_add_tail(&extent_op->list, &delete_list); 2595 search = end + 1; 2596 nr++; 2597 continue; 2598 } 2599 2600 mutex_lock(&extent_root->fs_info->pinned_mutex); 2601 ret = pin_down_bytes(trans, extent_root, start, 2602 end + 1 - start, 0); 2603 mutex_unlock(&extent_root->fs_info->pinned_mutex); 2604 2605 ret = update_block_group(trans, extent_root, start, 2606 end + 1 - start, 0, ret > 0); 2607 2608 unlock_extent(extent_ins, start, end, GFP_NOFS); 2609 BUG_ON(ret); 2610 kfree(extent_op); 2611 } 2612 if (ret) 2613 err = ret; 2614 2615 search = end + 1; 2616 2617 if (need_resched()) { 2618 mutex_unlock(&info->extent_ins_mutex); 2619 cond_resched(); 2620 mutex_lock(&info->extent_ins_mutex); 2621 } 2622 } 2623 2624 if (nr) { 2625 ret = free_extents(trans, extent_root, &delete_list); 2626 BUG_ON(ret); 2627 } 2628 2629 if (all && skipped) { 2630 INIT_LIST_HEAD(&delete_list); 2631 search = 0; 2632 nr = 0; 2633 goto again; 2634 } 2635 2636 return err; 2637 } 2638 2639 /* 2640 * remove an extent from the root, returns 0 on success 2641 */ 2642 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 2643 struct btrfs_root *root, 2644 u64 bytenr, u64 num_bytes, u64 parent, 2645 u64 root_objectid, u64 ref_generation, 2646 u64 owner_objectid, int pin) 2647 { 2648 struct btrfs_root *extent_root = root->fs_info->extent_root; 2649 int pending_ret; 2650 int ret; 2651 2652 WARN_ON(num_bytes < root->sectorsize); 2653 if (root == extent_root) { 2654 struct pending_extent_op *extent_op = NULL; 2655 2656 mutex_lock(&root->fs_info->extent_ins_mutex); 2657 if (test_range_bit(&root->fs_info->extent_ins, bytenr, 2658 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { 2659 u64 priv; 2660 ret = get_state_private(&root->fs_info->extent_ins, 2661 bytenr, &priv); 2662 BUG_ON(ret); 2663 extent_op = (struct pending_extent_op *) 2664 (unsigned long)priv; 2665 2666 extent_op->del = 1; 2667 if (extent_op->type == PENDING_EXTENT_INSERT) { 2668 mutex_unlock(&root->fs_info->extent_ins_mutex); 2669 return 0; 2670 } 2671 } 2672 2673 if (extent_op) { 2674 ref_generation = extent_op->orig_generation; 2675 parent = extent_op->orig_parent; 2676 } 2677 2678 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2679 BUG_ON(!extent_op); 2680 2681 extent_op->type = PENDING_EXTENT_DELETE; 2682 extent_op->bytenr = bytenr; 2683 extent_op->num_bytes = num_bytes; 2684 extent_op->parent = parent; 2685 extent_op->orig_parent = parent; 2686 extent_op->generation = ref_generation; 2687 extent_op->orig_generation = ref_generation; 2688 extent_op->level = (int)owner_objectid; 2689 INIT_LIST_HEAD(&extent_op->list); 2690 extent_op->del = 0; 2691 2692 set_extent_bits(&root->fs_info->pending_del, 2693 bytenr, bytenr + num_bytes - 1, 2694 EXTENT_WRITEBACK, GFP_NOFS); 2695 set_state_private(&root->fs_info->pending_del, 2696 bytenr, (unsigned long)extent_op); 2697 mutex_unlock(&root->fs_info->extent_ins_mutex); 2698 return 0; 2699 } 2700 /* if metadata always pin */ 2701 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 2702 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 2703 struct btrfs_block_group_cache *cache; 2704 2705 /* btrfs_free_reserved_extent */ 2706 cache = btrfs_lookup_block_group(root->fs_info, bytenr); 2707 BUG_ON(!cache); 2708 btrfs_add_free_space(cache, bytenr, num_bytes); 2709 put_block_group(cache); 2710 update_reserved_extents(root, bytenr, num_bytes, 0); 2711 return 0; 2712 } 2713 pin = 1; 2714 } 2715 2716 /* if data pin when any transaction has committed this */ 2717 if (ref_generation != trans->transid) 2718 pin = 1; 2719 2720 ret = __free_extent(trans, root, bytenr, num_bytes, parent, 2721 root_objectid, ref_generation, 2722 owner_objectid, pin, pin == 0); 2723 2724 finish_current_insert(trans, root->fs_info->extent_root, 0); 2725 pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0); 2726 return ret ? ret : pending_ret; 2727 } 2728 2729 int btrfs_free_extent(struct btrfs_trans_handle *trans, 2730 struct btrfs_root *root, 2731 u64 bytenr, u64 num_bytes, u64 parent, 2732 u64 root_objectid, u64 ref_generation, 2733 u64 owner_objectid, int pin) 2734 { 2735 int ret; 2736 2737 ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, 2738 root_objectid, ref_generation, 2739 owner_objectid, pin); 2740 return ret; 2741 } 2742 2743 static u64 stripe_align(struct btrfs_root *root, u64 val) 2744 { 2745 u64 mask = ((u64)root->stripesize - 1); 2746 u64 ret = (val + mask) & ~mask; 2747 return ret; 2748 } 2749 2750 /* 2751 * walks the btree of allocated extents and find a hole of a given size. 2752 * The key ins is changed to record the hole: 2753 * ins->objectid == block start 2754 * ins->flags = BTRFS_EXTENT_ITEM_KEY 2755 * ins->offset == number of blocks 2756 * Any available blocks before search_start are skipped. 2757 */ 2758 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 2759 struct btrfs_root *orig_root, 2760 u64 num_bytes, u64 empty_size, 2761 u64 search_start, u64 search_end, 2762 u64 hint_byte, struct btrfs_key *ins, 2763 u64 exclude_start, u64 exclude_nr, 2764 int data) 2765 { 2766 int ret = 0; 2767 struct btrfs_root *root = orig_root->fs_info->extent_root; 2768 u64 total_needed = num_bytes; 2769 u64 *last_ptr = NULL; 2770 u64 last_wanted = 0; 2771 struct btrfs_block_group_cache *block_group = NULL; 2772 int chunk_alloc_done = 0; 2773 int empty_cluster = 2 * 1024 * 1024; 2774 int allowed_chunk_alloc = 0; 2775 struct list_head *head = NULL, *cur = NULL; 2776 int loop = 0; 2777 int extra_loop = 0; 2778 struct btrfs_space_info *space_info; 2779 2780 WARN_ON(num_bytes < root->sectorsize); 2781 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 2782 ins->objectid = 0; 2783 ins->offset = 0; 2784 2785 if (orig_root->ref_cows || empty_size) 2786 allowed_chunk_alloc = 1; 2787 2788 if (data & BTRFS_BLOCK_GROUP_METADATA) { 2789 last_ptr = &root->fs_info->last_alloc; 2790 empty_cluster = 64 * 1024; 2791 } 2792 2793 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) 2794 last_ptr = &root->fs_info->last_data_alloc; 2795 2796 if (last_ptr) { 2797 if (*last_ptr) { 2798 hint_byte = *last_ptr; 2799 last_wanted = *last_ptr; 2800 } else 2801 empty_size += empty_cluster; 2802 } else { 2803 empty_cluster = 0; 2804 } 2805 search_start = max(search_start, first_logical_byte(root, 0)); 2806 search_start = max(search_start, hint_byte); 2807 2808 if (last_wanted && search_start != last_wanted) { 2809 last_wanted = 0; 2810 empty_size += empty_cluster; 2811 } 2812 2813 total_needed += empty_size; 2814 block_group = btrfs_lookup_block_group(root->fs_info, search_start); 2815 if (!block_group) 2816 block_group = btrfs_lookup_first_block_group(root->fs_info, 2817 search_start); 2818 space_info = __find_space_info(root->fs_info, data); 2819 2820 down_read(&space_info->groups_sem); 2821 while (1) { 2822 struct btrfs_free_space *free_space; 2823 /* 2824 * the only way this happens if our hint points to a block 2825 * group thats not of the proper type, while looping this 2826 * should never happen 2827 */ 2828 if (empty_size) 2829 extra_loop = 1; 2830 2831 if (!block_group) 2832 goto new_group_no_lock; 2833 2834 if (unlikely(!block_group->cached)) { 2835 mutex_lock(&block_group->cache_mutex); 2836 ret = cache_block_group(root, block_group); 2837 mutex_unlock(&block_group->cache_mutex); 2838 if (ret) 2839 break; 2840 } 2841 2842 mutex_lock(&block_group->alloc_mutex); 2843 if (unlikely(!block_group_bits(block_group, data))) 2844 goto new_group; 2845 2846 if (unlikely(block_group->ro)) 2847 goto new_group; 2848 2849 free_space = btrfs_find_free_space(block_group, search_start, 2850 total_needed); 2851 if (free_space) { 2852 u64 start = block_group->key.objectid; 2853 u64 end = block_group->key.objectid + 2854 block_group->key.offset; 2855 2856 search_start = stripe_align(root, free_space->offset); 2857 2858 /* move on to the next group */ 2859 if (search_start + num_bytes >= search_end) 2860 goto new_group; 2861 2862 /* move on to the next group */ 2863 if (search_start + num_bytes > end) 2864 goto new_group; 2865 2866 if (last_wanted && search_start != last_wanted) { 2867 total_needed += empty_cluster; 2868 empty_size += empty_cluster; 2869 last_wanted = 0; 2870 /* 2871 * if search_start is still in this block group 2872 * then we just re-search this block group 2873 */ 2874 if (search_start >= start && 2875 search_start < end) { 2876 mutex_unlock(&block_group->alloc_mutex); 2877 continue; 2878 } 2879 2880 /* else we go to the next block group */ 2881 goto new_group; 2882 } 2883 2884 if (exclude_nr > 0 && 2885 (search_start + num_bytes > exclude_start && 2886 search_start < exclude_start + exclude_nr)) { 2887 search_start = exclude_start + exclude_nr; 2888 /* 2889 * if search_start is still in this block group 2890 * then we just re-search this block group 2891 */ 2892 if (search_start >= start && 2893 search_start < end) { 2894 mutex_unlock(&block_group->alloc_mutex); 2895 last_wanted = 0; 2896 continue; 2897 } 2898 2899 /* else we go to the next block group */ 2900 goto new_group; 2901 } 2902 2903 ins->objectid = search_start; 2904 ins->offset = num_bytes; 2905 2906 btrfs_remove_free_space_lock(block_group, search_start, 2907 num_bytes); 2908 /* we are all good, lets return */ 2909 mutex_unlock(&block_group->alloc_mutex); 2910 break; 2911 } 2912 new_group: 2913 mutex_unlock(&block_group->alloc_mutex); 2914 put_block_group(block_group); 2915 block_group = NULL; 2916 new_group_no_lock: 2917 /* don't try to compare new allocations against the 2918 * last allocation any more 2919 */ 2920 last_wanted = 0; 2921 2922 /* 2923 * Here's how this works. 2924 * loop == 0: we were searching a block group via a hint 2925 * and didn't find anything, so we start at 2926 * the head of the block groups and keep searching 2927 * loop == 1: we're searching through all of the block groups 2928 * if we hit the head again we have searched 2929 * all of the block groups for this space and we 2930 * need to try and allocate, if we cant error out. 2931 * loop == 2: we allocated more space and are looping through 2932 * all of the block groups again. 2933 */ 2934 if (loop == 0) { 2935 head = &space_info->block_groups; 2936 cur = head->next; 2937 loop++; 2938 } else if (loop == 1 && cur == head) { 2939 int keep_going; 2940 2941 /* at this point we give up on the empty_size 2942 * allocations and just try to allocate the min 2943 * space. 2944 * 2945 * The extra_loop field was set if an empty_size 2946 * allocation was attempted above, and if this 2947 * is try we need to try the loop again without 2948 * the additional empty_size. 2949 */ 2950 total_needed -= empty_size; 2951 empty_size = 0; 2952 keep_going = extra_loop; 2953 loop++; 2954 2955 if (allowed_chunk_alloc && !chunk_alloc_done) { 2956 up_read(&space_info->groups_sem); 2957 ret = do_chunk_alloc(trans, root, num_bytes + 2958 2 * 1024 * 1024, data, 1); 2959 down_read(&space_info->groups_sem); 2960 if (ret < 0) 2961 goto loop_check; 2962 head = &space_info->block_groups; 2963 /* 2964 * we've allocated a new chunk, keep 2965 * trying 2966 */ 2967 keep_going = 1; 2968 chunk_alloc_done = 1; 2969 } else if (!allowed_chunk_alloc) { 2970 space_info->force_alloc = 1; 2971 } 2972 loop_check: 2973 if (keep_going) { 2974 cur = head->next; 2975 extra_loop = 0; 2976 } else { 2977 break; 2978 } 2979 } else if (cur == head) { 2980 break; 2981 } 2982 2983 block_group = list_entry(cur, struct btrfs_block_group_cache, 2984 list); 2985 atomic_inc(&block_group->count); 2986 2987 search_start = block_group->key.objectid; 2988 cur = cur->next; 2989 } 2990 2991 /* we found what we needed */ 2992 if (ins->objectid) { 2993 if (!(data & BTRFS_BLOCK_GROUP_DATA)) 2994 trans->block_group = block_group->key.objectid; 2995 2996 if (last_ptr) 2997 *last_ptr = ins->objectid + ins->offset; 2998 ret = 0; 2999 } else if (!ret) { 3000 printk(KERN_ERR "btrfs searching for %llu bytes, " 3001 "num_bytes %llu, loop %d, allowed_alloc %d\n", 3002 (unsigned long long)total_needed, 3003 (unsigned long long)num_bytes, 3004 loop, allowed_chunk_alloc); 3005 ret = -ENOSPC; 3006 } 3007 if (block_group) 3008 put_block_group(block_group); 3009 3010 up_read(&space_info->groups_sem); 3011 return ret; 3012 } 3013 3014 static void dump_space_info(struct btrfs_space_info *info, u64 bytes) 3015 { 3016 struct btrfs_block_group_cache *cache; 3017 struct list_head *l; 3018 3019 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 3020 (unsigned long long)(info->total_bytes - info->bytes_used - 3021 info->bytes_pinned - info->bytes_reserved), 3022 (info->full) ? "" : "not "); 3023 3024 down_read(&info->groups_sem); 3025 list_for_each(l, &info->block_groups) { 3026 cache = list_entry(l, struct btrfs_block_group_cache, list); 3027 spin_lock(&cache->lock); 3028 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 3029 "%llu pinned %llu reserved\n", 3030 (unsigned long long)cache->key.objectid, 3031 (unsigned long long)cache->key.offset, 3032 (unsigned long long)btrfs_block_group_used(&cache->item), 3033 (unsigned long long)cache->pinned, 3034 (unsigned long long)cache->reserved); 3035 btrfs_dump_free_space(cache, bytes); 3036 spin_unlock(&cache->lock); 3037 } 3038 up_read(&info->groups_sem); 3039 } 3040 3041 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, 3042 struct btrfs_root *root, 3043 u64 num_bytes, u64 min_alloc_size, 3044 u64 empty_size, u64 hint_byte, 3045 u64 search_end, struct btrfs_key *ins, 3046 u64 data) 3047 { 3048 int ret; 3049 u64 search_start = 0; 3050 u64 alloc_profile; 3051 struct btrfs_fs_info *info = root->fs_info; 3052 3053 if (data) { 3054 alloc_profile = info->avail_data_alloc_bits & 3055 info->data_alloc_profile; 3056 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; 3057 } else if (root == root->fs_info->chunk_root) { 3058 alloc_profile = info->avail_system_alloc_bits & 3059 info->system_alloc_profile; 3060 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; 3061 } else { 3062 alloc_profile = info->avail_metadata_alloc_bits & 3063 info->metadata_alloc_profile; 3064 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; 3065 } 3066 again: 3067 data = btrfs_reduce_alloc_profile(root, data); 3068 /* 3069 * the only place that sets empty_size is btrfs_realloc_node, which 3070 * is not called recursively on allocations 3071 */ 3072 if (empty_size || root->ref_cows) { 3073 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { 3074 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3075 2 * 1024 * 1024, 3076 BTRFS_BLOCK_GROUP_METADATA | 3077 (info->metadata_alloc_profile & 3078 info->avail_metadata_alloc_bits), 0); 3079 } 3080 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3081 num_bytes + 2 * 1024 * 1024, data, 0); 3082 } 3083 3084 WARN_ON(num_bytes < root->sectorsize); 3085 ret = find_free_extent(trans, root, num_bytes, empty_size, 3086 search_start, search_end, hint_byte, ins, 3087 trans->alloc_exclude_start, 3088 trans->alloc_exclude_nr, data); 3089 3090 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 3091 num_bytes = num_bytes >> 1; 3092 num_bytes = num_bytes & ~(root->sectorsize - 1); 3093 num_bytes = max(num_bytes, min_alloc_size); 3094 do_chunk_alloc(trans, root->fs_info->extent_root, 3095 num_bytes, data, 1); 3096 goto again; 3097 } 3098 if (ret) { 3099 struct btrfs_space_info *sinfo; 3100 3101 sinfo = __find_space_info(root->fs_info, data); 3102 printk(KERN_ERR "btrfs allocation failed flags %llu, " 3103 "wanted %llu\n", (unsigned long long)data, 3104 (unsigned long long)num_bytes); 3105 dump_space_info(sinfo, num_bytes); 3106 BUG(); 3107 } 3108 3109 return ret; 3110 } 3111 3112 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 3113 { 3114 struct btrfs_block_group_cache *cache; 3115 int ret = 0; 3116 3117 cache = btrfs_lookup_block_group(root->fs_info, start); 3118 if (!cache) { 3119 printk(KERN_ERR "Unable to find block group for %llu\n", 3120 (unsigned long long)start); 3121 return -ENOSPC; 3122 } 3123 3124 ret = btrfs_discard_extent(root, start, len); 3125 3126 btrfs_add_free_space(cache, start, len); 3127 put_block_group(cache); 3128 update_reserved_extents(root, start, len, 0); 3129 3130 return ret; 3131 } 3132 3133 int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 3134 struct btrfs_root *root, 3135 u64 num_bytes, u64 min_alloc_size, 3136 u64 empty_size, u64 hint_byte, 3137 u64 search_end, struct btrfs_key *ins, 3138 u64 data) 3139 { 3140 int ret; 3141 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, 3142 empty_size, hint_byte, search_end, ins, 3143 data); 3144 update_reserved_extents(root, ins->objectid, ins->offset, 1); 3145 return ret; 3146 } 3147 3148 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 3149 struct btrfs_root *root, u64 parent, 3150 u64 root_objectid, u64 ref_generation, 3151 u64 owner, struct btrfs_key *ins) 3152 { 3153 int ret; 3154 int pending_ret; 3155 u64 super_used; 3156 u64 root_used; 3157 u64 num_bytes = ins->offset; 3158 u32 sizes[2]; 3159 struct btrfs_fs_info *info = root->fs_info; 3160 struct btrfs_root *extent_root = info->extent_root; 3161 struct btrfs_extent_item *extent_item; 3162 struct btrfs_extent_ref *ref; 3163 struct btrfs_path *path; 3164 struct btrfs_key keys[2]; 3165 3166 if (parent == 0) 3167 parent = ins->objectid; 3168 3169 /* block accounting for super block */ 3170 spin_lock(&info->delalloc_lock); 3171 super_used = btrfs_super_bytes_used(&info->super_copy); 3172 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); 3173 3174 /* block accounting for root item */ 3175 root_used = btrfs_root_used(&root->root_item); 3176 btrfs_set_root_used(&root->root_item, root_used + num_bytes); 3177 spin_unlock(&info->delalloc_lock); 3178 3179 if (root == extent_root) { 3180 struct pending_extent_op *extent_op; 3181 3182 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 3183 BUG_ON(!extent_op); 3184 3185 extent_op->type = PENDING_EXTENT_INSERT; 3186 extent_op->bytenr = ins->objectid; 3187 extent_op->num_bytes = ins->offset; 3188 extent_op->parent = parent; 3189 extent_op->orig_parent = 0; 3190 extent_op->generation = ref_generation; 3191 extent_op->orig_generation = 0; 3192 extent_op->level = (int)owner; 3193 INIT_LIST_HEAD(&extent_op->list); 3194 extent_op->del = 0; 3195 3196 mutex_lock(&root->fs_info->extent_ins_mutex); 3197 set_extent_bits(&root->fs_info->extent_ins, ins->objectid, 3198 ins->objectid + ins->offset - 1, 3199 EXTENT_WRITEBACK, GFP_NOFS); 3200 set_state_private(&root->fs_info->extent_ins, 3201 ins->objectid, (unsigned long)extent_op); 3202 mutex_unlock(&root->fs_info->extent_ins_mutex); 3203 goto update_block; 3204 } 3205 3206 memcpy(&keys[0], ins, sizeof(*ins)); 3207 keys[1].objectid = ins->objectid; 3208 keys[1].type = BTRFS_EXTENT_REF_KEY; 3209 keys[1].offset = parent; 3210 sizes[0] = sizeof(*extent_item); 3211 sizes[1] = sizeof(*ref); 3212 3213 path = btrfs_alloc_path(); 3214 BUG_ON(!path); 3215 3216 ret = btrfs_insert_empty_items(trans, extent_root, path, keys, 3217 sizes, 2); 3218 BUG_ON(ret); 3219 3220 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3221 struct btrfs_extent_item); 3222 btrfs_set_extent_refs(path->nodes[0], extent_item, 1); 3223 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, 3224 struct btrfs_extent_ref); 3225 3226 btrfs_set_ref_root(path->nodes[0], ref, root_objectid); 3227 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); 3228 btrfs_set_ref_objectid(path->nodes[0], ref, owner); 3229 btrfs_set_ref_num_refs(path->nodes[0], ref, 1); 3230 3231 btrfs_mark_buffer_dirty(path->nodes[0]); 3232 3233 trans->alloc_exclude_start = 0; 3234 trans->alloc_exclude_nr = 0; 3235 btrfs_free_path(path); 3236 finish_current_insert(trans, extent_root, 0); 3237 pending_ret = del_pending_extents(trans, extent_root, 0); 3238 3239 if (ret) 3240 goto out; 3241 if (pending_ret) { 3242 ret = pending_ret; 3243 goto out; 3244 } 3245 3246 update_block: 3247 ret = update_block_group(trans, root, ins->objectid, 3248 ins->offset, 1, 0); 3249 if (ret) { 3250 printk(KERN_ERR "btrfs update block group failed for %llu " 3251 "%llu\n", (unsigned long long)ins->objectid, 3252 (unsigned long long)ins->offset); 3253 BUG(); 3254 } 3255 out: 3256 return ret; 3257 } 3258 3259 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 3260 struct btrfs_root *root, u64 parent, 3261 u64 root_objectid, u64 ref_generation, 3262 u64 owner, struct btrfs_key *ins) 3263 { 3264 int ret; 3265 3266 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) 3267 return 0; 3268 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 3269 ref_generation, owner, ins); 3270 update_reserved_extents(root, ins->objectid, ins->offset, 0); 3271 return ret; 3272 } 3273 3274 /* 3275 * this is used by the tree logging recovery code. It records that 3276 * an extent has been allocated and makes sure to clear the free 3277 * space cache bits as well 3278 */ 3279 int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, 3280 struct btrfs_root *root, u64 parent, 3281 u64 root_objectid, u64 ref_generation, 3282 u64 owner, struct btrfs_key *ins) 3283 { 3284 int ret; 3285 struct btrfs_block_group_cache *block_group; 3286 3287 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 3288 mutex_lock(&block_group->cache_mutex); 3289 cache_block_group(root, block_group); 3290 mutex_unlock(&block_group->cache_mutex); 3291 3292 ret = btrfs_remove_free_space(block_group, ins->objectid, 3293 ins->offset); 3294 BUG_ON(ret); 3295 put_block_group(block_group); 3296 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 3297 ref_generation, owner, ins); 3298 return ret; 3299 } 3300 3301 /* 3302 * finds a free extent and does all the dirty work required for allocation 3303 * returns the key for the extent through ins, and a tree buffer for 3304 * the first block of the extent through buf. 3305 * 3306 * returns 0 if everything worked, non-zero otherwise. 3307 */ 3308 int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 3309 struct btrfs_root *root, 3310 u64 num_bytes, u64 parent, u64 min_alloc_size, 3311 u64 root_objectid, u64 ref_generation, 3312 u64 owner_objectid, u64 empty_size, u64 hint_byte, 3313 u64 search_end, struct btrfs_key *ins, u64 data) 3314 { 3315 int ret; 3316 3317 ret = __btrfs_reserve_extent(trans, root, num_bytes, 3318 min_alloc_size, empty_size, hint_byte, 3319 search_end, ins, data); 3320 BUG_ON(ret); 3321 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 3322 ret = __btrfs_alloc_reserved_extent(trans, root, parent, 3323 root_objectid, ref_generation, 3324 owner_objectid, ins); 3325 BUG_ON(ret); 3326 3327 } else { 3328 update_reserved_extents(root, ins->objectid, ins->offset, 1); 3329 } 3330 return ret; 3331 } 3332 3333 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 3334 struct btrfs_root *root, 3335 u64 bytenr, u32 blocksize) 3336 { 3337 struct extent_buffer *buf; 3338 3339 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 3340 if (!buf) 3341 return ERR_PTR(-ENOMEM); 3342 btrfs_set_header_generation(buf, trans->transid); 3343 btrfs_tree_lock(buf); 3344 clean_tree_block(trans, root, buf); 3345 btrfs_set_buffer_uptodate(buf); 3346 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 3347 set_extent_dirty(&root->dirty_log_pages, buf->start, 3348 buf->start + buf->len - 1, GFP_NOFS); 3349 } else { 3350 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 3351 buf->start + buf->len - 1, GFP_NOFS); 3352 } 3353 trans->blocks_used++; 3354 return buf; 3355 } 3356 3357 /* 3358 * helper function to allocate a block for a given tree 3359 * returns the tree buffer or NULL. 3360 */ 3361 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 3362 struct btrfs_root *root, 3363 u32 blocksize, u64 parent, 3364 u64 root_objectid, 3365 u64 ref_generation, 3366 int level, 3367 u64 hint, 3368 u64 empty_size) 3369 { 3370 struct btrfs_key ins; 3371 int ret; 3372 struct extent_buffer *buf; 3373 3374 ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize, 3375 root_objectid, ref_generation, level, 3376 empty_size, hint, (u64)-1, &ins, 0); 3377 if (ret) { 3378 BUG_ON(ret > 0); 3379 return ERR_PTR(ret); 3380 } 3381 3382 buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize); 3383 return buf; 3384 } 3385 3386 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 3387 struct btrfs_root *root, struct extent_buffer *leaf) 3388 { 3389 u64 leaf_owner; 3390 u64 leaf_generation; 3391 struct btrfs_key key; 3392 struct btrfs_file_extent_item *fi; 3393 int i; 3394 int nritems; 3395 int ret; 3396 3397 BUG_ON(!btrfs_is_leaf(leaf)); 3398 nritems = btrfs_header_nritems(leaf); 3399 leaf_owner = btrfs_header_owner(leaf); 3400 leaf_generation = btrfs_header_generation(leaf); 3401 3402 for (i = 0; i < nritems; i++) { 3403 u64 disk_bytenr; 3404 cond_resched(); 3405 3406 btrfs_item_key_to_cpu(leaf, &key, i); 3407 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 3408 continue; 3409 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 3410 if (btrfs_file_extent_type(leaf, fi) == 3411 BTRFS_FILE_EXTENT_INLINE) 3412 continue; 3413 /* 3414 * FIXME make sure to insert a trans record that 3415 * repeats the snapshot del on crash 3416 */ 3417 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 3418 if (disk_bytenr == 0) 3419 continue; 3420 3421 ret = __btrfs_free_extent(trans, root, disk_bytenr, 3422 btrfs_file_extent_disk_num_bytes(leaf, fi), 3423 leaf->start, leaf_owner, leaf_generation, 3424 key.objectid, 0); 3425 BUG_ON(ret); 3426 3427 atomic_inc(&root->fs_info->throttle_gen); 3428 wake_up(&root->fs_info->transaction_throttle); 3429 cond_resched(); 3430 } 3431 return 0; 3432 } 3433 3434 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 3435 struct btrfs_root *root, 3436 struct btrfs_leaf_ref *ref) 3437 { 3438 int i; 3439 int ret; 3440 struct btrfs_extent_info *info = ref->extents; 3441 3442 for (i = 0; i < ref->nritems; i++) { 3443 ret = __btrfs_free_extent(trans, root, info->bytenr, 3444 info->num_bytes, ref->bytenr, 3445 ref->owner, ref->generation, 3446 info->objectid, 0); 3447 3448 atomic_inc(&root->fs_info->throttle_gen); 3449 wake_up(&root->fs_info->transaction_throttle); 3450 cond_resched(); 3451 3452 BUG_ON(ret); 3453 info++; 3454 } 3455 3456 return 0; 3457 } 3458 3459 static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, 3460 u64 len, u32 *refs) 3461 { 3462 int ret; 3463 3464 ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); 3465 BUG_ON(ret); 3466 3467 #if 0 /* some debugging code in case we see problems here */ 3468 /* if the refs count is one, it won't get increased again. But 3469 * if the ref count is > 1, someone may be decreasing it at 3470 * the same time we are. 3471 */ 3472 if (*refs != 1) { 3473 struct extent_buffer *eb = NULL; 3474 eb = btrfs_find_create_tree_block(root, start, len); 3475 if (eb) 3476 btrfs_tree_lock(eb); 3477 3478 mutex_lock(&root->fs_info->alloc_mutex); 3479 ret = lookup_extent_ref(NULL, root, start, len, refs); 3480 BUG_ON(ret); 3481 mutex_unlock(&root->fs_info->alloc_mutex); 3482 3483 if (eb) { 3484 btrfs_tree_unlock(eb); 3485 free_extent_buffer(eb); 3486 } 3487 if (*refs == 1) { 3488 printk(KERN_ERR "btrfs block %llu went down to one " 3489 "during drop_snap\n", (unsigned long long)start); 3490 } 3491 3492 } 3493 #endif 3494 3495 cond_resched(); 3496 return ret; 3497 } 3498 3499 /* 3500 * helper function for drop_snapshot, this walks down the tree dropping ref 3501 * counts as it goes. 3502 */ 3503 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 3504 struct btrfs_root *root, 3505 struct btrfs_path *path, int *level) 3506 { 3507 u64 root_owner; 3508 u64 root_gen; 3509 u64 bytenr; 3510 u64 ptr_gen; 3511 struct extent_buffer *next; 3512 struct extent_buffer *cur; 3513 struct extent_buffer *parent; 3514 struct btrfs_leaf_ref *ref; 3515 u32 blocksize; 3516 int ret; 3517 u32 refs; 3518 3519 WARN_ON(*level < 0); 3520 WARN_ON(*level >= BTRFS_MAX_LEVEL); 3521 ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, 3522 path->nodes[*level]->len, &refs); 3523 BUG_ON(ret); 3524 if (refs > 1) 3525 goto out; 3526 3527 /* 3528 * walk down to the last node level and free all the leaves 3529 */ 3530 while (*level >= 0) { 3531 WARN_ON(*level < 0); 3532 WARN_ON(*level >= BTRFS_MAX_LEVEL); 3533 cur = path->nodes[*level]; 3534 3535 if (btrfs_header_level(cur) != *level) 3536 WARN_ON(1); 3537 3538 if (path->slots[*level] >= 3539 btrfs_header_nritems(cur)) 3540 break; 3541 if (*level == 0) { 3542 ret = btrfs_drop_leaf_ref(trans, root, cur); 3543 BUG_ON(ret); 3544 break; 3545 } 3546 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 3547 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 3548 blocksize = btrfs_level_size(root, *level - 1); 3549 3550 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3551 BUG_ON(ret); 3552 if (refs != 1) { 3553 parent = path->nodes[*level]; 3554 root_owner = btrfs_header_owner(parent); 3555 root_gen = btrfs_header_generation(parent); 3556 path->slots[*level]++; 3557 3558 ret = __btrfs_free_extent(trans, root, bytenr, 3559 blocksize, parent->start, 3560 root_owner, root_gen, 3561 *level - 1, 1); 3562 BUG_ON(ret); 3563 3564 atomic_inc(&root->fs_info->throttle_gen); 3565 wake_up(&root->fs_info->transaction_throttle); 3566 cond_resched(); 3567 3568 continue; 3569 } 3570 /* 3571 * at this point, we have a single ref, and since the 3572 * only place referencing this extent is a dead root 3573 * the reference count should never go higher. 3574 * So, we don't need to check it again 3575 */ 3576 if (*level == 1) { 3577 ref = btrfs_lookup_leaf_ref(root, bytenr); 3578 if (ref && ref->generation != ptr_gen) { 3579 btrfs_free_leaf_ref(root, ref); 3580 ref = NULL; 3581 } 3582 if (ref) { 3583 ret = cache_drop_leaf_ref(trans, root, ref); 3584 BUG_ON(ret); 3585 btrfs_remove_leaf_ref(root, ref); 3586 btrfs_free_leaf_ref(root, ref); 3587 *level = 0; 3588 break; 3589 } 3590 } 3591 next = btrfs_find_tree_block(root, bytenr, blocksize); 3592 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { 3593 free_extent_buffer(next); 3594 3595 next = read_tree_block(root, bytenr, blocksize, 3596 ptr_gen); 3597 cond_resched(); 3598 #if 0 3599 /* 3600 * this is a debugging check and can go away 3601 * the ref should never go all the way down to 1 3602 * at this point 3603 */ 3604 ret = lookup_extent_ref(NULL, root, bytenr, blocksize, 3605 &refs); 3606 BUG_ON(ret); 3607 WARN_ON(refs != 1); 3608 #endif 3609 } 3610 WARN_ON(*level <= 0); 3611 if (path->nodes[*level-1]) 3612 free_extent_buffer(path->nodes[*level-1]); 3613 path->nodes[*level-1] = next; 3614 *level = btrfs_header_level(next); 3615 path->slots[*level] = 0; 3616 cond_resched(); 3617 } 3618 out: 3619 WARN_ON(*level < 0); 3620 WARN_ON(*level >= BTRFS_MAX_LEVEL); 3621 3622 if (path->nodes[*level] == root->node) { 3623 parent = path->nodes[*level]; 3624 bytenr = path->nodes[*level]->start; 3625 } else { 3626 parent = path->nodes[*level + 1]; 3627 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); 3628 } 3629 3630 blocksize = btrfs_level_size(root, *level); 3631 root_owner = btrfs_header_owner(parent); 3632 root_gen = btrfs_header_generation(parent); 3633 3634 ret = __btrfs_free_extent(trans, root, bytenr, blocksize, 3635 parent->start, root_owner, root_gen, 3636 *level, 1); 3637 free_extent_buffer(path->nodes[*level]); 3638 path->nodes[*level] = NULL; 3639 *level += 1; 3640 BUG_ON(ret); 3641 3642 cond_resched(); 3643 return 0; 3644 } 3645 3646 /* 3647 * helper function for drop_subtree, this function is similar to 3648 * walk_down_tree. The main difference is that it checks reference 3649 * counts while tree blocks are locked. 3650 */ 3651 static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, 3652 struct btrfs_root *root, 3653 struct btrfs_path *path, int *level) 3654 { 3655 struct extent_buffer *next; 3656 struct extent_buffer *cur; 3657 struct extent_buffer *parent; 3658 u64 bytenr; 3659 u64 ptr_gen; 3660 u32 blocksize; 3661 u32 refs; 3662 int ret; 3663 3664 cur = path->nodes[*level]; 3665 ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len, 3666 &refs); 3667 BUG_ON(ret); 3668 if (refs > 1) 3669 goto out; 3670 3671 while (*level >= 0) { 3672 cur = path->nodes[*level]; 3673 if (*level == 0) { 3674 ret = btrfs_drop_leaf_ref(trans, root, cur); 3675 BUG_ON(ret); 3676 clean_tree_block(trans, root, cur); 3677 break; 3678 } 3679 if (path->slots[*level] >= btrfs_header_nritems(cur)) { 3680 clean_tree_block(trans, root, cur); 3681 break; 3682 } 3683 3684 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 3685 blocksize = btrfs_level_size(root, *level - 1); 3686 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 3687 3688 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 3689 btrfs_tree_lock(next); 3690 3691 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, 3692 &refs); 3693 BUG_ON(ret); 3694 if (refs > 1) { 3695 parent = path->nodes[*level]; 3696 ret = btrfs_free_extent(trans, root, bytenr, 3697 blocksize, parent->start, 3698 btrfs_header_owner(parent), 3699 btrfs_header_generation(parent), 3700 *level - 1, 1); 3701 BUG_ON(ret); 3702 path->slots[*level]++; 3703 btrfs_tree_unlock(next); 3704 free_extent_buffer(next); 3705 continue; 3706 } 3707 3708 *level = btrfs_header_level(next); 3709 path->nodes[*level] = next; 3710 path->slots[*level] = 0; 3711 path->locks[*level] = 1; 3712 cond_resched(); 3713 } 3714 out: 3715 parent = path->nodes[*level + 1]; 3716 bytenr = path->nodes[*level]->start; 3717 blocksize = path->nodes[*level]->len; 3718 3719 ret = btrfs_free_extent(trans, root, bytenr, blocksize, 3720 parent->start, btrfs_header_owner(parent), 3721 btrfs_header_generation(parent), *level, 1); 3722 BUG_ON(ret); 3723 3724 if (path->locks[*level]) { 3725 btrfs_tree_unlock(path->nodes[*level]); 3726 path->locks[*level] = 0; 3727 } 3728 free_extent_buffer(path->nodes[*level]); 3729 path->nodes[*level] = NULL; 3730 *level += 1; 3731 cond_resched(); 3732 return 0; 3733 } 3734 3735 /* 3736 * helper for dropping snapshots. This walks back up the tree in the path 3737 * to find the first node higher up where we haven't yet gone through 3738 * all the slots 3739 */ 3740 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 3741 struct btrfs_root *root, 3742 struct btrfs_path *path, 3743 int *level, int max_level) 3744 { 3745 u64 root_owner; 3746 u64 root_gen; 3747 struct btrfs_root_item *root_item = &root->root_item; 3748 int i; 3749 int slot; 3750 int ret; 3751 3752 for (i = *level; i < max_level && path->nodes[i]; i++) { 3753 slot = path->slots[i]; 3754 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { 3755 struct extent_buffer *node; 3756 struct btrfs_disk_key disk_key; 3757 node = path->nodes[i]; 3758 path->slots[i]++; 3759 *level = i; 3760 WARN_ON(*level == 0); 3761 btrfs_node_key(node, &disk_key, path->slots[i]); 3762 memcpy(&root_item->drop_progress, 3763 &disk_key, sizeof(disk_key)); 3764 root_item->drop_level = i; 3765 return 0; 3766 } else { 3767 struct extent_buffer *parent; 3768 if (path->nodes[*level] == root->node) 3769 parent = path->nodes[*level]; 3770 else 3771 parent = path->nodes[*level + 1]; 3772 3773 root_owner = btrfs_header_owner(parent); 3774 root_gen = btrfs_header_generation(parent); 3775 3776 clean_tree_block(trans, root, path->nodes[*level]); 3777 ret = btrfs_free_extent(trans, root, 3778 path->nodes[*level]->start, 3779 path->nodes[*level]->len, 3780 parent->start, root_owner, 3781 root_gen, *level, 1); 3782 BUG_ON(ret); 3783 if (path->locks[*level]) { 3784 btrfs_tree_unlock(path->nodes[*level]); 3785 path->locks[*level] = 0; 3786 } 3787 free_extent_buffer(path->nodes[*level]); 3788 path->nodes[*level] = NULL; 3789 *level = i + 1; 3790 } 3791 } 3792 return 1; 3793 } 3794 3795 /* 3796 * drop the reference count on the tree rooted at 'snap'. This traverses 3797 * the tree freeing any blocks that have a ref count of zero after being 3798 * decremented. 3799 */ 3800 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 3801 *root) 3802 { 3803 int ret = 0; 3804 int wret; 3805 int level; 3806 struct btrfs_path *path; 3807 int i; 3808 int orig_level; 3809 struct btrfs_root_item *root_item = &root->root_item; 3810 3811 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); 3812 path = btrfs_alloc_path(); 3813 BUG_ON(!path); 3814 3815 level = btrfs_header_level(root->node); 3816 orig_level = level; 3817 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 3818 path->nodes[level] = root->node; 3819 extent_buffer_get(root->node); 3820 path->slots[level] = 0; 3821 } else { 3822 struct btrfs_key key; 3823 struct btrfs_disk_key found_key; 3824 struct extent_buffer *node; 3825 3826 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 3827 level = root_item->drop_level; 3828 path->lowest_level = level; 3829 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3830 if (wret < 0) { 3831 ret = wret; 3832 goto out; 3833 } 3834 node = path->nodes[level]; 3835 btrfs_node_key(node, &found_key, path->slots[level]); 3836 WARN_ON(memcmp(&found_key, &root_item->drop_progress, 3837 sizeof(found_key))); 3838 /* 3839 * unlock our path, this is safe because only this 3840 * function is allowed to delete this snapshot 3841 */ 3842 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 3843 if (path->nodes[i] && path->locks[i]) { 3844 path->locks[i] = 0; 3845 btrfs_tree_unlock(path->nodes[i]); 3846 } 3847 } 3848 } 3849 while (1) { 3850 wret = walk_down_tree(trans, root, path, &level); 3851 if (wret > 0) 3852 break; 3853 if (wret < 0) 3854 ret = wret; 3855 3856 wret = walk_up_tree(trans, root, path, &level, 3857 BTRFS_MAX_LEVEL); 3858 if (wret > 0) 3859 break; 3860 if (wret < 0) 3861 ret = wret; 3862 if (trans->transaction->in_commit) { 3863 ret = -EAGAIN; 3864 break; 3865 } 3866 atomic_inc(&root->fs_info->throttle_gen); 3867 wake_up(&root->fs_info->transaction_throttle); 3868 } 3869 for (i = 0; i <= orig_level; i++) { 3870 if (path->nodes[i]) { 3871 free_extent_buffer(path->nodes[i]); 3872 path->nodes[i] = NULL; 3873 } 3874 } 3875 out: 3876 btrfs_free_path(path); 3877 return ret; 3878 } 3879 3880 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 3881 struct btrfs_root *root, 3882 struct extent_buffer *node, 3883 struct extent_buffer *parent) 3884 { 3885 struct btrfs_path *path; 3886 int level; 3887 int parent_level; 3888 int ret = 0; 3889 int wret; 3890 3891 path = btrfs_alloc_path(); 3892 BUG_ON(!path); 3893 3894 BUG_ON(!btrfs_tree_locked(parent)); 3895 parent_level = btrfs_header_level(parent); 3896 extent_buffer_get(parent); 3897 path->nodes[parent_level] = parent; 3898 path->slots[parent_level] = btrfs_header_nritems(parent); 3899 3900 BUG_ON(!btrfs_tree_locked(node)); 3901 level = btrfs_header_level(node); 3902 extent_buffer_get(node); 3903 path->nodes[level] = node; 3904 path->slots[level] = 0; 3905 3906 while (1) { 3907 wret = walk_down_subtree(trans, root, path, &level); 3908 if (wret < 0) 3909 ret = wret; 3910 if (wret != 0) 3911 break; 3912 3913 wret = walk_up_tree(trans, root, path, &level, parent_level); 3914 if (wret < 0) 3915 ret = wret; 3916 if (wret != 0) 3917 break; 3918 } 3919 3920 btrfs_free_path(path); 3921 return ret; 3922 } 3923 3924 static unsigned long calc_ra(unsigned long start, unsigned long last, 3925 unsigned long nr) 3926 { 3927 return min(last, start + nr - 1); 3928 } 3929 3930 static noinline int relocate_inode_pages(struct inode *inode, u64 start, 3931 u64 len) 3932 { 3933 u64 page_start; 3934 u64 page_end; 3935 unsigned long first_index; 3936 unsigned long last_index; 3937 unsigned long i; 3938 struct page *page; 3939 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3940 struct file_ra_state *ra; 3941 struct btrfs_ordered_extent *ordered; 3942 unsigned int total_read = 0; 3943 unsigned int total_dirty = 0; 3944 int ret = 0; 3945 3946 ra = kzalloc(sizeof(*ra), GFP_NOFS); 3947 3948 mutex_lock(&inode->i_mutex); 3949 first_index = start >> PAGE_CACHE_SHIFT; 3950 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 3951 3952 /* make sure the dirty trick played by the caller work */ 3953 ret = invalidate_inode_pages2_range(inode->i_mapping, 3954 first_index, last_index); 3955 if (ret) 3956 goto out_unlock; 3957 3958 file_ra_state_init(ra, inode->i_mapping); 3959 3960 for (i = first_index ; i <= last_index; i++) { 3961 if (total_read % ra->ra_pages == 0) { 3962 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 3963 calc_ra(i, last_index, ra->ra_pages)); 3964 } 3965 total_read++; 3966 again: 3967 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) 3968 BUG_ON(1); 3969 page = grab_cache_page(inode->i_mapping, i); 3970 if (!page) { 3971 ret = -ENOMEM; 3972 goto out_unlock; 3973 } 3974 if (!PageUptodate(page)) { 3975 btrfs_readpage(NULL, page); 3976 lock_page(page); 3977 if (!PageUptodate(page)) { 3978 unlock_page(page); 3979 page_cache_release(page); 3980 ret = -EIO; 3981 goto out_unlock; 3982 } 3983 } 3984 wait_on_page_writeback(page); 3985 3986 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 3987 page_end = page_start + PAGE_CACHE_SIZE - 1; 3988 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 3989 3990 ordered = btrfs_lookup_ordered_extent(inode, page_start); 3991 if (ordered) { 3992 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3993 unlock_page(page); 3994 page_cache_release(page); 3995 btrfs_start_ordered_extent(inode, ordered, 1); 3996 btrfs_put_ordered_extent(ordered); 3997 goto again; 3998 } 3999 set_page_extent_mapped(page); 4000 4001 if (i == first_index) 4002 set_extent_bits(io_tree, page_start, page_end, 4003 EXTENT_BOUNDARY, GFP_NOFS); 4004 btrfs_set_extent_delalloc(inode, page_start, page_end); 4005 4006 set_page_dirty(page); 4007 total_dirty++; 4008 4009 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 4010 unlock_page(page); 4011 page_cache_release(page); 4012 } 4013 4014 out_unlock: 4015 kfree(ra); 4016 mutex_unlock(&inode->i_mutex); 4017 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); 4018 return ret; 4019 } 4020 4021 static noinline int relocate_data_extent(struct inode *reloc_inode, 4022 struct btrfs_key *extent_key, 4023 u64 offset) 4024 { 4025 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 4026 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; 4027 struct extent_map *em; 4028 u64 start = extent_key->objectid - offset; 4029 u64 end = start + extent_key->offset - 1; 4030 4031 em = alloc_extent_map(GFP_NOFS); 4032 BUG_ON(!em || IS_ERR(em)); 4033 4034 em->start = start; 4035 em->len = extent_key->offset; 4036 em->block_len = extent_key->offset; 4037 em->block_start = extent_key->objectid; 4038 em->bdev = root->fs_info->fs_devices->latest_bdev; 4039 set_bit(EXTENT_FLAG_PINNED, &em->flags); 4040 4041 /* setup extent map to cheat btrfs_readpage */ 4042 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 4043 while (1) { 4044 int ret; 4045 spin_lock(&em_tree->lock); 4046 ret = add_extent_mapping(em_tree, em); 4047 spin_unlock(&em_tree->lock); 4048 if (ret != -EEXIST) { 4049 free_extent_map(em); 4050 break; 4051 } 4052 btrfs_drop_extent_cache(reloc_inode, start, end, 0); 4053 } 4054 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 4055 4056 return relocate_inode_pages(reloc_inode, start, extent_key->offset); 4057 } 4058 4059 struct btrfs_ref_path { 4060 u64 extent_start; 4061 u64 nodes[BTRFS_MAX_LEVEL]; 4062 u64 root_objectid; 4063 u64 root_generation; 4064 u64 owner_objectid; 4065 u32 num_refs; 4066 int lowest_level; 4067 int current_level; 4068 int shared_level; 4069 4070 struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; 4071 u64 new_nodes[BTRFS_MAX_LEVEL]; 4072 }; 4073 4074 struct disk_extent { 4075 u64 ram_bytes; 4076 u64 disk_bytenr; 4077 u64 disk_num_bytes; 4078 u64 offset; 4079 u64 num_bytes; 4080 u8 compression; 4081 u8 encryption; 4082 u16 other_encoding; 4083 }; 4084 4085 static int is_cowonly_root(u64 root_objectid) 4086 { 4087 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 4088 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 4089 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 4090 root_objectid == BTRFS_DEV_TREE_OBJECTID || 4091 root_objectid == BTRFS_TREE_LOG_OBJECTID || 4092 root_objectid == BTRFS_CSUM_TREE_OBJECTID) 4093 return 1; 4094 return 0; 4095 } 4096 4097 static noinline int __next_ref_path(struct btrfs_trans_handle *trans, 4098 struct btrfs_root *extent_root, 4099 struct btrfs_ref_path *ref_path, 4100 int first_time) 4101 { 4102 struct extent_buffer *leaf; 4103 struct btrfs_path *path; 4104 struct btrfs_extent_ref *ref; 4105 struct btrfs_key key; 4106 struct btrfs_key found_key; 4107 u64 bytenr; 4108 u32 nritems; 4109 int level; 4110 int ret = 1; 4111 4112 path = btrfs_alloc_path(); 4113 if (!path) 4114 return -ENOMEM; 4115 4116 if (first_time) { 4117 ref_path->lowest_level = -1; 4118 ref_path->current_level = -1; 4119 ref_path->shared_level = -1; 4120 goto walk_up; 4121 } 4122 walk_down: 4123 level = ref_path->current_level - 1; 4124 while (level >= -1) { 4125 u64 parent; 4126 if (level < ref_path->lowest_level) 4127 break; 4128 4129 if (level >= 0) 4130 bytenr = ref_path->nodes[level]; 4131 else 4132 bytenr = ref_path->extent_start; 4133 BUG_ON(bytenr == 0); 4134 4135 parent = ref_path->nodes[level + 1]; 4136 ref_path->nodes[level + 1] = 0; 4137 ref_path->current_level = level; 4138 BUG_ON(parent == 0); 4139 4140 key.objectid = bytenr; 4141 key.offset = parent + 1; 4142 key.type = BTRFS_EXTENT_REF_KEY; 4143 4144 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 4145 if (ret < 0) 4146 goto out; 4147 BUG_ON(ret == 0); 4148 4149 leaf = path->nodes[0]; 4150 nritems = btrfs_header_nritems(leaf); 4151 if (path->slots[0] >= nritems) { 4152 ret = btrfs_next_leaf(extent_root, path); 4153 if (ret < 0) 4154 goto out; 4155 if (ret > 0) 4156 goto next; 4157 leaf = path->nodes[0]; 4158 } 4159 4160 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4161 if (found_key.objectid == bytenr && 4162 found_key.type == BTRFS_EXTENT_REF_KEY) { 4163 if (level < ref_path->shared_level) 4164 ref_path->shared_level = level; 4165 goto found; 4166 } 4167 next: 4168 level--; 4169 btrfs_release_path(extent_root, path); 4170 cond_resched(); 4171 } 4172 /* reached lowest level */ 4173 ret = 1; 4174 goto out; 4175 walk_up: 4176 level = ref_path->current_level; 4177 while (level < BTRFS_MAX_LEVEL - 1) { 4178 u64 ref_objectid; 4179 4180 if (level >= 0) 4181 bytenr = ref_path->nodes[level]; 4182 else 4183 bytenr = ref_path->extent_start; 4184 4185 BUG_ON(bytenr == 0); 4186 4187 key.objectid = bytenr; 4188 key.offset = 0; 4189 key.type = BTRFS_EXTENT_REF_KEY; 4190 4191 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 4192 if (ret < 0) 4193 goto out; 4194 4195 leaf = path->nodes[0]; 4196 nritems = btrfs_header_nritems(leaf); 4197 if (path->slots[0] >= nritems) { 4198 ret = btrfs_next_leaf(extent_root, path); 4199 if (ret < 0) 4200 goto out; 4201 if (ret > 0) { 4202 /* the extent was freed by someone */ 4203 if (ref_path->lowest_level == level) 4204 goto out; 4205 btrfs_release_path(extent_root, path); 4206 goto walk_down; 4207 } 4208 leaf = path->nodes[0]; 4209 } 4210 4211 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4212 if (found_key.objectid != bytenr || 4213 found_key.type != BTRFS_EXTENT_REF_KEY) { 4214 /* the extent was freed by someone */ 4215 if (ref_path->lowest_level == level) { 4216 ret = 1; 4217 goto out; 4218 } 4219 btrfs_release_path(extent_root, path); 4220 goto walk_down; 4221 } 4222 found: 4223 ref = btrfs_item_ptr(leaf, path->slots[0], 4224 struct btrfs_extent_ref); 4225 ref_objectid = btrfs_ref_objectid(leaf, ref); 4226 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { 4227 if (first_time) { 4228 level = (int)ref_objectid; 4229 BUG_ON(level >= BTRFS_MAX_LEVEL); 4230 ref_path->lowest_level = level; 4231 ref_path->current_level = level; 4232 ref_path->nodes[level] = bytenr; 4233 } else { 4234 WARN_ON(ref_objectid != level); 4235 } 4236 } else { 4237 WARN_ON(level != -1); 4238 } 4239 first_time = 0; 4240 4241 if (ref_path->lowest_level == level) { 4242 ref_path->owner_objectid = ref_objectid; 4243 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); 4244 } 4245 4246 /* 4247 * the block is tree root or the block isn't in reference 4248 * counted tree. 4249 */ 4250 if (found_key.objectid == found_key.offset || 4251 is_cowonly_root(btrfs_ref_root(leaf, ref))) { 4252 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 4253 ref_path->root_generation = 4254 btrfs_ref_generation(leaf, ref); 4255 if (level < 0) { 4256 /* special reference from the tree log */ 4257 ref_path->nodes[0] = found_key.offset; 4258 ref_path->current_level = 0; 4259 } 4260 ret = 0; 4261 goto out; 4262 } 4263 4264 level++; 4265 BUG_ON(ref_path->nodes[level] != 0); 4266 ref_path->nodes[level] = found_key.offset; 4267 ref_path->current_level = level; 4268 4269 /* 4270 * the reference was created in the running transaction, 4271 * no need to continue walking up. 4272 */ 4273 if (btrfs_ref_generation(leaf, ref) == trans->transid) { 4274 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 4275 ref_path->root_generation = 4276 btrfs_ref_generation(leaf, ref); 4277 ret = 0; 4278 goto out; 4279 } 4280 4281 btrfs_release_path(extent_root, path); 4282 cond_resched(); 4283 } 4284 /* reached max tree level, but no tree root found. */ 4285 BUG(); 4286 out: 4287 btrfs_free_path(path); 4288 return ret; 4289 } 4290 4291 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, 4292 struct btrfs_root *extent_root, 4293 struct btrfs_ref_path *ref_path, 4294 u64 extent_start) 4295 { 4296 memset(ref_path, 0, sizeof(*ref_path)); 4297 ref_path->extent_start = extent_start; 4298 4299 return __next_ref_path(trans, extent_root, ref_path, 1); 4300 } 4301 4302 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, 4303 struct btrfs_root *extent_root, 4304 struct btrfs_ref_path *ref_path) 4305 { 4306 return __next_ref_path(trans, extent_root, ref_path, 0); 4307 } 4308 4309 static noinline int get_new_locations(struct inode *reloc_inode, 4310 struct btrfs_key *extent_key, 4311 u64 offset, int no_fragment, 4312 struct disk_extent **extents, 4313 int *nr_extents) 4314 { 4315 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 4316 struct btrfs_path *path; 4317 struct btrfs_file_extent_item *fi; 4318 struct extent_buffer *leaf; 4319 struct disk_extent *exts = *extents; 4320 struct btrfs_key found_key; 4321 u64 cur_pos; 4322 u64 last_byte; 4323 u32 nritems; 4324 int nr = 0; 4325 int max = *nr_extents; 4326 int ret; 4327 4328 WARN_ON(!no_fragment && *extents); 4329 if (!exts) { 4330 max = 1; 4331 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); 4332 if (!exts) 4333 return -ENOMEM; 4334 } 4335 4336 path = btrfs_alloc_path(); 4337 BUG_ON(!path); 4338 4339 cur_pos = extent_key->objectid - offset; 4340 last_byte = extent_key->objectid + extent_key->offset; 4341 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 4342 cur_pos, 0); 4343 if (ret < 0) 4344 goto out; 4345 if (ret > 0) { 4346 ret = -ENOENT; 4347 goto out; 4348 } 4349 4350 while (1) { 4351 leaf = path->nodes[0]; 4352 nritems = btrfs_header_nritems(leaf); 4353 if (path->slots[0] >= nritems) { 4354 ret = btrfs_next_leaf(root, path); 4355 if (ret < 0) 4356 goto out; 4357 if (ret > 0) 4358 break; 4359 leaf = path->nodes[0]; 4360 } 4361 4362 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4363 if (found_key.offset != cur_pos || 4364 found_key.type != BTRFS_EXTENT_DATA_KEY || 4365 found_key.objectid != reloc_inode->i_ino) 4366 break; 4367 4368 fi = btrfs_item_ptr(leaf, path->slots[0], 4369 struct btrfs_file_extent_item); 4370 if (btrfs_file_extent_type(leaf, fi) != 4371 BTRFS_FILE_EXTENT_REG || 4372 btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 4373 break; 4374 4375 if (nr == max) { 4376 struct disk_extent *old = exts; 4377 max *= 2; 4378 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); 4379 memcpy(exts, old, sizeof(*exts) * nr); 4380 if (old != *extents) 4381 kfree(old); 4382 } 4383 4384 exts[nr].disk_bytenr = 4385 btrfs_file_extent_disk_bytenr(leaf, fi); 4386 exts[nr].disk_num_bytes = 4387 btrfs_file_extent_disk_num_bytes(leaf, fi); 4388 exts[nr].offset = btrfs_file_extent_offset(leaf, fi); 4389 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 4390 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 4391 exts[nr].compression = btrfs_file_extent_compression(leaf, fi); 4392 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); 4393 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, 4394 fi); 4395 BUG_ON(exts[nr].offset > 0); 4396 BUG_ON(exts[nr].compression || exts[nr].encryption); 4397 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); 4398 4399 cur_pos += exts[nr].num_bytes; 4400 nr++; 4401 4402 if (cur_pos + offset >= last_byte) 4403 break; 4404 4405 if (no_fragment) { 4406 ret = 1; 4407 goto out; 4408 } 4409 path->slots[0]++; 4410 } 4411 4412 BUG_ON(cur_pos + offset > last_byte); 4413 if (cur_pos + offset < last_byte) { 4414 ret = -ENOENT; 4415 goto out; 4416 } 4417 ret = 0; 4418 out: 4419 btrfs_free_path(path); 4420 if (ret) { 4421 if (exts != *extents) 4422 kfree(exts); 4423 } else { 4424 *extents = exts; 4425 *nr_extents = nr; 4426 } 4427 return ret; 4428 } 4429 4430 static noinline int replace_one_extent(struct btrfs_trans_handle *trans, 4431 struct btrfs_root *root, 4432 struct btrfs_path *path, 4433 struct btrfs_key *extent_key, 4434 struct btrfs_key *leaf_key, 4435 struct btrfs_ref_path *ref_path, 4436 struct disk_extent *new_extents, 4437 int nr_extents) 4438 { 4439 struct extent_buffer *leaf; 4440 struct btrfs_file_extent_item *fi; 4441 struct inode *inode = NULL; 4442 struct btrfs_key key; 4443 u64 lock_start = 0; 4444 u64 lock_end = 0; 4445 u64 num_bytes; 4446 u64 ext_offset; 4447 u64 first_pos; 4448 u32 nritems; 4449 int nr_scaned = 0; 4450 int extent_locked = 0; 4451 int extent_type; 4452 int ret; 4453 4454 memcpy(&key, leaf_key, sizeof(key)); 4455 first_pos = INT_LIMIT(loff_t) - extent_key->offset; 4456 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 4457 if (key.objectid < ref_path->owner_objectid || 4458 (key.objectid == ref_path->owner_objectid && 4459 key.type < BTRFS_EXTENT_DATA_KEY)) { 4460 key.objectid = ref_path->owner_objectid; 4461 key.type = BTRFS_EXTENT_DATA_KEY; 4462 key.offset = 0; 4463 } 4464 } 4465 4466 while (1) { 4467 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 4468 if (ret < 0) 4469 goto out; 4470 4471 leaf = path->nodes[0]; 4472 nritems = btrfs_header_nritems(leaf); 4473 next: 4474 if (extent_locked && ret > 0) { 4475 /* 4476 * the file extent item was modified by someone 4477 * before the extent got locked. 4478 */ 4479 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 4480 lock_end, GFP_NOFS); 4481 extent_locked = 0; 4482 } 4483 4484 if (path->slots[0] >= nritems) { 4485 if (++nr_scaned > 2) 4486 break; 4487 4488 BUG_ON(extent_locked); 4489 ret = btrfs_next_leaf(root, path); 4490 if (ret < 0) 4491 goto out; 4492 if (ret > 0) 4493 break; 4494 leaf = path->nodes[0]; 4495 nritems = btrfs_header_nritems(leaf); 4496 } 4497 4498 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4499 4500 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 4501 if ((key.objectid > ref_path->owner_objectid) || 4502 (key.objectid == ref_path->owner_objectid && 4503 key.type > BTRFS_EXTENT_DATA_KEY) || 4504 (key.offset >= first_pos + extent_key->offset)) 4505 break; 4506 } 4507 4508 if (inode && key.objectid != inode->i_ino) { 4509 BUG_ON(extent_locked); 4510 btrfs_release_path(root, path); 4511 mutex_unlock(&inode->i_mutex); 4512 iput(inode); 4513 inode = NULL; 4514 continue; 4515 } 4516 4517 if (key.type != BTRFS_EXTENT_DATA_KEY) { 4518 path->slots[0]++; 4519 ret = 1; 4520 goto next; 4521 } 4522 fi = btrfs_item_ptr(leaf, path->slots[0], 4523 struct btrfs_file_extent_item); 4524 extent_type = btrfs_file_extent_type(leaf, fi); 4525 if ((extent_type != BTRFS_FILE_EXTENT_REG && 4526 extent_type != BTRFS_FILE_EXTENT_PREALLOC) || 4527 (btrfs_file_extent_disk_bytenr(leaf, fi) != 4528 extent_key->objectid)) { 4529 path->slots[0]++; 4530 ret = 1; 4531 goto next; 4532 } 4533 4534 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 4535 ext_offset = btrfs_file_extent_offset(leaf, fi); 4536 4537 if (first_pos > key.offset - ext_offset) 4538 first_pos = key.offset - ext_offset; 4539 4540 if (!extent_locked) { 4541 lock_start = key.offset; 4542 lock_end = lock_start + num_bytes - 1; 4543 } else { 4544 if (lock_start > key.offset || 4545 lock_end + 1 < key.offset + num_bytes) { 4546 unlock_extent(&BTRFS_I(inode)->io_tree, 4547 lock_start, lock_end, GFP_NOFS); 4548 extent_locked = 0; 4549 } 4550 } 4551 4552 if (!inode) { 4553 btrfs_release_path(root, path); 4554 4555 inode = btrfs_iget_locked(root->fs_info->sb, 4556 key.objectid, root); 4557 if (inode->i_state & I_NEW) { 4558 BTRFS_I(inode)->root = root; 4559 BTRFS_I(inode)->location.objectid = 4560 key.objectid; 4561 BTRFS_I(inode)->location.type = 4562 BTRFS_INODE_ITEM_KEY; 4563 BTRFS_I(inode)->location.offset = 0; 4564 btrfs_read_locked_inode(inode); 4565 unlock_new_inode(inode); 4566 } 4567 /* 4568 * some code call btrfs_commit_transaction while 4569 * holding the i_mutex, so we can't use mutex_lock 4570 * here. 4571 */ 4572 if (is_bad_inode(inode) || 4573 !mutex_trylock(&inode->i_mutex)) { 4574 iput(inode); 4575 inode = NULL; 4576 key.offset = (u64)-1; 4577 goto skip; 4578 } 4579 } 4580 4581 if (!extent_locked) { 4582 struct btrfs_ordered_extent *ordered; 4583 4584 btrfs_release_path(root, path); 4585 4586 lock_extent(&BTRFS_I(inode)->io_tree, lock_start, 4587 lock_end, GFP_NOFS); 4588 ordered = btrfs_lookup_first_ordered_extent(inode, 4589 lock_end); 4590 if (ordered && 4591 ordered->file_offset <= lock_end && 4592 ordered->file_offset + ordered->len > lock_start) { 4593 unlock_extent(&BTRFS_I(inode)->io_tree, 4594 lock_start, lock_end, GFP_NOFS); 4595 btrfs_start_ordered_extent(inode, ordered, 1); 4596 btrfs_put_ordered_extent(ordered); 4597 key.offset += num_bytes; 4598 goto skip; 4599 } 4600 if (ordered) 4601 btrfs_put_ordered_extent(ordered); 4602 4603 extent_locked = 1; 4604 continue; 4605 } 4606 4607 if (nr_extents == 1) { 4608 /* update extent pointer in place */ 4609 btrfs_set_file_extent_disk_bytenr(leaf, fi, 4610 new_extents[0].disk_bytenr); 4611 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 4612 new_extents[0].disk_num_bytes); 4613 btrfs_mark_buffer_dirty(leaf); 4614 4615 btrfs_drop_extent_cache(inode, key.offset, 4616 key.offset + num_bytes - 1, 0); 4617 4618 ret = btrfs_inc_extent_ref(trans, root, 4619 new_extents[0].disk_bytenr, 4620 new_extents[0].disk_num_bytes, 4621 leaf->start, 4622 root->root_key.objectid, 4623 trans->transid, 4624 key.objectid); 4625 BUG_ON(ret); 4626 4627 ret = btrfs_free_extent(trans, root, 4628 extent_key->objectid, 4629 extent_key->offset, 4630 leaf->start, 4631 btrfs_header_owner(leaf), 4632 btrfs_header_generation(leaf), 4633 key.objectid, 0); 4634 BUG_ON(ret); 4635 4636 btrfs_release_path(root, path); 4637 key.offset += num_bytes; 4638 } else { 4639 BUG_ON(1); 4640 #if 0 4641 u64 alloc_hint; 4642 u64 extent_len; 4643 int i; 4644 /* 4645 * drop old extent pointer at first, then insert the 4646 * new pointers one bye one 4647 */ 4648 btrfs_release_path(root, path); 4649 ret = btrfs_drop_extents(trans, root, inode, key.offset, 4650 key.offset + num_bytes, 4651 key.offset, &alloc_hint); 4652 BUG_ON(ret); 4653 4654 for (i = 0; i < nr_extents; i++) { 4655 if (ext_offset >= new_extents[i].num_bytes) { 4656 ext_offset -= new_extents[i].num_bytes; 4657 continue; 4658 } 4659 extent_len = min(new_extents[i].num_bytes - 4660 ext_offset, num_bytes); 4661 4662 ret = btrfs_insert_empty_item(trans, root, 4663 path, &key, 4664 sizeof(*fi)); 4665 BUG_ON(ret); 4666 4667 leaf = path->nodes[0]; 4668 fi = btrfs_item_ptr(leaf, path->slots[0], 4669 struct btrfs_file_extent_item); 4670 btrfs_set_file_extent_generation(leaf, fi, 4671 trans->transid); 4672 btrfs_set_file_extent_type(leaf, fi, 4673 BTRFS_FILE_EXTENT_REG); 4674 btrfs_set_file_extent_disk_bytenr(leaf, fi, 4675 new_extents[i].disk_bytenr); 4676 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 4677 new_extents[i].disk_num_bytes); 4678 btrfs_set_file_extent_ram_bytes(leaf, fi, 4679 new_extents[i].ram_bytes); 4680 4681 btrfs_set_file_extent_compression(leaf, fi, 4682 new_extents[i].compression); 4683 btrfs_set_file_extent_encryption(leaf, fi, 4684 new_extents[i].encryption); 4685 btrfs_set_file_extent_other_encoding(leaf, fi, 4686 new_extents[i].other_encoding); 4687 4688 btrfs_set_file_extent_num_bytes(leaf, fi, 4689 extent_len); 4690 ext_offset += new_extents[i].offset; 4691 btrfs_set_file_extent_offset(leaf, fi, 4692 ext_offset); 4693 btrfs_mark_buffer_dirty(leaf); 4694 4695 btrfs_drop_extent_cache(inode, key.offset, 4696 key.offset + extent_len - 1, 0); 4697 4698 ret = btrfs_inc_extent_ref(trans, root, 4699 new_extents[i].disk_bytenr, 4700 new_extents[i].disk_num_bytes, 4701 leaf->start, 4702 root->root_key.objectid, 4703 trans->transid, key.objectid); 4704 BUG_ON(ret); 4705 btrfs_release_path(root, path); 4706 4707 inode_add_bytes(inode, extent_len); 4708 4709 ext_offset = 0; 4710 num_bytes -= extent_len; 4711 key.offset += extent_len; 4712 4713 if (num_bytes == 0) 4714 break; 4715 } 4716 BUG_ON(i >= nr_extents); 4717 #endif 4718 } 4719 4720 if (extent_locked) { 4721 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 4722 lock_end, GFP_NOFS); 4723 extent_locked = 0; 4724 } 4725 skip: 4726 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && 4727 key.offset >= first_pos + extent_key->offset) 4728 break; 4729 4730 cond_resched(); 4731 } 4732 ret = 0; 4733 out: 4734 btrfs_release_path(root, path); 4735 if (inode) { 4736 mutex_unlock(&inode->i_mutex); 4737 if (extent_locked) { 4738 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 4739 lock_end, GFP_NOFS); 4740 } 4741 iput(inode); 4742 } 4743 return ret; 4744 } 4745 4746 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, 4747 struct btrfs_root *root, 4748 struct extent_buffer *buf, u64 orig_start) 4749 { 4750 int level; 4751 int ret; 4752 4753 BUG_ON(btrfs_header_generation(buf) != trans->transid); 4754 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 4755 4756 level = btrfs_header_level(buf); 4757 if (level == 0) { 4758 struct btrfs_leaf_ref *ref; 4759 struct btrfs_leaf_ref *orig_ref; 4760 4761 orig_ref = btrfs_lookup_leaf_ref(root, orig_start); 4762 if (!orig_ref) 4763 return -ENOENT; 4764 4765 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); 4766 if (!ref) { 4767 btrfs_free_leaf_ref(root, orig_ref); 4768 return -ENOMEM; 4769 } 4770 4771 ref->nritems = orig_ref->nritems; 4772 memcpy(ref->extents, orig_ref->extents, 4773 sizeof(ref->extents[0]) * ref->nritems); 4774 4775 btrfs_free_leaf_ref(root, orig_ref); 4776 4777 ref->root_gen = trans->transid; 4778 ref->bytenr = buf->start; 4779 ref->owner = btrfs_header_owner(buf); 4780 ref->generation = btrfs_header_generation(buf); 4781 ret = btrfs_add_leaf_ref(root, ref, 0); 4782 WARN_ON(ret); 4783 btrfs_free_leaf_ref(root, ref); 4784 } 4785 return 0; 4786 } 4787 4788 static noinline int invalidate_extent_cache(struct btrfs_root *root, 4789 struct extent_buffer *leaf, 4790 struct btrfs_block_group_cache *group, 4791 struct btrfs_root *target_root) 4792 { 4793 struct btrfs_key key; 4794 struct inode *inode = NULL; 4795 struct btrfs_file_extent_item *fi; 4796 u64 num_bytes; 4797 u64 skip_objectid = 0; 4798 u32 nritems; 4799 u32 i; 4800 4801 nritems = btrfs_header_nritems(leaf); 4802 for (i = 0; i < nritems; i++) { 4803 btrfs_item_key_to_cpu(leaf, &key, i); 4804 if (key.objectid == skip_objectid || 4805 key.type != BTRFS_EXTENT_DATA_KEY) 4806 continue; 4807 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 4808 if (btrfs_file_extent_type(leaf, fi) == 4809 BTRFS_FILE_EXTENT_INLINE) 4810 continue; 4811 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 4812 continue; 4813 if (!inode || inode->i_ino != key.objectid) { 4814 iput(inode); 4815 inode = btrfs_ilookup(target_root->fs_info->sb, 4816 key.objectid, target_root, 1); 4817 } 4818 if (!inode) { 4819 skip_objectid = key.objectid; 4820 continue; 4821 } 4822 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 4823 4824 lock_extent(&BTRFS_I(inode)->io_tree, key.offset, 4825 key.offset + num_bytes - 1, GFP_NOFS); 4826 btrfs_drop_extent_cache(inode, key.offset, 4827 key.offset + num_bytes - 1, 1); 4828 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, 4829 key.offset + num_bytes - 1, GFP_NOFS); 4830 cond_resched(); 4831 } 4832 iput(inode); 4833 return 0; 4834 } 4835 4836 static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, 4837 struct btrfs_root *root, 4838 struct extent_buffer *leaf, 4839 struct btrfs_block_group_cache *group, 4840 struct inode *reloc_inode) 4841 { 4842 struct btrfs_key key; 4843 struct btrfs_key extent_key; 4844 struct btrfs_file_extent_item *fi; 4845 struct btrfs_leaf_ref *ref; 4846 struct disk_extent *new_extent; 4847 u64 bytenr; 4848 u64 num_bytes; 4849 u32 nritems; 4850 u32 i; 4851 int ext_index; 4852 int nr_extent; 4853 int ret; 4854 4855 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); 4856 BUG_ON(!new_extent); 4857 4858 ref = btrfs_lookup_leaf_ref(root, leaf->start); 4859 BUG_ON(!ref); 4860 4861 ext_index = -1; 4862 nritems = btrfs_header_nritems(leaf); 4863 for (i = 0; i < nritems; i++) { 4864 btrfs_item_key_to_cpu(leaf, &key, i); 4865 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 4866 continue; 4867 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 4868 if (btrfs_file_extent_type(leaf, fi) == 4869 BTRFS_FILE_EXTENT_INLINE) 4870 continue; 4871 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 4872 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 4873 if (bytenr == 0) 4874 continue; 4875 4876 ext_index++; 4877 if (bytenr >= group->key.objectid + group->key.offset || 4878 bytenr + num_bytes <= group->key.objectid) 4879 continue; 4880 4881 extent_key.objectid = bytenr; 4882 extent_key.offset = num_bytes; 4883 extent_key.type = BTRFS_EXTENT_ITEM_KEY; 4884 nr_extent = 1; 4885 ret = get_new_locations(reloc_inode, &extent_key, 4886 group->key.objectid, 1, 4887 &new_extent, &nr_extent); 4888 if (ret > 0) 4889 continue; 4890 BUG_ON(ret < 0); 4891 4892 BUG_ON(ref->extents[ext_index].bytenr != bytenr); 4893 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); 4894 ref->extents[ext_index].bytenr = new_extent->disk_bytenr; 4895 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; 4896 4897 btrfs_set_file_extent_disk_bytenr(leaf, fi, 4898 new_extent->disk_bytenr); 4899 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 4900 new_extent->disk_num_bytes); 4901 btrfs_mark_buffer_dirty(leaf); 4902 4903 ret = btrfs_inc_extent_ref(trans, root, 4904 new_extent->disk_bytenr, 4905 new_extent->disk_num_bytes, 4906 leaf->start, 4907 root->root_key.objectid, 4908 trans->transid, key.objectid); 4909 BUG_ON(ret); 4910 ret = btrfs_free_extent(trans, root, 4911 bytenr, num_bytes, leaf->start, 4912 btrfs_header_owner(leaf), 4913 btrfs_header_generation(leaf), 4914 key.objectid, 0); 4915 BUG_ON(ret); 4916 cond_resched(); 4917 } 4918 kfree(new_extent); 4919 BUG_ON(ext_index + 1 != ref->nritems); 4920 btrfs_free_leaf_ref(root, ref); 4921 return 0; 4922 } 4923 4924 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, 4925 struct btrfs_root *root) 4926 { 4927 struct btrfs_root *reloc_root; 4928 int ret; 4929 4930 if (root->reloc_root) { 4931 reloc_root = root->reloc_root; 4932 root->reloc_root = NULL; 4933 list_add(&reloc_root->dead_list, 4934 &root->fs_info->dead_reloc_roots); 4935 4936 btrfs_set_root_bytenr(&reloc_root->root_item, 4937 reloc_root->node->start); 4938 btrfs_set_root_level(&root->root_item, 4939 btrfs_header_level(reloc_root->node)); 4940 memset(&reloc_root->root_item.drop_progress, 0, 4941 sizeof(struct btrfs_disk_key)); 4942 reloc_root->root_item.drop_level = 0; 4943 4944 ret = btrfs_update_root(trans, root->fs_info->tree_root, 4945 &reloc_root->root_key, 4946 &reloc_root->root_item); 4947 BUG_ON(ret); 4948 } 4949 return 0; 4950 } 4951 4952 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) 4953 { 4954 struct btrfs_trans_handle *trans; 4955 struct btrfs_root *reloc_root; 4956 struct btrfs_root *prev_root = NULL; 4957 struct list_head dead_roots; 4958 int ret; 4959 unsigned long nr; 4960 4961 INIT_LIST_HEAD(&dead_roots); 4962 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); 4963 4964 while (!list_empty(&dead_roots)) { 4965 reloc_root = list_entry(dead_roots.prev, 4966 struct btrfs_root, dead_list); 4967 list_del_init(&reloc_root->dead_list); 4968 4969 BUG_ON(reloc_root->commit_root != NULL); 4970 while (1) { 4971 trans = btrfs_join_transaction(root, 1); 4972 BUG_ON(!trans); 4973 4974 mutex_lock(&root->fs_info->drop_mutex); 4975 ret = btrfs_drop_snapshot(trans, reloc_root); 4976 if (ret != -EAGAIN) 4977 break; 4978 mutex_unlock(&root->fs_info->drop_mutex); 4979 4980 nr = trans->blocks_used; 4981 ret = btrfs_end_transaction(trans, root); 4982 BUG_ON(ret); 4983 btrfs_btree_balance_dirty(root, nr); 4984 } 4985 4986 free_extent_buffer(reloc_root->node); 4987 4988 ret = btrfs_del_root(trans, root->fs_info->tree_root, 4989 &reloc_root->root_key); 4990 BUG_ON(ret); 4991 mutex_unlock(&root->fs_info->drop_mutex); 4992 4993 nr = trans->blocks_used; 4994 ret = btrfs_end_transaction(trans, root); 4995 BUG_ON(ret); 4996 btrfs_btree_balance_dirty(root, nr); 4997 4998 kfree(prev_root); 4999 prev_root = reloc_root; 5000 } 5001 if (prev_root) { 5002 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); 5003 kfree(prev_root); 5004 } 5005 return 0; 5006 } 5007 5008 int btrfs_add_dead_reloc_root(struct btrfs_root *root) 5009 { 5010 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); 5011 return 0; 5012 } 5013 5014 int btrfs_cleanup_reloc_trees(struct btrfs_root *root) 5015 { 5016 struct btrfs_root *reloc_root; 5017 struct btrfs_trans_handle *trans; 5018 struct btrfs_key location; 5019 int found; 5020 int ret; 5021 5022 mutex_lock(&root->fs_info->tree_reloc_mutex); 5023 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); 5024 BUG_ON(ret); 5025 found = !list_empty(&root->fs_info->dead_reloc_roots); 5026 mutex_unlock(&root->fs_info->tree_reloc_mutex); 5027 5028 if (found) { 5029 trans = btrfs_start_transaction(root, 1); 5030 BUG_ON(!trans); 5031 ret = btrfs_commit_transaction(trans, root); 5032 BUG_ON(ret); 5033 } 5034 5035 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 5036 location.offset = (u64)-1; 5037 location.type = BTRFS_ROOT_ITEM_KEY; 5038 5039 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 5040 BUG_ON(!reloc_root); 5041 btrfs_orphan_cleanup(reloc_root); 5042 return 0; 5043 } 5044 5045 static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, 5046 struct btrfs_root *root) 5047 { 5048 struct btrfs_root *reloc_root; 5049 struct extent_buffer *eb; 5050 struct btrfs_root_item *root_item; 5051 struct btrfs_key root_key; 5052 int ret; 5053 5054 BUG_ON(!root->ref_cows); 5055 if (root->reloc_root) 5056 return 0; 5057 5058 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 5059 BUG_ON(!root_item); 5060 5061 ret = btrfs_copy_root(trans, root, root->commit_root, 5062 &eb, BTRFS_TREE_RELOC_OBJECTID); 5063 BUG_ON(ret); 5064 5065 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; 5066 root_key.offset = root->root_key.objectid; 5067 root_key.type = BTRFS_ROOT_ITEM_KEY; 5068 5069 memcpy(root_item, &root->root_item, sizeof(root_item)); 5070 btrfs_set_root_refs(root_item, 0); 5071 btrfs_set_root_bytenr(root_item, eb->start); 5072 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 5073 btrfs_set_root_generation(root_item, trans->transid); 5074 5075 btrfs_tree_unlock(eb); 5076 free_extent_buffer(eb); 5077 5078 ret = btrfs_insert_root(trans, root->fs_info->tree_root, 5079 &root_key, root_item); 5080 BUG_ON(ret); 5081 kfree(root_item); 5082 5083 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 5084 &root_key); 5085 BUG_ON(!reloc_root); 5086 reloc_root->last_trans = trans->transid; 5087 reloc_root->commit_root = NULL; 5088 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; 5089 5090 root->reloc_root = reloc_root; 5091 return 0; 5092 } 5093 5094 /* 5095 * Core function of space balance. 5096 * 5097 * The idea is using reloc trees to relocate tree blocks in reference 5098 * counted roots. There is one reloc tree for each subvol, and all 5099 * reloc trees share same root key objectid. Reloc trees are snapshots 5100 * of the latest committed roots of subvols (root->commit_root). 5101 * 5102 * To relocate a tree block referenced by a subvol, there are two steps. 5103 * COW the block through subvol's reloc tree, then update block pointer 5104 * in the subvol to point to the new block. Since all reloc trees share 5105 * same root key objectid, doing special handing for tree blocks owned 5106 * by them is easy. Once a tree block has been COWed in one reloc tree, 5107 * we can use the resulting new block directly when the same block is 5108 * required to COW again through other reloc trees. By this way, relocated 5109 * tree blocks are shared between reloc trees, so they are also shared 5110 * between subvols. 5111 */ 5112 static noinline int relocate_one_path(struct btrfs_trans_handle *trans, 5113 struct btrfs_root *root, 5114 struct btrfs_path *path, 5115 struct btrfs_key *first_key, 5116 struct btrfs_ref_path *ref_path, 5117 struct btrfs_block_group_cache *group, 5118 struct inode *reloc_inode) 5119 { 5120 struct btrfs_root *reloc_root; 5121 struct extent_buffer *eb = NULL; 5122 struct btrfs_key *keys; 5123 u64 *nodes; 5124 int level; 5125 int shared_level; 5126 int lowest_level = 0; 5127 int ret; 5128 5129 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 5130 lowest_level = ref_path->owner_objectid; 5131 5132 if (!root->ref_cows) { 5133 path->lowest_level = lowest_level; 5134 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); 5135 BUG_ON(ret < 0); 5136 path->lowest_level = 0; 5137 btrfs_release_path(root, path); 5138 return 0; 5139 } 5140 5141 mutex_lock(&root->fs_info->tree_reloc_mutex); 5142 ret = init_reloc_tree(trans, root); 5143 BUG_ON(ret); 5144 reloc_root = root->reloc_root; 5145 5146 shared_level = ref_path->shared_level; 5147 ref_path->shared_level = BTRFS_MAX_LEVEL - 1; 5148 5149 keys = ref_path->node_keys; 5150 nodes = ref_path->new_nodes; 5151 memset(&keys[shared_level + 1], 0, 5152 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1)); 5153 memset(&nodes[shared_level + 1], 0, 5154 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1)); 5155 5156 if (nodes[lowest_level] == 0) { 5157 path->lowest_level = lowest_level; 5158 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 5159 0, 1); 5160 BUG_ON(ret); 5161 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) { 5162 eb = path->nodes[level]; 5163 if (!eb || eb == reloc_root->node) 5164 break; 5165 nodes[level] = eb->start; 5166 if (level == 0) 5167 btrfs_item_key_to_cpu(eb, &keys[level], 0); 5168 else 5169 btrfs_node_key_to_cpu(eb, &keys[level], 0); 5170 } 5171 if (nodes[0] && 5172 ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 5173 eb = path->nodes[0]; 5174 ret = replace_extents_in_leaf(trans, reloc_root, eb, 5175 group, reloc_inode); 5176 BUG_ON(ret); 5177 } 5178 btrfs_release_path(reloc_root, path); 5179 } else { 5180 ret = btrfs_merge_path(trans, reloc_root, keys, nodes, 5181 lowest_level); 5182 BUG_ON(ret); 5183 } 5184 5185 /* 5186 * replace tree blocks in the fs tree with tree blocks in 5187 * the reloc tree. 5188 */ 5189 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); 5190 BUG_ON(ret < 0); 5191 5192 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 5193 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 5194 0, 0); 5195 BUG_ON(ret); 5196 extent_buffer_get(path->nodes[0]); 5197 eb = path->nodes[0]; 5198 btrfs_release_path(reloc_root, path); 5199 ret = invalidate_extent_cache(reloc_root, eb, group, root); 5200 BUG_ON(ret); 5201 free_extent_buffer(eb); 5202 } 5203 5204 mutex_unlock(&root->fs_info->tree_reloc_mutex); 5205 path->lowest_level = 0; 5206 return 0; 5207 } 5208 5209 static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, 5210 struct btrfs_root *root, 5211 struct btrfs_path *path, 5212 struct btrfs_key *first_key, 5213 struct btrfs_ref_path *ref_path) 5214 { 5215 int ret; 5216 5217 ret = relocate_one_path(trans, root, path, first_key, 5218 ref_path, NULL, NULL); 5219 BUG_ON(ret); 5220 5221 if (root == root->fs_info->extent_root) 5222 btrfs_extent_post_op(trans, root); 5223 5224 return 0; 5225 } 5226 5227 static noinline int del_extent_zero(struct btrfs_trans_handle *trans, 5228 struct btrfs_root *extent_root, 5229 struct btrfs_path *path, 5230 struct btrfs_key *extent_key) 5231 { 5232 int ret; 5233 5234 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); 5235 if (ret) 5236 goto out; 5237 ret = btrfs_del_item(trans, extent_root, path); 5238 out: 5239 btrfs_release_path(extent_root, path); 5240 return ret; 5241 } 5242 5243 static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info, 5244 struct btrfs_ref_path *ref_path) 5245 { 5246 struct btrfs_key root_key; 5247 5248 root_key.objectid = ref_path->root_objectid; 5249 root_key.type = BTRFS_ROOT_ITEM_KEY; 5250 if (is_cowonly_root(ref_path->root_objectid)) 5251 root_key.offset = 0; 5252 else 5253 root_key.offset = (u64)-1; 5254 5255 return btrfs_read_fs_root_no_name(fs_info, &root_key); 5256 } 5257 5258 static noinline int relocate_one_extent(struct btrfs_root *extent_root, 5259 struct btrfs_path *path, 5260 struct btrfs_key *extent_key, 5261 struct btrfs_block_group_cache *group, 5262 struct inode *reloc_inode, int pass) 5263 { 5264 struct btrfs_trans_handle *trans; 5265 struct btrfs_root *found_root; 5266 struct btrfs_ref_path *ref_path = NULL; 5267 struct disk_extent *new_extents = NULL; 5268 int nr_extents = 0; 5269 int loops; 5270 int ret; 5271 int level; 5272 struct btrfs_key first_key; 5273 u64 prev_block = 0; 5274 5275 5276 trans = btrfs_start_transaction(extent_root, 1); 5277 BUG_ON(!trans); 5278 5279 if (extent_key->objectid == 0) { 5280 ret = del_extent_zero(trans, extent_root, path, extent_key); 5281 goto out; 5282 } 5283 5284 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); 5285 if (!ref_path) { 5286 ret = -ENOMEM; 5287 goto out; 5288 } 5289 5290 for (loops = 0; ; loops++) { 5291 if (loops == 0) { 5292 ret = btrfs_first_ref_path(trans, extent_root, ref_path, 5293 extent_key->objectid); 5294 } else { 5295 ret = btrfs_next_ref_path(trans, extent_root, ref_path); 5296 } 5297 if (ret < 0) 5298 goto out; 5299 if (ret > 0) 5300 break; 5301 5302 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || 5303 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) 5304 continue; 5305 5306 found_root = read_ref_root(extent_root->fs_info, ref_path); 5307 BUG_ON(!found_root); 5308 /* 5309 * for reference counted tree, only process reference paths 5310 * rooted at the latest committed root. 5311 */ 5312 if (found_root->ref_cows && 5313 ref_path->root_generation != found_root->root_key.offset) 5314 continue; 5315 5316 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 5317 if (pass == 0) { 5318 /* 5319 * copy data extents to new locations 5320 */ 5321 u64 group_start = group->key.objectid; 5322 ret = relocate_data_extent(reloc_inode, 5323 extent_key, 5324 group_start); 5325 if (ret < 0) 5326 goto out; 5327 break; 5328 } 5329 level = 0; 5330 } else { 5331 level = ref_path->owner_objectid; 5332 } 5333 5334 if (prev_block != ref_path->nodes[level]) { 5335 struct extent_buffer *eb; 5336 u64 block_start = ref_path->nodes[level]; 5337 u64 block_size = btrfs_level_size(found_root, level); 5338 5339 eb = read_tree_block(found_root, block_start, 5340 block_size, 0); 5341 btrfs_tree_lock(eb); 5342 BUG_ON(level != btrfs_header_level(eb)); 5343 5344 if (level == 0) 5345 btrfs_item_key_to_cpu(eb, &first_key, 0); 5346 else 5347 btrfs_node_key_to_cpu(eb, &first_key, 0); 5348 5349 btrfs_tree_unlock(eb); 5350 free_extent_buffer(eb); 5351 prev_block = block_start; 5352 } 5353 5354 btrfs_record_root_in_trans(found_root); 5355 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 5356 /* 5357 * try to update data extent references while 5358 * keeping metadata shared between snapshots. 5359 */ 5360 if (pass == 1) { 5361 ret = relocate_one_path(trans, found_root, 5362 path, &first_key, ref_path, 5363 group, reloc_inode); 5364 if (ret < 0) 5365 goto out; 5366 continue; 5367 } 5368 /* 5369 * use fallback method to process the remaining 5370 * references. 5371 */ 5372 if (!new_extents) { 5373 u64 group_start = group->key.objectid; 5374 new_extents = kmalloc(sizeof(*new_extents), 5375 GFP_NOFS); 5376 nr_extents = 1; 5377 ret = get_new_locations(reloc_inode, 5378 extent_key, 5379 group_start, 1, 5380 &new_extents, 5381 &nr_extents); 5382 if (ret) 5383 goto out; 5384 } 5385 ret = replace_one_extent(trans, found_root, 5386 path, extent_key, 5387 &first_key, ref_path, 5388 new_extents, nr_extents); 5389 } else { 5390 ret = relocate_tree_block(trans, found_root, path, 5391 &first_key, ref_path); 5392 } 5393 if (ret < 0) 5394 goto out; 5395 } 5396 ret = 0; 5397 out: 5398 btrfs_end_transaction(trans, extent_root); 5399 kfree(new_extents); 5400 kfree(ref_path); 5401 return ret; 5402 } 5403 5404 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 5405 { 5406 u64 num_devices; 5407 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 5408 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 5409 5410 num_devices = root->fs_info->fs_devices->rw_devices; 5411 if (num_devices == 1) { 5412 stripped |= BTRFS_BLOCK_GROUP_DUP; 5413 stripped = flags & ~stripped; 5414 5415 /* turn raid0 into single device chunks */ 5416 if (flags & BTRFS_BLOCK_GROUP_RAID0) 5417 return stripped; 5418 5419 /* turn mirroring into duplication */ 5420 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 5421 BTRFS_BLOCK_GROUP_RAID10)) 5422 return stripped | BTRFS_BLOCK_GROUP_DUP; 5423 return flags; 5424 } else { 5425 /* they already had raid on here, just return */ 5426 if (flags & stripped) 5427 return flags; 5428 5429 stripped |= BTRFS_BLOCK_GROUP_DUP; 5430 stripped = flags & ~stripped; 5431 5432 /* switch duplicated blocks with raid1 */ 5433 if (flags & BTRFS_BLOCK_GROUP_DUP) 5434 return stripped | BTRFS_BLOCK_GROUP_RAID1; 5435 5436 /* turn single device chunks into raid0 */ 5437 return stripped | BTRFS_BLOCK_GROUP_RAID0; 5438 } 5439 return flags; 5440 } 5441 5442 static int __alloc_chunk_for_shrink(struct btrfs_root *root, 5443 struct btrfs_block_group_cache *shrink_block_group, 5444 int force) 5445 { 5446 struct btrfs_trans_handle *trans; 5447 u64 new_alloc_flags; 5448 u64 calc; 5449 5450 spin_lock(&shrink_block_group->lock); 5451 if (btrfs_block_group_used(&shrink_block_group->item) > 0) { 5452 spin_unlock(&shrink_block_group->lock); 5453 5454 trans = btrfs_start_transaction(root, 1); 5455 spin_lock(&shrink_block_group->lock); 5456 5457 new_alloc_flags = update_block_group_flags(root, 5458 shrink_block_group->flags); 5459 if (new_alloc_flags != shrink_block_group->flags) { 5460 calc = 5461 btrfs_block_group_used(&shrink_block_group->item); 5462 } else { 5463 calc = shrink_block_group->key.offset; 5464 } 5465 spin_unlock(&shrink_block_group->lock); 5466 5467 do_chunk_alloc(trans, root->fs_info->extent_root, 5468 calc + 2 * 1024 * 1024, new_alloc_flags, force); 5469 5470 btrfs_end_transaction(trans, root); 5471 } else 5472 spin_unlock(&shrink_block_group->lock); 5473 return 0; 5474 } 5475 5476 static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 5477 struct btrfs_root *root, 5478 u64 objectid, u64 size) 5479 { 5480 struct btrfs_path *path; 5481 struct btrfs_inode_item *item; 5482 struct extent_buffer *leaf; 5483 int ret; 5484 5485 path = btrfs_alloc_path(); 5486 if (!path) 5487 return -ENOMEM; 5488 5489 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 5490 if (ret) 5491 goto out; 5492 5493 leaf = path->nodes[0]; 5494 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 5495 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 5496 btrfs_set_inode_generation(leaf, item, 1); 5497 btrfs_set_inode_size(leaf, item, size); 5498 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 5499 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); 5500 btrfs_mark_buffer_dirty(leaf); 5501 btrfs_release_path(root, path); 5502 out: 5503 btrfs_free_path(path); 5504 return ret; 5505 } 5506 5507 static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, 5508 struct btrfs_block_group_cache *group) 5509 { 5510 struct inode *inode = NULL; 5511 struct btrfs_trans_handle *trans; 5512 struct btrfs_root *root; 5513 struct btrfs_key root_key; 5514 u64 objectid = BTRFS_FIRST_FREE_OBJECTID; 5515 int err = 0; 5516 5517 root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 5518 root_key.type = BTRFS_ROOT_ITEM_KEY; 5519 root_key.offset = (u64)-1; 5520 root = btrfs_read_fs_root_no_name(fs_info, &root_key); 5521 if (IS_ERR(root)) 5522 return ERR_CAST(root); 5523 5524 trans = btrfs_start_transaction(root, 1); 5525 BUG_ON(!trans); 5526 5527 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 5528 if (err) 5529 goto out; 5530 5531 err = __insert_orphan_inode(trans, root, objectid, group->key.offset); 5532 BUG_ON(err); 5533 5534 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, 5535 group->key.offset, 0, group->key.offset, 5536 0, 0, 0); 5537 BUG_ON(err); 5538 5539 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); 5540 if (inode->i_state & I_NEW) { 5541 BTRFS_I(inode)->root = root; 5542 BTRFS_I(inode)->location.objectid = objectid; 5543 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; 5544 BTRFS_I(inode)->location.offset = 0; 5545 btrfs_read_locked_inode(inode); 5546 unlock_new_inode(inode); 5547 BUG_ON(is_bad_inode(inode)); 5548 } else { 5549 BUG_ON(1); 5550 } 5551 BTRFS_I(inode)->index_cnt = group->key.objectid; 5552 5553 err = btrfs_orphan_add(trans, inode); 5554 out: 5555 btrfs_end_transaction(trans, root); 5556 if (err) { 5557 if (inode) 5558 iput(inode); 5559 inode = ERR_PTR(err); 5560 } 5561 return inode; 5562 } 5563 5564 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) 5565 { 5566 5567 struct btrfs_ordered_sum *sums; 5568 struct btrfs_sector_sum *sector_sum; 5569 struct btrfs_ordered_extent *ordered; 5570 struct btrfs_root *root = BTRFS_I(inode)->root; 5571 struct list_head list; 5572 size_t offset; 5573 int ret; 5574 u64 disk_bytenr; 5575 5576 INIT_LIST_HEAD(&list); 5577 5578 ordered = btrfs_lookup_ordered_extent(inode, file_pos); 5579 BUG_ON(ordered->file_offset != file_pos || ordered->len != len); 5580 5581 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 5582 ret = btrfs_lookup_csums_range(root, disk_bytenr, 5583 disk_bytenr + len - 1, &list); 5584 5585 while (!list_empty(&list)) { 5586 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 5587 list_del_init(&sums->list); 5588 5589 sector_sum = sums->sums; 5590 sums->bytenr = ordered->start; 5591 5592 offset = 0; 5593 while (offset < sums->len) { 5594 sector_sum->bytenr += ordered->start - disk_bytenr; 5595 sector_sum++; 5596 offset += root->sectorsize; 5597 } 5598 5599 btrfs_add_ordered_sum(inode, ordered, sums); 5600 } 5601 btrfs_put_ordered_extent(ordered); 5602 return 0; 5603 } 5604 5605 int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) 5606 { 5607 struct btrfs_trans_handle *trans; 5608 struct btrfs_path *path; 5609 struct btrfs_fs_info *info = root->fs_info; 5610 struct extent_buffer *leaf; 5611 struct inode *reloc_inode; 5612 struct btrfs_block_group_cache *block_group; 5613 struct btrfs_key key; 5614 u64 skipped; 5615 u64 cur_byte; 5616 u64 total_found; 5617 u32 nritems; 5618 int ret; 5619 int progress; 5620 int pass = 0; 5621 5622 root = root->fs_info->extent_root; 5623 5624 block_group = btrfs_lookup_block_group(info, group_start); 5625 BUG_ON(!block_group); 5626 5627 printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", 5628 (unsigned long long)block_group->key.objectid, 5629 (unsigned long long)block_group->flags); 5630 5631 path = btrfs_alloc_path(); 5632 BUG_ON(!path); 5633 5634 reloc_inode = create_reloc_inode(info, block_group); 5635 BUG_ON(IS_ERR(reloc_inode)); 5636 5637 __alloc_chunk_for_shrink(root, block_group, 1); 5638 set_block_group_readonly(block_group); 5639 5640 btrfs_start_delalloc_inodes(info->tree_root); 5641 btrfs_wait_ordered_extents(info->tree_root, 0); 5642 again: 5643 skipped = 0; 5644 total_found = 0; 5645 progress = 0; 5646 key.objectid = block_group->key.objectid; 5647 key.offset = 0; 5648 key.type = 0; 5649 cur_byte = key.objectid; 5650 5651 trans = btrfs_start_transaction(info->tree_root, 1); 5652 btrfs_commit_transaction(trans, info->tree_root); 5653 5654 mutex_lock(&root->fs_info->cleaner_mutex); 5655 btrfs_clean_old_snapshots(info->tree_root); 5656 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); 5657 mutex_unlock(&root->fs_info->cleaner_mutex); 5658 5659 while (1) { 5660 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5661 if (ret < 0) 5662 goto out; 5663 next: 5664 leaf = path->nodes[0]; 5665 nritems = btrfs_header_nritems(leaf); 5666 if (path->slots[0] >= nritems) { 5667 ret = btrfs_next_leaf(root, path); 5668 if (ret < 0) 5669 goto out; 5670 if (ret == 1) { 5671 ret = 0; 5672 break; 5673 } 5674 leaf = path->nodes[0]; 5675 nritems = btrfs_header_nritems(leaf); 5676 } 5677 5678 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 5679 5680 if (key.objectid >= block_group->key.objectid + 5681 block_group->key.offset) 5682 break; 5683 5684 if (progress && need_resched()) { 5685 btrfs_release_path(root, path); 5686 cond_resched(); 5687 progress = 0; 5688 continue; 5689 } 5690 progress = 1; 5691 5692 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || 5693 key.objectid + key.offset <= cur_byte) { 5694 path->slots[0]++; 5695 goto next; 5696 } 5697 5698 total_found++; 5699 cur_byte = key.objectid + key.offset; 5700 btrfs_release_path(root, path); 5701 5702 __alloc_chunk_for_shrink(root, block_group, 0); 5703 ret = relocate_one_extent(root, path, &key, block_group, 5704 reloc_inode, pass); 5705 BUG_ON(ret < 0); 5706 if (ret > 0) 5707 skipped++; 5708 5709 key.objectid = cur_byte; 5710 key.type = 0; 5711 key.offset = 0; 5712 } 5713 5714 btrfs_release_path(root, path); 5715 5716 if (pass == 0) { 5717 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); 5718 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); 5719 } 5720 5721 if (total_found > 0) { 5722 printk(KERN_INFO "btrfs found %llu extents in pass %d\n", 5723 (unsigned long long)total_found, pass); 5724 pass++; 5725 if (total_found == skipped && pass > 2) { 5726 iput(reloc_inode); 5727 reloc_inode = create_reloc_inode(info, block_group); 5728 pass = 0; 5729 } 5730 goto again; 5731 } 5732 5733 /* delete reloc_inode */ 5734 iput(reloc_inode); 5735 5736 /* unpin extents in this range */ 5737 trans = btrfs_start_transaction(info->tree_root, 1); 5738 btrfs_commit_transaction(trans, info->tree_root); 5739 5740 spin_lock(&block_group->lock); 5741 WARN_ON(block_group->pinned > 0); 5742 WARN_ON(block_group->reserved > 0); 5743 WARN_ON(btrfs_block_group_used(&block_group->item) > 0); 5744 spin_unlock(&block_group->lock); 5745 put_block_group(block_group); 5746 ret = 0; 5747 out: 5748 btrfs_free_path(path); 5749 return ret; 5750 } 5751 5752 static int find_first_block_group(struct btrfs_root *root, 5753 struct btrfs_path *path, struct btrfs_key *key) 5754 { 5755 int ret = 0; 5756 struct btrfs_key found_key; 5757 struct extent_buffer *leaf; 5758 int slot; 5759 5760 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 5761 if (ret < 0) 5762 goto out; 5763 5764 while (1) { 5765 slot = path->slots[0]; 5766 leaf = path->nodes[0]; 5767 if (slot >= btrfs_header_nritems(leaf)) { 5768 ret = btrfs_next_leaf(root, path); 5769 if (ret == 0) 5770 continue; 5771 if (ret < 0) 5772 goto out; 5773 break; 5774 } 5775 btrfs_item_key_to_cpu(leaf, &found_key, slot); 5776 5777 if (found_key.objectid >= key->objectid && 5778 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 5779 ret = 0; 5780 goto out; 5781 } 5782 path->slots[0]++; 5783 } 5784 ret = -ENOENT; 5785 out: 5786 return ret; 5787 } 5788 5789 int btrfs_free_block_groups(struct btrfs_fs_info *info) 5790 { 5791 struct btrfs_block_group_cache *block_group; 5792 struct rb_node *n; 5793 5794 spin_lock(&info->block_group_cache_lock); 5795 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 5796 block_group = rb_entry(n, struct btrfs_block_group_cache, 5797 cache_node); 5798 rb_erase(&block_group->cache_node, 5799 &info->block_group_cache_tree); 5800 spin_unlock(&info->block_group_cache_lock); 5801 5802 btrfs_remove_free_space_cache(block_group); 5803 down_write(&block_group->space_info->groups_sem); 5804 list_del(&block_group->list); 5805 up_write(&block_group->space_info->groups_sem); 5806 5807 WARN_ON(atomic_read(&block_group->count) != 1); 5808 kfree(block_group); 5809 5810 spin_lock(&info->block_group_cache_lock); 5811 } 5812 spin_unlock(&info->block_group_cache_lock); 5813 return 0; 5814 } 5815 5816 int btrfs_read_block_groups(struct btrfs_root *root) 5817 { 5818 struct btrfs_path *path; 5819 int ret; 5820 struct btrfs_block_group_cache *cache; 5821 struct btrfs_fs_info *info = root->fs_info; 5822 struct btrfs_space_info *space_info; 5823 struct btrfs_key key; 5824 struct btrfs_key found_key; 5825 struct extent_buffer *leaf; 5826 5827 root = info->extent_root; 5828 key.objectid = 0; 5829 key.offset = 0; 5830 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 5831 path = btrfs_alloc_path(); 5832 if (!path) 5833 return -ENOMEM; 5834 5835 while (1) { 5836 ret = find_first_block_group(root, path, &key); 5837 if (ret > 0) { 5838 ret = 0; 5839 goto error; 5840 } 5841 if (ret != 0) 5842 goto error; 5843 5844 leaf = path->nodes[0]; 5845 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5846 cache = kzalloc(sizeof(*cache), GFP_NOFS); 5847 if (!cache) { 5848 ret = -ENOMEM; 5849 break; 5850 } 5851 5852 atomic_set(&cache->count, 1); 5853 spin_lock_init(&cache->lock); 5854 mutex_init(&cache->alloc_mutex); 5855 mutex_init(&cache->cache_mutex); 5856 INIT_LIST_HEAD(&cache->list); 5857 read_extent_buffer(leaf, &cache->item, 5858 btrfs_item_ptr_offset(leaf, path->slots[0]), 5859 sizeof(cache->item)); 5860 memcpy(&cache->key, &found_key, sizeof(found_key)); 5861 5862 key.objectid = found_key.objectid + found_key.offset; 5863 btrfs_release_path(root, path); 5864 cache->flags = btrfs_block_group_flags(&cache->item); 5865 5866 ret = update_space_info(info, cache->flags, found_key.offset, 5867 btrfs_block_group_used(&cache->item), 5868 &space_info); 5869 BUG_ON(ret); 5870 cache->space_info = space_info; 5871 down_write(&space_info->groups_sem); 5872 list_add_tail(&cache->list, &space_info->block_groups); 5873 up_write(&space_info->groups_sem); 5874 5875 ret = btrfs_add_block_group_cache(root->fs_info, cache); 5876 BUG_ON(ret); 5877 5878 set_avail_alloc_bits(root->fs_info, cache->flags); 5879 if (btrfs_chunk_readonly(root, cache->key.objectid)) 5880 set_block_group_readonly(cache); 5881 } 5882 ret = 0; 5883 error: 5884 btrfs_free_path(path); 5885 return ret; 5886 } 5887 5888 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 5889 struct btrfs_root *root, u64 bytes_used, 5890 u64 type, u64 chunk_objectid, u64 chunk_offset, 5891 u64 size) 5892 { 5893 int ret; 5894 struct btrfs_root *extent_root; 5895 struct btrfs_block_group_cache *cache; 5896 5897 extent_root = root->fs_info->extent_root; 5898 5899 root->fs_info->last_trans_new_blockgroup = trans->transid; 5900 5901 cache = kzalloc(sizeof(*cache), GFP_NOFS); 5902 if (!cache) 5903 return -ENOMEM; 5904 5905 cache->key.objectid = chunk_offset; 5906 cache->key.offset = size; 5907 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 5908 atomic_set(&cache->count, 1); 5909 spin_lock_init(&cache->lock); 5910 mutex_init(&cache->alloc_mutex); 5911 mutex_init(&cache->cache_mutex); 5912 INIT_LIST_HEAD(&cache->list); 5913 5914 btrfs_set_block_group_used(&cache->item, bytes_used); 5915 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 5916 cache->flags = type; 5917 btrfs_set_block_group_flags(&cache->item, type); 5918 5919 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 5920 &cache->space_info); 5921 BUG_ON(ret); 5922 down_write(&cache->space_info->groups_sem); 5923 list_add_tail(&cache->list, &cache->space_info->block_groups); 5924 up_write(&cache->space_info->groups_sem); 5925 5926 ret = btrfs_add_block_group_cache(root->fs_info, cache); 5927 BUG_ON(ret); 5928 5929 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 5930 sizeof(cache->item)); 5931 BUG_ON(ret); 5932 5933 finish_current_insert(trans, extent_root, 0); 5934 ret = del_pending_extents(trans, extent_root, 0); 5935 BUG_ON(ret); 5936 set_avail_alloc_bits(extent_root->fs_info, type); 5937 5938 return 0; 5939 } 5940 5941 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 5942 struct btrfs_root *root, u64 group_start) 5943 { 5944 struct btrfs_path *path; 5945 struct btrfs_block_group_cache *block_group; 5946 struct btrfs_key key; 5947 int ret; 5948 5949 root = root->fs_info->extent_root; 5950 5951 block_group = btrfs_lookup_block_group(root->fs_info, group_start); 5952 BUG_ON(!block_group); 5953 BUG_ON(!block_group->ro); 5954 5955 memcpy(&key, &block_group->key, sizeof(key)); 5956 5957 path = btrfs_alloc_path(); 5958 BUG_ON(!path); 5959 5960 btrfs_remove_free_space_cache(block_group); 5961 rb_erase(&block_group->cache_node, 5962 &root->fs_info->block_group_cache_tree); 5963 down_write(&block_group->space_info->groups_sem); 5964 list_del(&block_group->list); 5965 up_write(&block_group->space_info->groups_sem); 5966 5967 spin_lock(&block_group->space_info->lock); 5968 block_group->space_info->total_bytes -= block_group->key.offset; 5969 block_group->space_info->bytes_readonly -= block_group->key.offset; 5970 spin_unlock(&block_group->space_info->lock); 5971 block_group->space_info->full = 0; 5972 5973 put_block_group(block_group); 5974 put_block_group(block_group); 5975 5976 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 5977 if (ret > 0) 5978 ret = -EIO; 5979 if (ret < 0) 5980 goto out; 5981 5982 ret = btrfs_del_item(trans, root, path); 5983 out: 5984 btrfs_free_path(path); 5985 return ret; 5986 } 5987