1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 #include <linux/sched.h> 19 #include <linux/pagemap.h> 20 #include <linux/writeback.h> 21 #include "hash.h" 22 #include "crc32c.h" 23 #include "ctree.h" 24 #include "disk-io.h" 25 #include "print-tree.h" 26 #include "transaction.h" 27 #include "volumes.h" 28 #include "locking.h" 29 30 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK 31 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE 32 #define BLOCK_GROUP_SYSTEM EXTENT_NEW 33 34 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY 35 36 static int finish_current_insert(struct btrfs_trans_handle *trans, struct 37 btrfs_root *extent_root); 38 static int del_pending_extents(struct btrfs_trans_handle *trans, struct 39 btrfs_root *extent_root); 40 static struct btrfs_block_group_cache * 41 __btrfs_find_block_group(struct btrfs_root *root, 42 struct btrfs_block_group_cache *hint, 43 u64 search_start, int data, int owner); 44 45 void maybe_lock_mutex(struct btrfs_root *root) 46 { 47 if (root != root->fs_info->extent_root && 48 root != root->fs_info->chunk_root && 49 root != root->fs_info->dev_root) { 50 mutex_lock(&root->fs_info->alloc_mutex); 51 } 52 } 53 54 void maybe_unlock_mutex(struct btrfs_root *root) 55 { 56 if (root != root->fs_info->extent_root && 57 root != root->fs_info->chunk_root && 58 root != root->fs_info->dev_root) { 59 mutex_unlock(&root->fs_info->alloc_mutex); 60 } 61 } 62 63 static int cache_block_group(struct btrfs_root *root, 64 struct btrfs_block_group_cache *block_group) 65 { 66 struct btrfs_path *path; 67 int ret; 68 struct btrfs_key key; 69 struct extent_buffer *leaf; 70 struct extent_io_tree *free_space_cache; 71 int slot; 72 u64 last = 0; 73 u64 hole_size; 74 u64 first_free; 75 int found = 0; 76 77 if (!block_group) 78 return 0; 79 80 root = root->fs_info->extent_root; 81 free_space_cache = &root->fs_info->free_space_cache; 82 83 if (block_group->cached) 84 return 0; 85 86 path = btrfs_alloc_path(); 87 if (!path) 88 return -ENOMEM; 89 90 path->reada = 2; 91 first_free = block_group->key.objectid; 92 key.objectid = block_group->key.objectid; 93 key.offset = 0; 94 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 95 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 96 if (ret < 0) 97 return ret; 98 ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); 99 if (ret < 0) 100 return ret; 101 if (ret == 0) { 102 leaf = path->nodes[0]; 103 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 104 if (key.objectid + key.offset > first_free) 105 first_free = key.objectid + key.offset; 106 } 107 while(1) { 108 leaf = path->nodes[0]; 109 slot = path->slots[0]; 110 if (slot >= btrfs_header_nritems(leaf)) { 111 ret = btrfs_next_leaf(root, path); 112 if (ret < 0) 113 goto err; 114 if (ret == 0) { 115 continue; 116 } else { 117 break; 118 } 119 } 120 btrfs_item_key_to_cpu(leaf, &key, slot); 121 if (key.objectid < block_group->key.objectid) { 122 goto next; 123 } 124 if (key.objectid >= block_group->key.objectid + 125 block_group->key.offset) { 126 break; 127 } 128 129 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 130 if (!found) { 131 last = first_free; 132 found = 1; 133 } 134 if (key.objectid > last) { 135 hole_size = key.objectid - last; 136 set_extent_dirty(free_space_cache, last, 137 last + hole_size - 1, 138 GFP_NOFS); 139 } 140 last = key.objectid + key.offset; 141 } 142 next: 143 path->slots[0]++; 144 } 145 146 if (!found) 147 last = first_free; 148 if (block_group->key.objectid + 149 block_group->key.offset > last) { 150 hole_size = block_group->key.objectid + 151 block_group->key.offset - last; 152 set_extent_dirty(free_space_cache, last, 153 last + hole_size - 1, GFP_NOFS); 154 } 155 block_group->cached = 1; 156 err: 157 btrfs_free_path(path); 158 return 0; 159 } 160 161 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct 162 btrfs_fs_info *info, 163 u64 bytenr) 164 { 165 struct extent_io_tree *block_group_cache; 166 struct btrfs_block_group_cache *block_group = NULL; 167 u64 ptr; 168 u64 start; 169 u64 end; 170 int ret; 171 172 bytenr = max_t(u64, bytenr, 173 BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); 174 block_group_cache = &info->block_group_cache; 175 ret = find_first_extent_bit(block_group_cache, 176 bytenr, &start, &end, 177 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | 178 BLOCK_GROUP_SYSTEM); 179 if (ret) { 180 return NULL; 181 } 182 ret = get_state_private(block_group_cache, start, &ptr); 183 if (ret) 184 return NULL; 185 186 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; 187 return block_group; 188 } 189 190 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct 191 btrfs_fs_info *info, 192 u64 bytenr) 193 { 194 struct extent_io_tree *block_group_cache; 195 struct btrfs_block_group_cache *block_group = NULL; 196 u64 ptr; 197 u64 start; 198 u64 end; 199 int ret; 200 201 bytenr = max_t(u64, bytenr, 202 BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); 203 block_group_cache = &info->block_group_cache; 204 ret = find_first_extent_bit(block_group_cache, 205 bytenr, &start, &end, 206 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | 207 BLOCK_GROUP_SYSTEM); 208 if (ret) { 209 return NULL; 210 } 211 ret = get_state_private(block_group_cache, start, &ptr); 212 if (ret) 213 return NULL; 214 215 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; 216 if (block_group->key.objectid <= bytenr && bytenr < 217 block_group->key.objectid + block_group->key.offset) 218 return block_group; 219 return NULL; 220 } 221 222 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 223 { 224 return (cache->flags & bits) == bits; 225 } 226 227 static int noinline find_search_start(struct btrfs_root *root, 228 struct btrfs_block_group_cache **cache_ret, 229 u64 *start_ret, u64 num, int data) 230 { 231 int ret; 232 struct btrfs_block_group_cache *cache = *cache_ret; 233 struct extent_io_tree *free_space_cache; 234 struct extent_state *state; 235 u64 last; 236 u64 start = 0; 237 u64 cache_miss = 0; 238 u64 total_fs_bytes; 239 u64 search_start = *start_ret; 240 int wrapped = 0; 241 242 total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); 243 free_space_cache = &root->fs_info->free_space_cache; 244 245 if (!cache) 246 goto out; 247 248 again: 249 ret = cache_block_group(root, cache); 250 if (ret) { 251 goto out; 252 } 253 254 last = max(search_start, cache->key.objectid); 255 if (!block_group_bits(cache, data) || cache->ro) 256 goto new_group; 257 258 spin_lock_irq(&free_space_cache->lock); 259 state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY); 260 while(1) { 261 if (!state) { 262 if (!cache_miss) 263 cache_miss = last; 264 spin_unlock_irq(&free_space_cache->lock); 265 goto new_group; 266 } 267 268 start = max(last, state->start); 269 last = state->end + 1; 270 if (last - start < num) { 271 do { 272 state = extent_state_next(state); 273 } while(state && !(state->state & EXTENT_DIRTY)); 274 continue; 275 } 276 spin_unlock_irq(&free_space_cache->lock); 277 if (cache->ro) { 278 goto new_group; 279 } 280 if (start + num > cache->key.objectid + cache->key.offset) 281 goto new_group; 282 if (!block_group_bits(cache, data)) { 283 printk("block group bits don't match %Lu %d\n", cache->flags, data); 284 } 285 *start_ret = start; 286 return 0; 287 } 288 out: 289 cache = btrfs_lookup_block_group(root->fs_info, search_start); 290 if (!cache) { 291 printk("Unable to find block group for %Lu\n", search_start); 292 WARN_ON(1); 293 } 294 return -ENOSPC; 295 296 new_group: 297 last = cache->key.objectid + cache->key.offset; 298 wrapped: 299 cache = btrfs_lookup_first_block_group(root->fs_info, last); 300 if (!cache || cache->key.objectid >= total_fs_bytes) { 301 no_cache: 302 if (!wrapped) { 303 wrapped = 1; 304 last = search_start; 305 goto wrapped; 306 } 307 goto out; 308 } 309 if (cache_miss && !cache->cached) { 310 cache_block_group(root, cache); 311 last = cache_miss; 312 cache = btrfs_lookup_first_block_group(root->fs_info, last); 313 } 314 cache_miss = 0; 315 cache = __btrfs_find_block_group(root, cache, last, data, 0); 316 if (!cache) 317 goto no_cache; 318 *cache_ret = cache; 319 goto again; 320 } 321 322 static u64 div_factor(u64 num, int factor) 323 { 324 if (factor == 10) 325 return num; 326 num *= factor; 327 do_div(num, 10); 328 return num; 329 } 330 331 static int block_group_state_bits(u64 flags) 332 { 333 int bits = 0; 334 if (flags & BTRFS_BLOCK_GROUP_DATA) 335 bits |= BLOCK_GROUP_DATA; 336 if (flags & BTRFS_BLOCK_GROUP_METADATA) 337 bits |= BLOCK_GROUP_METADATA; 338 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 339 bits |= BLOCK_GROUP_SYSTEM; 340 return bits; 341 } 342 343 static struct btrfs_block_group_cache * 344 __btrfs_find_block_group(struct btrfs_root *root, 345 struct btrfs_block_group_cache *hint, 346 u64 search_start, int data, int owner) 347 { 348 struct btrfs_block_group_cache *cache; 349 struct extent_io_tree *block_group_cache; 350 struct btrfs_block_group_cache *found_group = NULL; 351 struct btrfs_fs_info *info = root->fs_info; 352 u64 used; 353 u64 last = 0; 354 u64 start; 355 u64 end; 356 u64 free_check; 357 u64 ptr; 358 int bit; 359 int ret; 360 int full_search = 0; 361 int factor = 10; 362 int wrapped = 0; 363 364 block_group_cache = &info->block_group_cache; 365 366 if (data & BTRFS_BLOCK_GROUP_METADATA) 367 factor = 9; 368 369 bit = block_group_state_bits(data); 370 371 if (search_start) { 372 struct btrfs_block_group_cache *shint; 373 shint = btrfs_lookup_first_block_group(info, search_start); 374 if (shint && block_group_bits(shint, data) && !shint->ro) { 375 used = btrfs_block_group_used(&shint->item); 376 if (used + shint->pinned < 377 div_factor(shint->key.offset, factor)) { 378 return shint; 379 } 380 } 381 } 382 if (hint && !hint->ro && block_group_bits(hint, data)) { 383 used = btrfs_block_group_used(&hint->item); 384 if (used + hint->pinned < 385 div_factor(hint->key.offset, factor)) { 386 return hint; 387 } 388 last = hint->key.objectid + hint->key.offset; 389 } else { 390 if (hint) 391 last = max(hint->key.objectid, search_start); 392 else 393 last = search_start; 394 } 395 again: 396 while(1) { 397 ret = find_first_extent_bit(block_group_cache, last, 398 &start, &end, bit); 399 if (ret) 400 break; 401 402 ret = get_state_private(block_group_cache, start, &ptr); 403 if (ret) { 404 last = end + 1; 405 continue; 406 } 407 408 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; 409 last = cache->key.objectid + cache->key.offset; 410 used = btrfs_block_group_used(&cache->item); 411 412 if (!cache->ro && block_group_bits(cache, data)) { 413 free_check = div_factor(cache->key.offset, factor); 414 if (used + cache->pinned < free_check) { 415 found_group = cache; 416 goto found; 417 } 418 } 419 cond_resched(); 420 } 421 if (!wrapped) { 422 last = search_start; 423 wrapped = 1; 424 goto again; 425 } 426 if (!full_search && factor < 10) { 427 last = search_start; 428 full_search = 1; 429 factor = 10; 430 goto again; 431 } 432 found: 433 return found_group; 434 } 435 436 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, 437 struct btrfs_block_group_cache 438 *hint, u64 search_start, 439 int data, int owner) 440 { 441 442 struct btrfs_block_group_cache *ret; 443 mutex_lock(&root->fs_info->alloc_mutex); 444 ret = __btrfs_find_block_group(root, hint, search_start, data, owner); 445 mutex_unlock(&root->fs_info->alloc_mutex); 446 return ret; 447 } 448 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, 449 u64 owner, u64 owner_offset) 450 { 451 u32 high_crc = ~(u32)0; 452 u32 low_crc = ~(u32)0; 453 __le64 lenum; 454 lenum = cpu_to_le64(root_objectid); 455 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); 456 lenum = cpu_to_le64(ref_generation); 457 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); 458 if (owner >= BTRFS_FIRST_FREE_OBJECTID) { 459 lenum = cpu_to_le64(owner); 460 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); 461 lenum = cpu_to_le64(owner_offset); 462 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); 463 } 464 return ((u64)high_crc << 32) | (u64)low_crc; 465 } 466 467 static int match_extent_ref(struct extent_buffer *leaf, 468 struct btrfs_extent_ref *disk_ref, 469 struct btrfs_extent_ref *cpu_ref) 470 { 471 int ret; 472 int len; 473 474 if (cpu_ref->objectid) 475 len = sizeof(*cpu_ref); 476 else 477 len = 2 * sizeof(u64); 478 ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref, 479 len); 480 return ret == 0; 481 } 482 483 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, 484 struct btrfs_root *root, 485 struct btrfs_path *path, u64 bytenr, 486 u64 root_objectid, 487 u64 ref_generation, u64 owner, 488 u64 owner_offset, int del) 489 { 490 u64 hash; 491 struct btrfs_key key; 492 struct btrfs_key found_key; 493 struct btrfs_extent_ref ref; 494 struct extent_buffer *leaf; 495 struct btrfs_extent_ref *disk_ref; 496 int ret; 497 int ret2; 498 499 btrfs_set_stack_ref_root(&ref, root_objectid); 500 btrfs_set_stack_ref_generation(&ref, ref_generation); 501 btrfs_set_stack_ref_objectid(&ref, owner); 502 btrfs_set_stack_ref_offset(&ref, owner_offset); 503 504 hash = hash_extent_ref(root_objectid, ref_generation, owner, 505 owner_offset); 506 key.offset = hash; 507 key.objectid = bytenr; 508 key.type = BTRFS_EXTENT_REF_KEY; 509 510 while (1) { 511 ret = btrfs_search_slot(trans, root, &key, path, 512 del ? -1 : 0, del); 513 if (ret < 0) 514 goto out; 515 leaf = path->nodes[0]; 516 if (ret != 0) { 517 u32 nritems = btrfs_header_nritems(leaf); 518 if (path->slots[0] >= nritems) { 519 ret2 = btrfs_next_leaf(root, path); 520 if (ret2) 521 goto out; 522 leaf = path->nodes[0]; 523 } 524 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 525 if (found_key.objectid != bytenr || 526 found_key.type != BTRFS_EXTENT_REF_KEY) 527 goto out; 528 key.offset = found_key.offset; 529 if (del) { 530 btrfs_release_path(root, path); 531 continue; 532 } 533 } 534 disk_ref = btrfs_item_ptr(path->nodes[0], 535 path->slots[0], 536 struct btrfs_extent_ref); 537 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) { 538 ret = 0; 539 goto out; 540 } 541 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 542 key.offset = found_key.offset + 1; 543 btrfs_release_path(root, path); 544 } 545 out: 546 return ret; 547 } 548 549 /* 550 * Back reference rules. Back refs have three main goals: 551 * 552 * 1) differentiate between all holders of references to an extent so that 553 * when a reference is dropped we can make sure it was a valid reference 554 * before freeing the extent. 555 * 556 * 2) Provide enough information to quickly find the holders of an extent 557 * if we notice a given block is corrupted or bad. 558 * 559 * 3) Make it easy to migrate blocks for FS shrinking or storage pool 560 * maintenance. This is actually the same as #2, but with a slightly 561 * different use case. 562 * 563 * File extents can be referenced by: 564 * 565 * - multiple snapshots, subvolumes, or different generations in one subvol 566 * - different files inside a single subvolume (in theory, not implemented yet) 567 * - different offsets inside a file (bookend extents in file.c) 568 * 569 * The extent ref structure has fields for: 570 * 571 * - Objectid of the subvolume root 572 * - Generation number of the tree holding the reference 573 * - objectid of the file holding the reference 574 * - offset in the file corresponding to the key holding the reference 575 * 576 * When a file extent is allocated the fields are filled in: 577 * (root_key.objectid, trans->transid, inode objectid, offset in file) 578 * 579 * When a leaf is cow'd new references are added for every file extent found 580 * in the leaf. It looks the same as the create case, but trans->transid 581 * will be different when the block is cow'd. 582 * 583 * (root_key.objectid, trans->transid, inode objectid, offset in file) 584 * 585 * When a file extent is removed either during snapshot deletion or file 586 * truncation, the corresponding back reference is found 587 * by searching for: 588 * 589 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), 590 * inode objectid, offset in file) 591 * 592 * Btree extents can be referenced by: 593 * 594 * - Different subvolumes 595 * - Different generations of the same subvolume 596 * 597 * Storing sufficient information for a full reverse mapping of a btree 598 * block would require storing the lowest key of the block in the backref, 599 * and it would require updating that lowest key either before write out or 600 * every time it changed. Instead, the objectid of the lowest key is stored 601 * along with the level of the tree block. This provides a hint 602 * about where in the btree the block can be found. Searches through the 603 * btree only need to look for a pointer to that block, so they stop one 604 * level higher than the level recorded in the backref. 605 * 606 * Some btrees do not do reference counting on their extents. These 607 * include the extent tree and the tree of tree roots. Backrefs for these 608 * trees always have a generation of zero. 609 * 610 * When a tree block is created, back references are inserted: 611 * 612 * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid) 613 * 614 * When a tree block is cow'd in a reference counted root, 615 * new back references are added for all the blocks it points to. 616 * These are of the form (trans->transid will have increased since creation): 617 * 618 * (root->root_key.objectid, trans->transid, level, lowest_key_objectid) 619 * 620 * Because the lowest_key_objectid and the level are just hints 621 * they are not used when backrefs are deleted. When a backref is deleted: 622 * 623 * if backref was for a tree root: 624 * root_objectid = root->root_key.objectid 625 * else 626 * root_objectid = btrfs_header_owner(parent) 627 * 628 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0) 629 * 630 * Back Reference Key hashing: 631 * 632 * Back references have four fields, each 64 bits long. Unfortunately, 633 * This is hashed into a single 64 bit number and placed into the key offset. 634 * The key objectid corresponds to the first byte in the extent, and the 635 * key type is set to BTRFS_EXTENT_REF_KEY 636 */ 637 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, 638 struct btrfs_root *root, 639 struct btrfs_path *path, u64 bytenr, 640 u64 root_objectid, u64 ref_generation, 641 u64 owner, u64 owner_offset) 642 { 643 u64 hash; 644 struct btrfs_key key; 645 struct btrfs_extent_ref ref; 646 struct btrfs_extent_ref *disk_ref; 647 int ret; 648 649 btrfs_set_stack_ref_root(&ref, root_objectid); 650 btrfs_set_stack_ref_generation(&ref, ref_generation); 651 btrfs_set_stack_ref_objectid(&ref, owner); 652 btrfs_set_stack_ref_offset(&ref, owner_offset); 653 654 hash = hash_extent_ref(root_objectid, ref_generation, owner, 655 owner_offset); 656 key.offset = hash; 657 key.objectid = bytenr; 658 key.type = BTRFS_EXTENT_REF_KEY; 659 660 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref)); 661 while (ret == -EEXIST) { 662 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 663 struct btrfs_extent_ref); 664 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) 665 goto out; 666 key.offset++; 667 btrfs_release_path(root, path); 668 ret = btrfs_insert_empty_item(trans, root, path, &key, 669 sizeof(ref)); 670 } 671 if (ret) 672 goto out; 673 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 674 struct btrfs_extent_ref); 675 write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref, 676 sizeof(ref)); 677 btrfs_mark_buffer_dirty(path->nodes[0]); 678 out: 679 btrfs_release_path(root, path); 680 return ret; 681 } 682 683 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 684 struct btrfs_root *root, 685 u64 bytenr, u64 num_bytes, 686 u64 root_objectid, u64 ref_generation, 687 u64 owner, u64 owner_offset) 688 { 689 struct btrfs_path *path; 690 int ret; 691 struct btrfs_key key; 692 struct extent_buffer *l; 693 struct btrfs_extent_item *item; 694 u32 refs; 695 696 WARN_ON(num_bytes < root->sectorsize); 697 path = btrfs_alloc_path(); 698 if (!path) 699 return -ENOMEM; 700 701 path->reada = 1; 702 key.objectid = bytenr; 703 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 704 key.offset = num_bytes; 705 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 706 0, 1); 707 if (ret < 0) 708 return ret; 709 if (ret != 0) { 710 BUG(); 711 } 712 BUG_ON(ret != 0); 713 l = path->nodes[0]; 714 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 715 refs = btrfs_extent_refs(l, item); 716 btrfs_set_extent_refs(l, item, refs + 1); 717 btrfs_mark_buffer_dirty(path->nodes[0]); 718 719 btrfs_release_path(root->fs_info->extent_root, path); 720 721 path->reada = 1; 722 ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, 723 path, bytenr, root_objectid, 724 ref_generation, owner, owner_offset); 725 BUG_ON(ret); 726 finish_current_insert(trans, root->fs_info->extent_root); 727 del_pending_extents(trans, root->fs_info->extent_root); 728 729 btrfs_free_path(path); 730 return 0; 731 } 732 733 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 734 struct btrfs_root *root, 735 u64 bytenr, u64 num_bytes, 736 u64 root_objectid, u64 ref_generation, 737 u64 owner, u64 owner_offset) 738 { 739 int ret; 740 741 mutex_lock(&root->fs_info->alloc_mutex); 742 ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 743 root_objectid, ref_generation, 744 owner, owner_offset); 745 mutex_unlock(&root->fs_info->alloc_mutex); 746 return ret; 747 } 748 749 int btrfs_extent_post_op(struct btrfs_trans_handle *trans, 750 struct btrfs_root *root) 751 { 752 finish_current_insert(trans, root->fs_info->extent_root); 753 del_pending_extents(trans, root->fs_info->extent_root); 754 return 0; 755 } 756 757 static int lookup_extent_ref(struct btrfs_trans_handle *trans, 758 struct btrfs_root *root, u64 bytenr, 759 u64 num_bytes, u32 *refs) 760 { 761 struct btrfs_path *path; 762 int ret; 763 struct btrfs_key key; 764 struct extent_buffer *l; 765 struct btrfs_extent_item *item; 766 767 WARN_ON(num_bytes < root->sectorsize); 768 path = btrfs_alloc_path(); 769 path->reada = 1; 770 key.objectid = bytenr; 771 key.offset = num_bytes; 772 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 773 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 774 0, 0); 775 if (ret < 0) 776 goto out; 777 if (ret != 0) { 778 btrfs_print_leaf(root, path->nodes[0]); 779 printk("failed to find block number %Lu\n", bytenr); 780 BUG(); 781 } 782 l = path->nodes[0]; 783 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 784 *refs = btrfs_extent_refs(l, item); 785 out: 786 btrfs_free_path(path); 787 return 0; 788 } 789 790 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, 791 struct btrfs_path *count_path, 792 u64 expected_owner, 793 u64 first_extent) 794 { 795 struct btrfs_root *extent_root = root->fs_info->extent_root; 796 struct btrfs_path *path; 797 u64 bytenr; 798 u64 found_objectid; 799 u64 found_owner; 800 u64 root_objectid = root->root_key.objectid; 801 u32 total_count = 0; 802 u32 extent_refs; 803 u32 cur_count; 804 u32 nritems; 805 int ret; 806 struct btrfs_key key; 807 struct btrfs_key found_key; 808 struct extent_buffer *l; 809 struct btrfs_extent_item *item; 810 struct btrfs_extent_ref *ref_item; 811 int level = -1; 812 813 /* FIXME, needs locking */ 814 BUG(); 815 816 mutex_lock(&root->fs_info->alloc_mutex); 817 path = btrfs_alloc_path(); 818 again: 819 if (level == -1) 820 bytenr = first_extent; 821 else 822 bytenr = count_path->nodes[level]->start; 823 824 cur_count = 0; 825 key.objectid = bytenr; 826 key.offset = 0; 827 828 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 829 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 830 if (ret < 0) 831 goto out; 832 BUG_ON(ret == 0); 833 834 l = path->nodes[0]; 835 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); 836 837 if (found_key.objectid != bytenr || 838 found_key.type != BTRFS_EXTENT_ITEM_KEY) { 839 goto out; 840 } 841 842 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 843 extent_refs = btrfs_extent_refs(l, item); 844 while (1) { 845 l = path->nodes[0]; 846 nritems = btrfs_header_nritems(l); 847 if (path->slots[0] >= nritems) { 848 ret = btrfs_next_leaf(extent_root, path); 849 if (ret == 0) 850 continue; 851 break; 852 } 853 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); 854 if (found_key.objectid != bytenr) 855 break; 856 857 if (found_key.type != BTRFS_EXTENT_REF_KEY) { 858 path->slots[0]++; 859 continue; 860 } 861 862 cur_count++; 863 ref_item = btrfs_item_ptr(l, path->slots[0], 864 struct btrfs_extent_ref); 865 found_objectid = btrfs_ref_root(l, ref_item); 866 867 if (found_objectid != root_objectid) { 868 total_count = 2; 869 goto out; 870 } 871 if (level == -1) { 872 found_owner = btrfs_ref_objectid(l, ref_item); 873 if (found_owner != expected_owner) { 874 total_count = 2; 875 goto out; 876 } 877 /* 878 * nasty. we don't count a reference held by 879 * the running transaction. This allows nodatacow 880 * to avoid cow most of the time 881 */ 882 if (found_owner >= BTRFS_FIRST_FREE_OBJECTID && 883 btrfs_ref_generation(l, ref_item) == 884 root->fs_info->generation) { 885 extent_refs--; 886 } 887 } 888 total_count = 1; 889 path->slots[0]++; 890 } 891 /* 892 * if there is more than one reference against a data extent, 893 * we have to assume the other ref is another snapshot 894 */ 895 if (level == -1 && extent_refs > 1) { 896 total_count = 2; 897 goto out; 898 } 899 if (cur_count == 0) { 900 total_count = 0; 901 goto out; 902 } 903 if (level >= 0 && root->node == count_path->nodes[level]) 904 goto out; 905 level++; 906 btrfs_release_path(root, path); 907 goto again; 908 909 out: 910 btrfs_free_path(path); 911 mutex_unlock(&root->fs_info->alloc_mutex); 912 return total_count; 913 } 914 915 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 916 struct extent_buffer *buf) 917 { 918 u64 bytenr; 919 u32 nritems; 920 struct btrfs_key key; 921 struct btrfs_file_extent_item *fi; 922 int i; 923 int level; 924 int ret; 925 int faili; 926 927 if (!root->ref_cows) 928 return 0; 929 930 mutex_lock(&root->fs_info->alloc_mutex); 931 level = btrfs_header_level(buf); 932 nritems = btrfs_header_nritems(buf); 933 for (i = 0; i < nritems; i++) { 934 if (level == 0) { 935 u64 disk_bytenr; 936 btrfs_item_key_to_cpu(buf, &key, i); 937 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 938 continue; 939 fi = btrfs_item_ptr(buf, i, 940 struct btrfs_file_extent_item); 941 if (btrfs_file_extent_type(buf, fi) == 942 BTRFS_FILE_EXTENT_INLINE) 943 continue; 944 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 945 if (disk_bytenr == 0) 946 continue; 947 ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr, 948 btrfs_file_extent_disk_num_bytes(buf, fi), 949 root->root_key.objectid, trans->transid, 950 key.objectid, key.offset); 951 if (ret) { 952 faili = i; 953 goto fail; 954 } 955 } else { 956 bytenr = btrfs_node_blockptr(buf, i); 957 btrfs_node_key_to_cpu(buf, &key, i); 958 ret = __btrfs_inc_extent_ref(trans, root, bytenr, 959 btrfs_level_size(root, level - 1), 960 root->root_key.objectid, 961 trans->transid, 962 level - 1, key.objectid); 963 if (ret) { 964 faili = i; 965 goto fail; 966 } 967 } 968 } 969 mutex_unlock(&root->fs_info->alloc_mutex); 970 return 0; 971 fail: 972 WARN_ON(1); 973 #if 0 974 for (i =0; i < faili; i++) { 975 if (level == 0) { 976 u64 disk_bytenr; 977 btrfs_item_key_to_cpu(buf, &key, i); 978 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 979 continue; 980 fi = btrfs_item_ptr(buf, i, 981 struct btrfs_file_extent_item); 982 if (btrfs_file_extent_type(buf, fi) == 983 BTRFS_FILE_EXTENT_INLINE) 984 continue; 985 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 986 if (disk_bytenr == 0) 987 continue; 988 err = btrfs_free_extent(trans, root, disk_bytenr, 989 btrfs_file_extent_disk_num_bytes(buf, 990 fi), 0); 991 BUG_ON(err); 992 } else { 993 bytenr = btrfs_node_blockptr(buf, i); 994 err = btrfs_free_extent(trans, root, bytenr, 995 btrfs_level_size(root, level - 1), 0); 996 BUG_ON(err); 997 } 998 } 999 #endif 1000 mutex_unlock(&root->fs_info->alloc_mutex); 1001 return ret; 1002 } 1003 1004 static int write_one_cache_group(struct btrfs_trans_handle *trans, 1005 struct btrfs_root *root, 1006 struct btrfs_path *path, 1007 struct btrfs_block_group_cache *cache) 1008 { 1009 int ret; 1010 int pending_ret; 1011 struct btrfs_root *extent_root = root->fs_info->extent_root; 1012 unsigned long bi; 1013 struct extent_buffer *leaf; 1014 1015 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 1016 if (ret < 0) 1017 goto fail; 1018 BUG_ON(ret); 1019 1020 leaf = path->nodes[0]; 1021 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 1022 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 1023 btrfs_mark_buffer_dirty(leaf); 1024 btrfs_release_path(extent_root, path); 1025 fail: 1026 finish_current_insert(trans, extent_root); 1027 pending_ret = del_pending_extents(trans, extent_root); 1028 if (ret) 1029 return ret; 1030 if (pending_ret) 1031 return pending_ret; 1032 return 0; 1033 1034 } 1035 1036 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 1037 struct btrfs_root *root) 1038 { 1039 struct extent_io_tree *block_group_cache; 1040 struct btrfs_block_group_cache *cache; 1041 int ret; 1042 int err = 0; 1043 int werr = 0; 1044 struct btrfs_path *path; 1045 u64 last = 0; 1046 u64 start; 1047 u64 end; 1048 u64 ptr; 1049 1050 block_group_cache = &root->fs_info->block_group_cache; 1051 path = btrfs_alloc_path(); 1052 if (!path) 1053 return -ENOMEM; 1054 1055 mutex_lock(&root->fs_info->alloc_mutex); 1056 while(1) { 1057 ret = find_first_extent_bit(block_group_cache, last, 1058 &start, &end, BLOCK_GROUP_DIRTY); 1059 if (ret) 1060 break; 1061 1062 last = end + 1; 1063 ret = get_state_private(block_group_cache, start, &ptr); 1064 if (ret) 1065 break; 1066 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; 1067 err = write_one_cache_group(trans, root, 1068 path, cache); 1069 /* 1070 * if we fail to write the cache group, we want 1071 * to keep it marked dirty in hopes that a later 1072 * write will work 1073 */ 1074 if (err) { 1075 werr = err; 1076 continue; 1077 } 1078 clear_extent_bits(block_group_cache, start, end, 1079 BLOCK_GROUP_DIRTY, GFP_NOFS); 1080 } 1081 btrfs_free_path(path); 1082 mutex_unlock(&root->fs_info->alloc_mutex); 1083 return werr; 1084 } 1085 1086 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 1087 u64 flags) 1088 { 1089 struct list_head *head = &info->space_info; 1090 struct list_head *cur; 1091 struct btrfs_space_info *found; 1092 list_for_each(cur, head) { 1093 found = list_entry(cur, struct btrfs_space_info, list); 1094 if (found->flags == flags) 1095 return found; 1096 } 1097 return NULL; 1098 1099 } 1100 1101 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 1102 u64 total_bytes, u64 bytes_used, 1103 struct btrfs_space_info **space_info) 1104 { 1105 struct btrfs_space_info *found; 1106 1107 found = __find_space_info(info, flags); 1108 if (found) { 1109 found->total_bytes += total_bytes; 1110 found->bytes_used += bytes_used; 1111 found->full = 0; 1112 WARN_ON(found->total_bytes < found->bytes_used); 1113 *space_info = found; 1114 return 0; 1115 } 1116 found = kmalloc(sizeof(*found), GFP_NOFS); 1117 if (!found) 1118 return -ENOMEM; 1119 1120 list_add(&found->list, &info->space_info); 1121 found->flags = flags; 1122 found->total_bytes = total_bytes; 1123 found->bytes_used = bytes_used; 1124 found->bytes_pinned = 0; 1125 found->full = 0; 1126 found->force_alloc = 0; 1127 *space_info = found; 1128 return 0; 1129 } 1130 1131 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 1132 { 1133 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 1134 BTRFS_BLOCK_GROUP_RAID1 | 1135 BTRFS_BLOCK_GROUP_RAID10 | 1136 BTRFS_BLOCK_GROUP_DUP); 1137 if (extra_flags) { 1138 if (flags & BTRFS_BLOCK_GROUP_DATA) 1139 fs_info->avail_data_alloc_bits |= extra_flags; 1140 if (flags & BTRFS_BLOCK_GROUP_METADATA) 1141 fs_info->avail_metadata_alloc_bits |= extra_flags; 1142 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 1143 fs_info->avail_system_alloc_bits |= extra_flags; 1144 } 1145 } 1146 1147 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags) 1148 { 1149 u64 num_devices = root->fs_info->fs_devices->num_devices; 1150 1151 if (num_devices == 1) 1152 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 1153 if (num_devices < 4) 1154 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 1155 1156 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 1157 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 1158 BTRFS_BLOCK_GROUP_RAID10))) { 1159 flags &= ~BTRFS_BLOCK_GROUP_DUP; 1160 } 1161 1162 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 1163 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 1164 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 1165 } 1166 1167 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 1168 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 1169 (flags & BTRFS_BLOCK_GROUP_RAID10) | 1170 (flags & BTRFS_BLOCK_GROUP_DUP))) 1171 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 1172 return flags; 1173 } 1174 1175 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 1176 struct btrfs_root *extent_root, u64 alloc_bytes, 1177 u64 flags, int force) 1178 { 1179 struct btrfs_space_info *space_info; 1180 u64 thresh; 1181 u64 start; 1182 u64 num_bytes; 1183 int ret; 1184 1185 flags = reduce_alloc_profile(extent_root, flags); 1186 1187 space_info = __find_space_info(extent_root->fs_info, flags); 1188 if (!space_info) { 1189 ret = update_space_info(extent_root->fs_info, flags, 1190 0, 0, &space_info); 1191 BUG_ON(ret); 1192 } 1193 BUG_ON(!space_info); 1194 1195 if (space_info->force_alloc) { 1196 force = 1; 1197 space_info->force_alloc = 0; 1198 } 1199 if (space_info->full) 1200 goto out; 1201 1202 thresh = div_factor(space_info->total_bytes, 6); 1203 if (!force && 1204 (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) < 1205 thresh) 1206 goto out; 1207 1208 mutex_lock(&extent_root->fs_info->chunk_mutex); 1209 ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags); 1210 if (ret == -ENOSPC) { 1211 printk("space info full %Lu\n", flags); 1212 space_info->full = 1; 1213 goto out; 1214 } 1215 BUG_ON(ret); 1216 1217 ret = btrfs_make_block_group(trans, extent_root, 0, flags, 1218 BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); 1219 BUG_ON(ret); 1220 mutex_unlock(&extent_root->fs_info->chunk_mutex); 1221 out: 1222 return 0; 1223 } 1224 1225 static int update_block_group(struct btrfs_trans_handle *trans, 1226 struct btrfs_root *root, 1227 u64 bytenr, u64 num_bytes, int alloc, 1228 int mark_free) 1229 { 1230 struct btrfs_block_group_cache *cache; 1231 struct btrfs_fs_info *info = root->fs_info; 1232 u64 total = num_bytes; 1233 u64 old_val; 1234 u64 byte_in_group; 1235 u64 start; 1236 u64 end; 1237 1238 while(total) { 1239 cache = btrfs_lookup_block_group(info, bytenr); 1240 if (!cache) { 1241 return -1; 1242 } 1243 byte_in_group = bytenr - cache->key.objectid; 1244 WARN_ON(byte_in_group > cache->key.offset); 1245 start = cache->key.objectid; 1246 end = start + cache->key.offset - 1; 1247 set_extent_bits(&info->block_group_cache, start, end, 1248 BLOCK_GROUP_DIRTY, GFP_NOFS); 1249 1250 old_val = btrfs_block_group_used(&cache->item); 1251 num_bytes = min(total, cache->key.offset - byte_in_group); 1252 if (alloc) { 1253 old_val += num_bytes; 1254 cache->space_info->bytes_used += num_bytes; 1255 } else { 1256 old_val -= num_bytes; 1257 cache->space_info->bytes_used -= num_bytes; 1258 if (mark_free) { 1259 set_extent_dirty(&info->free_space_cache, 1260 bytenr, bytenr + num_bytes - 1, 1261 GFP_NOFS); 1262 } 1263 } 1264 btrfs_set_block_group_used(&cache->item, old_val); 1265 total -= num_bytes; 1266 bytenr += num_bytes; 1267 } 1268 return 0; 1269 } 1270 1271 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 1272 { 1273 u64 start; 1274 u64 end; 1275 int ret; 1276 ret = find_first_extent_bit(&root->fs_info->block_group_cache, 1277 search_start, &start, &end, 1278 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | 1279 BLOCK_GROUP_SYSTEM); 1280 if (ret) 1281 return 0; 1282 return start; 1283 } 1284 1285 1286 static int update_pinned_extents(struct btrfs_root *root, 1287 u64 bytenr, u64 num, int pin) 1288 { 1289 u64 len; 1290 struct btrfs_block_group_cache *cache; 1291 struct btrfs_fs_info *fs_info = root->fs_info; 1292 1293 if (pin) { 1294 set_extent_dirty(&fs_info->pinned_extents, 1295 bytenr, bytenr + num - 1, GFP_NOFS); 1296 } else { 1297 clear_extent_dirty(&fs_info->pinned_extents, 1298 bytenr, bytenr + num - 1, GFP_NOFS); 1299 } 1300 while (num > 0) { 1301 cache = btrfs_lookup_block_group(fs_info, bytenr); 1302 if (!cache) { 1303 u64 first = first_logical_byte(root, bytenr); 1304 WARN_ON(first < bytenr); 1305 len = min(first - bytenr, num); 1306 } else { 1307 len = min(num, cache->key.offset - 1308 (bytenr - cache->key.objectid)); 1309 } 1310 if (pin) { 1311 if (cache) { 1312 cache->pinned += len; 1313 cache->space_info->bytes_pinned += len; 1314 } 1315 fs_info->total_pinned += len; 1316 } else { 1317 if (cache) { 1318 cache->pinned -= len; 1319 cache->space_info->bytes_pinned -= len; 1320 } 1321 fs_info->total_pinned -= len; 1322 } 1323 bytenr += len; 1324 num -= len; 1325 } 1326 return 0; 1327 } 1328 1329 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 1330 { 1331 u64 last = 0; 1332 u64 start; 1333 u64 end; 1334 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; 1335 int ret; 1336 1337 while(1) { 1338 ret = find_first_extent_bit(pinned_extents, last, 1339 &start, &end, EXTENT_DIRTY); 1340 if (ret) 1341 break; 1342 set_extent_dirty(copy, start, end, GFP_NOFS); 1343 last = end + 1; 1344 } 1345 return 0; 1346 } 1347 1348 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 1349 struct btrfs_root *root, 1350 struct extent_io_tree *unpin) 1351 { 1352 u64 start; 1353 u64 end; 1354 int ret; 1355 struct extent_io_tree *free_space_cache; 1356 free_space_cache = &root->fs_info->free_space_cache; 1357 1358 mutex_lock(&root->fs_info->alloc_mutex); 1359 while(1) { 1360 ret = find_first_extent_bit(unpin, 0, &start, &end, 1361 EXTENT_DIRTY); 1362 if (ret) 1363 break; 1364 update_pinned_extents(root, start, end + 1 - start, 0); 1365 clear_extent_dirty(unpin, start, end, GFP_NOFS); 1366 set_extent_dirty(free_space_cache, start, end, GFP_NOFS); 1367 } 1368 mutex_unlock(&root->fs_info->alloc_mutex); 1369 return 0; 1370 } 1371 1372 static int finish_current_insert(struct btrfs_trans_handle *trans, 1373 struct btrfs_root *extent_root) 1374 { 1375 u64 start; 1376 u64 end; 1377 struct btrfs_fs_info *info = extent_root->fs_info; 1378 struct extent_buffer *eb; 1379 struct btrfs_path *path; 1380 struct btrfs_key ins; 1381 struct btrfs_disk_key first; 1382 struct btrfs_extent_item extent_item; 1383 int ret; 1384 int level; 1385 int err = 0; 1386 1387 btrfs_set_stack_extent_refs(&extent_item, 1); 1388 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); 1389 path = btrfs_alloc_path(); 1390 1391 while(1) { 1392 ret = find_first_extent_bit(&info->extent_ins, 0, &start, 1393 &end, EXTENT_LOCKED); 1394 if (ret) 1395 break; 1396 1397 ins.objectid = start; 1398 ins.offset = end + 1 - start; 1399 err = btrfs_insert_item(trans, extent_root, &ins, 1400 &extent_item, sizeof(extent_item)); 1401 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, 1402 GFP_NOFS); 1403 eb = read_tree_block(extent_root, ins.objectid, ins.offset, 1404 trans->transid); 1405 btrfs_tree_lock(eb); 1406 level = btrfs_header_level(eb); 1407 if (level == 0) { 1408 btrfs_item_key(eb, &first, 0); 1409 } else { 1410 btrfs_node_key(eb, &first, 0); 1411 } 1412 btrfs_tree_unlock(eb); 1413 free_extent_buffer(eb); 1414 /* 1415 * the first key is just a hint, so the race we've created 1416 * against reading it is fine 1417 */ 1418 err = btrfs_insert_extent_backref(trans, extent_root, path, 1419 start, extent_root->root_key.objectid, 1420 0, level, 1421 btrfs_disk_key_objectid(&first)); 1422 BUG_ON(err); 1423 } 1424 btrfs_free_path(path); 1425 return 0; 1426 } 1427 1428 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, 1429 int pending) 1430 { 1431 int err = 0; 1432 1433 if (!pending) { 1434 #if 0 1435 struct extent_buffer *buf; 1436 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 1437 if (buf) { 1438 if (!btrfs_try_tree_lock(buf) && 1439 btrfs_buffer_uptodate(buf, 0)) { 1440 u64 transid = 1441 root->fs_info->running_transaction->transid; 1442 u64 header_transid = 1443 btrfs_header_generation(buf); 1444 if (header_transid == transid && 1445 !btrfs_header_flag(buf, 1446 BTRFS_HEADER_FLAG_WRITTEN)) { 1447 clean_tree_block(NULL, root, buf); 1448 btrfs_tree_unlock(buf); 1449 free_extent_buffer(buf); 1450 return 1; 1451 } 1452 btrfs_tree_unlock(buf); 1453 } 1454 free_extent_buffer(buf); 1455 } 1456 #endif 1457 update_pinned_extents(root, bytenr, num_bytes, 1); 1458 } else { 1459 set_extent_bits(&root->fs_info->pending_del, 1460 bytenr, bytenr + num_bytes - 1, 1461 EXTENT_LOCKED, GFP_NOFS); 1462 } 1463 BUG_ON(err < 0); 1464 return 0; 1465 } 1466 1467 /* 1468 * remove an extent from the root, returns 0 on success 1469 */ 1470 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 1471 *root, u64 bytenr, u64 num_bytes, 1472 u64 root_objectid, u64 ref_generation, 1473 u64 owner_objectid, u64 owner_offset, int pin, 1474 int mark_free) 1475 { 1476 struct btrfs_path *path; 1477 struct btrfs_key key; 1478 struct btrfs_fs_info *info = root->fs_info; 1479 struct btrfs_root *extent_root = info->extent_root; 1480 struct extent_buffer *leaf; 1481 int ret; 1482 int extent_slot = 0; 1483 int found_extent = 0; 1484 int num_to_del = 1; 1485 struct btrfs_extent_item *ei; 1486 u32 refs; 1487 1488 key.objectid = bytenr; 1489 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 1490 key.offset = num_bytes; 1491 path = btrfs_alloc_path(); 1492 if (!path) 1493 return -ENOMEM; 1494 1495 path->reada = 1; 1496 ret = lookup_extent_backref(trans, extent_root, path, 1497 bytenr, root_objectid, 1498 ref_generation, 1499 owner_objectid, owner_offset, 1); 1500 if (ret == 0) { 1501 struct btrfs_key found_key; 1502 extent_slot = path->slots[0]; 1503 while(extent_slot > 0) { 1504 extent_slot--; 1505 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 1506 extent_slot); 1507 if (found_key.objectid != bytenr) 1508 break; 1509 if (found_key.type == BTRFS_EXTENT_ITEM_KEY && 1510 found_key.offset == num_bytes) { 1511 found_extent = 1; 1512 break; 1513 } 1514 if (path->slots[0] - extent_slot > 5) 1515 break; 1516 } 1517 if (!found_extent) 1518 ret = btrfs_del_item(trans, extent_root, path); 1519 } else { 1520 btrfs_print_leaf(extent_root, path->nodes[0]); 1521 WARN_ON(1); 1522 printk("Unable to find ref byte nr %Lu root %Lu " 1523 " gen %Lu owner %Lu offset %Lu\n", bytenr, 1524 root_objectid, ref_generation, owner_objectid, 1525 owner_offset); 1526 } 1527 if (!found_extent) { 1528 btrfs_release_path(extent_root, path); 1529 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); 1530 if (ret < 0) 1531 return ret; 1532 BUG_ON(ret); 1533 extent_slot = path->slots[0]; 1534 } 1535 1536 leaf = path->nodes[0]; 1537 ei = btrfs_item_ptr(leaf, extent_slot, 1538 struct btrfs_extent_item); 1539 refs = btrfs_extent_refs(leaf, ei); 1540 BUG_ON(refs == 0); 1541 refs -= 1; 1542 btrfs_set_extent_refs(leaf, ei, refs); 1543 1544 btrfs_mark_buffer_dirty(leaf); 1545 1546 if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { 1547 /* if the back ref and the extent are next to each other 1548 * they get deleted below in one shot 1549 */ 1550 path->slots[0] = extent_slot; 1551 num_to_del = 2; 1552 } else if (found_extent) { 1553 /* otherwise delete the extent back ref */ 1554 ret = btrfs_del_item(trans, extent_root, path); 1555 BUG_ON(ret); 1556 /* if refs are 0, we need to setup the path for deletion */ 1557 if (refs == 0) { 1558 btrfs_release_path(extent_root, path); 1559 ret = btrfs_search_slot(trans, extent_root, &key, path, 1560 -1, 1); 1561 if (ret < 0) 1562 return ret; 1563 BUG_ON(ret); 1564 } 1565 } 1566 1567 if (refs == 0) { 1568 u64 super_used; 1569 u64 root_used; 1570 1571 if (pin) { 1572 ret = pin_down_bytes(root, bytenr, num_bytes, 0); 1573 if (ret > 0) 1574 mark_free = 1; 1575 BUG_ON(ret < 0); 1576 } 1577 1578 /* block accounting for super block */ 1579 spin_lock_irq(&info->delalloc_lock); 1580 super_used = btrfs_super_bytes_used(&info->super_copy); 1581 btrfs_set_super_bytes_used(&info->super_copy, 1582 super_used - num_bytes); 1583 spin_unlock_irq(&info->delalloc_lock); 1584 1585 /* block accounting for root item */ 1586 root_used = btrfs_root_used(&root->root_item); 1587 btrfs_set_root_used(&root->root_item, 1588 root_used - num_bytes); 1589 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 1590 num_to_del); 1591 if (ret) { 1592 return ret; 1593 } 1594 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 1595 mark_free); 1596 BUG_ON(ret); 1597 } 1598 btrfs_free_path(path); 1599 finish_current_insert(trans, extent_root); 1600 return ret; 1601 } 1602 1603 /* 1604 * find all the blocks marked as pending in the radix tree and remove 1605 * them from the extent map 1606 */ 1607 static int del_pending_extents(struct btrfs_trans_handle *trans, struct 1608 btrfs_root *extent_root) 1609 { 1610 int ret; 1611 int err = 0; 1612 u64 start; 1613 u64 end; 1614 struct extent_io_tree *pending_del; 1615 struct extent_io_tree *pinned_extents; 1616 1617 pending_del = &extent_root->fs_info->pending_del; 1618 pinned_extents = &extent_root->fs_info->pinned_extents; 1619 1620 while(1) { 1621 ret = find_first_extent_bit(pending_del, 0, &start, &end, 1622 EXTENT_LOCKED); 1623 if (ret) 1624 break; 1625 update_pinned_extents(extent_root, start, end + 1 - start, 1); 1626 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, 1627 GFP_NOFS); 1628 ret = __free_extent(trans, extent_root, 1629 start, end + 1 - start, 1630 extent_root->root_key.objectid, 1631 0, 0, 0, 0, 0); 1632 if (ret) 1633 err = ret; 1634 } 1635 return err; 1636 } 1637 1638 /* 1639 * remove an extent from the root, returns 0 on success 1640 */ 1641 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 1642 struct btrfs_root *root, u64 bytenr, 1643 u64 num_bytes, u64 root_objectid, 1644 u64 ref_generation, u64 owner_objectid, 1645 u64 owner_offset, int pin) 1646 { 1647 struct btrfs_root *extent_root = root->fs_info->extent_root; 1648 int pending_ret; 1649 int ret; 1650 1651 WARN_ON(num_bytes < root->sectorsize); 1652 if (!root->ref_cows) 1653 ref_generation = 0; 1654 1655 if (root == extent_root) { 1656 pin_down_bytes(root, bytenr, num_bytes, 1); 1657 return 0; 1658 } 1659 ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid, 1660 ref_generation, owner_objectid, owner_offset, 1661 pin, pin == 0); 1662 pending_ret = del_pending_extents(trans, root->fs_info->extent_root); 1663 return ret ? ret : pending_ret; 1664 } 1665 1666 int btrfs_free_extent(struct btrfs_trans_handle *trans, 1667 struct btrfs_root *root, u64 bytenr, 1668 u64 num_bytes, u64 root_objectid, 1669 u64 ref_generation, u64 owner_objectid, 1670 u64 owner_offset, int pin) 1671 { 1672 int ret; 1673 1674 maybe_lock_mutex(root); 1675 ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, 1676 root_objectid, ref_generation, 1677 owner_objectid, owner_offset, pin); 1678 maybe_unlock_mutex(root); 1679 return ret; 1680 } 1681 1682 static u64 stripe_align(struct btrfs_root *root, u64 val) 1683 { 1684 u64 mask = ((u64)root->stripesize - 1); 1685 u64 ret = (val + mask) & ~mask; 1686 return ret; 1687 } 1688 1689 /* 1690 * walks the btree of allocated extents and find a hole of a given size. 1691 * The key ins is changed to record the hole: 1692 * ins->objectid == block start 1693 * ins->flags = BTRFS_EXTENT_ITEM_KEY 1694 * ins->offset == number of blocks 1695 * Any available blocks before search_start are skipped. 1696 */ 1697 static int noinline find_free_extent(struct btrfs_trans_handle *trans, 1698 struct btrfs_root *orig_root, 1699 u64 num_bytes, u64 empty_size, 1700 u64 search_start, u64 search_end, 1701 u64 hint_byte, struct btrfs_key *ins, 1702 u64 exclude_start, u64 exclude_nr, 1703 int data) 1704 { 1705 int ret; 1706 u64 orig_search_start; 1707 struct btrfs_root * root = orig_root->fs_info->extent_root; 1708 struct btrfs_fs_info *info = root->fs_info; 1709 u64 total_needed = num_bytes; 1710 u64 *last_ptr = NULL; 1711 struct btrfs_block_group_cache *block_group; 1712 int full_scan = 0; 1713 int wrapped = 0; 1714 int chunk_alloc_done = 0; 1715 int empty_cluster = 2 * 1024 * 1024; 1716 int allowed_chunk_alloc = 0; 1717 1718 WARN_ON(num_bytes < root->sectorsize); 1719 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 1720 1721 if (orig_root->ref_cows || empty_size) 1722 allowed_chunk_alloc = 1; 1723 1724 if (data & BTRFS_BLOCK_GROUP_METADATA) { 1725 last_ptr = &root->fs_info->last_alloc; 1726 empty_cluster = 256 * 1024; 1727 } 1728 1729 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { 1730 last_ptr = &root->fs_info->last_data_alloc; 1731 } 1732 1733 if (last_ptr) { 1734 if (*last_ptr) 1735 hint_byte = *last_ptr; 1736 else { 1737 empty_size += empty_cluster; 1738 } 1739 } 1740 1741 search_start = max(search_start, first_logical_byte(root, 0)); 1742 orig_search_start = search_start; 1743 1744 if (search_end == (u64)-1) 1745 search_end = btrfs_super_total_bytes(&info->super_copy); 1746 1747 if (hint_byte) { 1748 block_group = btrfs_lookup_first_block_group(info, hint_byte); 1749 if (!block_group) 1750 hint_byte = search_start; 1751 block_group = __btrfs_find_block_group(root, block_group, 1752 hint_byte, data, 1); 1753 if (last_ptr && *last_ptr == 0 && block_group) 1754 hint_byte = block_group->key.objectid; 1755 } else { 1756 block_group = __btrfs_find_block_group(root, 1757 trans->block_group, 1758 search_start, data, 1); 1759 } 1760 search_start = max(search_start, hint_byte); 1761 1762 total_needed += empty_size; 1763 1764 check_failed: 1765 if (!block_group) { 1766 block_group = btrfs_lookup_first_block_group(info, 1767 search_start); 1768 if (!block_group) 1769 block_group = btrfs_lookup_first_block_group(info, 1770 orig_search_start); 1771 } 1772 if (full_scan && !chunk_alloc_done) { 1773 if (allowed_chunk_alloc) { 1774 do_chunk_alloc(trans, root, 1775 num_bytes + 2 * 1024 * 1024, data, 1); 1776 allowed_chunk_alloc = 0; 1777 } else if (block_group && block_group_bits(block_group, data)) { 1778 block_group->space_info->force_alloc = 1; 1779 } 1780 chunk_alloc_done = 1; 1781 } 1782 ret = find_search_start(root, &block_group, &search_start, 1783 total_needed, data); 1784 if (ret == -ENOSPC && last_ptr && *last_ptr) { 1785 *last_ptr = 0; 1786 block_group = btrfs_lookup_first_block_group(info, 1787 orig_search_start); 1788 search_start = orig_search_start; 1789 ret = find_search_start(root, &block_group, &search_start, 1790 total_needed, data); 1791 } 1792 if (ret == -ENOSPC) 1793 goto enospc; 1794 if (ret) 1795 goto error; 1796 1797 if (last_ptr && *last_ptr && search_start != *last_ptr) { 1798 *last_ptr = 0; 1799 if (!empty_size) { 1800 empty_size += empty_cluster; 1801 total_needed += empty_size; 1802 } 1803 block_group = btrfs_lookup_first_block_group(info, 1804 orig_search_start); 1805 search_start = orig_search_start; 1806 ret = find_search_start(root, &block_group, 1807 &search_start, total_needed, data); 1808 if (ret == -ENOSPC) 1809 goto enospc; 1810 if (ret) 1811 goto error; 1812 } 1813 1814 search_start = stripe_align(root, search_start); 1815 ins->objectid = search_start; 1816 ins->offset = num_bytes; 1817 1818 if (ins->objectid + num_bytes >= search_end) 1819 goto enospc; 1820 1821 if (ins->objectid + num_bytes > 1822 block_group->key.objectid + block_group->key.offset) { 1823 search_start = block_group->key.objectid + 1824 block_group->key.offset; 1825 goto new_group; 1826 } 1827 1828 if (test_range_bit(&info->extent_ins, ins->objectid, 1829 ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { 1830 search_start = ins->objectid + num_bytes; 1831 goto new_group; 1832 } 1833 1834 if (test_range_bit(&info->pinned_extents, ins->objectid, 1835 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { 1836 search_start = ins->objectid + num_bytes; 1837 goto new_group; 1838 } 1839 1840 if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && 1841 ins->objectid < exclude_start + exclude_nr)) { 1842 search_start = exclude_start + exclude_nr; 1843 goto new_group; 1844 } 1845 1846 if (!(data & BTRFS_BLOCK_GROUP_DATA)) { 1847 block_group = btrfs_lookup_block_group(info, ins->objectid); 1848 if (block_group) 1849 trans->block_group = block_group; 1850 } 1851 ins->offset = num_bytes; 1852 if (last_ptr) { 1853 *last_ptr = ins->objectid + ins->offset; 1854 if (*last_ptr == 1855 btrfs_super_total_bytes(&root->fs_info->super_copy)) { 1856 *last_ptr = 0; 1857 } 1858 } 1859 return 0; 1860 1861 new_group: 1862 if (search_start + num_bytes >= search_end) { 1863 enospc: 1864 search_start = orig_search_start; 1865 if (full_scan) { 1866 ret = -ENOSPC; 1867 goto error; 1868 } 1869 if (wrapped) { 1870 if (!full_scan) 1871 total_needed -= empty_size; 1872 full_scan = 1; 1873 } else 1874 wrapped = 1; 1875 } 1876 block_group = btrfs_lookup_first_block_group(info, search_start); 1877 cond_resched(); 1878 block_group = __btrfs_find_block_group(root, block_group, 1879 search_start, data, 0); 1880 goto check_failed; 1881 1882 error: 1883 return ret; 1884 } 1885 1886 /* 1887 * finds a free extent and does all the dirty work required for allocation 1888 * returns the key for the extent through ins, and a tree buffer for 1889 * the first block of the extent through buf. 1890 * 1891 * returns 0 if everything worked, non-zero otherwise. 1892 */ 1893 int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 1894 struct btrfs_root *root, 1895 u64 num_bytes, u64 min_alloc_size, 1896 u64 root_objectid, u64 ref_generation, 1897 u64 owner, u64 owner_offset, 1898 u64 empty_size, u64 hint_byte, 1899 u64 search_end, struct btrfs_key *ins, u64 data) 1900 { 1901 int ret; 1902 int pending_ret; 1903 u64 super_used; 1904 u64 root_used; 1905 u64 search_start = 0; 1906 u64 alloc_profile; 1907 u32 sizes[2]; 1908 struct btrfs_fs_info *info = root->fs_info; 1909 struct btrfs_root *extent_root = info->extent_root; 1910 struct btrfs_extent_item *extent_item; 1911 struct btrfs_extent_ref *ref; 1912 struct btrfs_path *path; 1913 struct btrfs_key keys[2]; 1914 1915 maybe_lock_mutex(root); 1916 1917 if (data) { 1918 alloc_profile = info->avail_data_alloc_bits & 1919 info->data_alloc_profile; 1920 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; 1921 } else if (root == root->fs_info->chunk_root) { 1922 alloc_profile = info->avail_system_alloc_bits & 1923 info->system_alloc_profile; 1924 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; 1925 } else { 1926 alloc_profile = info->avail_metadata_alloc_bits & 1927 info->metadata_alloc_profile; 1928 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; 1929 } 1930 again: 1931 data = reduce_alloc_profile(root, data); 1932 /* 1933 * the only place that sets empty_size is btrfs_realloc_node, which 1934 * is not called recursively on allocations 1935 */ 1936 if (empty_size || root->ref_cows) { 1937 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { 1938 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 1939 2 * 1024 * 1024, 1940 BTRFS_BLOCK_GROUP_METADATA | 1941 (info->metadata_alloc_profile & 1942 info->avail_metadata_alloc_bits), 0); 1943 BUG_ON(ret); 1944 } 1945 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 1946 num_bytes + 2 * 1024 * 1024, data, 0); 1947 BUG_ON(ret); 1948 } 1949 1950 WARN_ON(num_bytes < root->sectorsize); 1951 ret = find_free_extent(trans, root, num_bytes, empty_size, 1952 search_start, search_end, hint_byte, ins, 1953 trans->alloc_exclude_start, 1954 trans->alloc_exclude_nr, data); 1955 1956 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 1957 num_bytes = num_bytes >> 1; 1958 num_bytes = max(num_bytes, min_alloc_size); 1959 do_chunk_alloc(trans, root->fs_info->extent_root, 1960 num_bytes, data, 1); 1961 goto again; 1962 } 1963 if (ret) { 1964 printk("allocation failed flags %Lu\n", data); 1965 } 1966 if (ret) { 1967 BUG(); 1968 goto out; 1969 } 1970 1971 /* block accounting for super block */ 1972 spin_lock_irq(&info->delalloc_lock); 1973 super_used = btrfs_super_bytes_used(&info->super_copy); 1974 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); 1975 spin_unlock_irq(&info->delalloc_lock); 1976 1977 /* block accounting for root item */ 1978 root_used = btrfs_root_used(&root->root_item); 1979 btrfs_set_root_used(&root->root_item, root_used + num_bytes); 1980 1981 clear_extent_dirty(&root->fs_info->free_space_cache, 1982 ins->objectid, ins->objectid + ins->offset - 1, 1983 GFP_NOFS); 1984 1985 if (root == extent_root) { 1986 set_extent_bits(&root->fs_info->extent_ins, ins->objectid, 1987 ins->objectid + ins->offset - 1, 1988 EXTENT_LOCKED, GFP_NOFS); 1989 goto update_block; 1990 } 1991 1992 WARN_ON(trans->alloc_exclude_nr); 1993 trans->alloc_exclude_start = ins->objectid; 1994 trans->alloc_exclude_nr = ins->offset; 1995 1996 memcpy(&keys[0], ins, sizeof(*ins)); 1997 keys[1].offset = hash_extent_ref(root_objectid, ref_generation, 1998 owner, owner_offset); 1999 keys[1].objectid = ins->objectid; 2000 keys[1].type = BTRFS_EXTENT_REF_KEY; 2001 sizes[0] = sizeof(*extent_item); 2002 sizes[1] = sizeof(*ref); 2003 2004 path = btrfs_alloc_path(); 2005 BUG_ON(!path); 2006 2007 ret = btrfs_insert_empty_items(trans, extent_root, path, keys, 2008 sizes, 2); 2009 2010 BUG_ON(ret); 2011 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 2012 struct btrfs_extent_item); 2013 btrfs_set_extent_refs(path->nodes[0], extent_item, 1); 2014 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, 2015 struct btrfs_extent_ref); 2016 2017 btrfs_set_ref_root(path->nodes[0], ref, root_objectid); 2018 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); 2019 btrfs_set_ref_objectid(path->nodes[0], ref, owner); 2020 btrfs_set_ref_offset(path->nodes[0], ref, owner_offset); 2021 2022 btrfs_mark_buffer_dirty(path->nodes[0]); 2023 2024 trans->alloc_exclude_start = 0; 2025 trans->alloc_exclude_nr = 0; 2026 btrfs_free_path(path); 2027 finish_current_insert(trans, extent_root); 2028 pending_ret = del_pending_extents(trans, extent_root); 2029 2030 if (ret) 2031 goto out; 2032 if (pending_ret) { 2033 ret = pending_ret; 2034 goto out; 2035 } 2036 2037 update_block: 2038 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); 2039 if (ret) { 2040 printk("update block group failed for %Lu %Lu\n", 2041 ins->objectid, ins->offset); 2042 BUG(); 2043 } 2044 out: 2045 maybe_unlock_mutex(root); 2046 return ret; 2047 } 2048 /* 2049 * helper function to allocate a block for a given tree 2050 * returns the tree buffer or NULL. 2051 */ 2052 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 2053 struct btrfs_root *root, 2054 u32 blocksize, 2055 u64 root_objectid, 2056 u64 ref_generation, 2057 u64 first_objectid, 2058 int level, 2059 u64 hint, 2060 u64 empty_size) 2061 { 2062 struct btrfs_key ins; 2063 int ret; 2064 struct extent_buffer *buf; 2065 2066 ret = btrfs_alloc_extent(trans, root, blocksize, blocksize, 2067 root_objectid, ref_generation, 2068 level, first_objectid, empty_size, hint, 2069 (u64)-1, &ins, 0); 2070 if (ret) { 2071 BUG_ON(ret > 0); 2072 return ERR_PTR(ret); 2073 } 2074 buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize); 2075 if (!buf) { 2076 btrfs_free_extent(trans, root, ins.objectid, blocksize, 2077 root->root_key.objectid, ref_generation, 2078 0, 0, 0); 2079 return ERR_PTR(-ENOMEM); 2080 } 2081 btrfs_set_header_generation(buf, trans->transid); 2082 btrfs_tree_lock(buf); 2083 clean_tree_block(trans, root, buf); 2084 btrfs_set_buffer_uptodate(buf); 2085 2086 if (PageDirty(buf->first_page)) { 2087 printk("page %lu dirty\n", buf->first_page->index); 2088 WARN_ON(1); 2089 } 2090 2091 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 2092 buf->start + buf->len - 1, GFP_NOFS); 2093 if (!btrfs_test_opt(root, SSD)) 2094 btrfs_set_buffer_defrag(buf); 2095 trans->blocks_used++; 2096 return buf; 2097 } 2098 2099 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, 2100 struct btrfs_root *root, 2101 struct extent_buffer *leaf) 2102 { 2103 u64 leaf_owner; 2104 u64 leaf_generation; 2105 struct btrfs_key key; 2106 struct btrfs_file_extent_item *fi; 2107 int i; 2108 int nritems; 2109 int ret; 2110 2111 BUG_ON(!btrfs_is_leaf(leaf)); 2112 nritems = btrfs_header_nritems(leaf); 2113 leaf_owner = btrfs_header_owner(leaf); 2114 leaf_generation = btrfs_header_generation(leaf); 2115 2116 for (i = 0; i < nritems; i++) { 2117 u64 disk_bytenr; 2118 2119 btrfs_item_key_to_cpu(leaf, &key, i); 2120 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2121 continue; 2122 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 2123 if (btrfs_file_extent_type(leaf, fi) == 2124 BTRFS_FILE_EXTENT_INLINE) 2125 continue; 2126 /* 2127 * FIXME make sure to insert a trans record that 2128 * repeats the snapshot del on crash 2129 */ 2130 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 2131 if (disk_bytenr == 0) 2132 continue; 2133 ret = __btrfs_free_extent(trans, root, disk_bytenr, 2134 btrfs_file_extent_disk_num_bytes(leaf, fi), 2135 leaf_owner, leaf_generation, 2136 key.objectid, key.offset, 0); 2137 BUG_ON(ret); 2138 } 2139 return 0; 2140 } 2141 2142 static void noinline reada_walk_down(struct btrfs_root *root, 2143 struct extent_buffer *node, 2144 int slot) 2145 { 2146 u64 bytenr; 2147 u64 last = 0; 2148 u32 nritems; 2149 u32 refs; 2150 u32 blocksize; 2151 int ret; 2152 int i; 2153 int level; 2154 int skipped = 0; 2155 2156 nritems = btrfs_header_nritems(node); 2157 level = btrfs_header_level(node); 2158 if (level) 2159 return; 2160 2161 for (i = slot; i < nritems && skipped < 32; i++) { 2162 bytenr = btrfs_node_blockptr(node, i); 2163 if (last && ((bytenr > last && bytenr - last > 32 * 1024) || 2164 (last > bytenr && last - bytenr > 32 * 1024))) { 2165 skipped++; 2166 continue; 2167 } 2168 blocksize = btrfs_level_size(root, level - 1); 2169 if (i != slot) { 2170 ret = lookup_extent_ref(NULL, root, bytenr, 2171 blocksize, &refs); 2172 BUG_ON(ret); 2173 if (refs != 1) { 2174 skipped++; 2175 continue; 2176 } 2177 } 2178 mutex_unlock(&root->fs_info->alloc_mutex); 2179 ret = readahead_tree_block(root, bytenr, blocksize, 2180 btrfs_node_ptr_generation(node, i)); 2181 last = bytenr + blocksize; 2182 cond_resched(); 2183 mutex_lock(&root->fs_info->alloc_mutex); 2184 if (ret) 2185 break; 2186 } 2187 } 2188 2189 /* 2190 * helper function for drop_snapshot, this walks down the tree dropping ref 2191 * counts as it goes. 2192 */ 2193 static int noinline walk_down_tree(struct btrfs_trans_handle *trans, 2194 struct btrfs_root *root, 2195 struct btrfs_path *path, int *level) 2196 { 2197 u64 root_owner; 2198 u64 root_gen; 2199 u64 bytenr; 2200 u64 ptr_gen; 2201 struct extent_buffer *next; 2202 struct extent_buffer *cur; 2203 struct extent_buffer *parent; 2204 u32 blocksize; 2205 int ret; 2206 u32 refs; 2207 2208 mutex_lock(&root->fs_info->alloc_mutex); 2209 2210 WARN_ON(*level < 0); 2211 WARN_ON(*level >= BTRFS_MAX_LEVEL); 2212 ret = lookup_extent_ref(trans, root, 2213 path->nodes[*level]->start, 2214 path->nodes[*level]->len, &refs); 2215 BUG_ON(ret); 2216 if (refs > 1) 2217 goto out; 2218 2219 /* 2220 * walk down to the last node level and free all the leaves 2221 */ 2222 while(*level >= 0) { 2223 WARN_ON(*level < 0); 2224 WARN_ON(*level >= BTRFS_MAX_LEVEL); 2225 cur = path->nodes[*level]; 2226 2227 if (btrfs_header_level(cur) != *level) 2228 WARN_ON(1); 2229 2230 if (path->slots[*level] >= 2231 btrfs_header_nritems(cur)) 2232 break; 2233 if (*level == 0) { 2234 ret = drop_leaf_ref(trans, root, cur); 2235 BUG_ON(ret); 2236 break; 2237 } 2238 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 2239 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 2240 blocksize = btrfs_level_size(root, *level - 1); 2241 2242 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); 2243 BUG_ON(ret); 2244 if (refs != 1) { 2245 parent = path->nodes[*level]; 2246 root_owner = btrfs_header_owner(parent); 2247 root_gen = btrfs_header_generation(parent); 2248 path->slots[*level]++; 2249 ret = __btrfs_free_extent(trans, root, bytenr, 2250 blocksize, root_owner, 2251 root_gen, 0, 0, 1); 2252 BUG_ON(ret); 2253 continue; 2254 } 2255 next = btrfs_find_tree_block(root, bytenr, blocksize); 2256 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { 2257 free_extent_buffer(next); 2258 reada_walk_down(root, cur, path->slots[*level]); 2259 2260 mutex_unlock(&root->fs_info->alloc_mutex); 2261 next = read_tree_block(root, bytenr, blocksize, 2262 ptr_gen); 2263 mutex_lock(&root->fs_info->alloc_mutex); 2264 2265 /* we've dropped the lock, double check */ 2266 ret = lookup_extent_ref(trans, root, bytenr, 2267 blocksize, &refs); 2268 BUG_ON(ret); 2269 if (refs != 1) { 2270 parent = path->nodes[*level]; 2271 root_owner = btrfs_header_owner(parent); 2272 root_gen = btrfs_header_generation(parent); 2273 2274 path->slots[*level]++; 2275 free_extent_buffer(next); 2276 ret = __btrfs_free_extent(trans, root, bytenr, 2277 blocksize, 2278 root_owner, 2279 root_gen, 0, 0, 1); 2280 BUG_ON(ret); 2281 continue; 2282 } 2283 } 2284 WARN_ON(*level <= 0); 2285 if (path->nodes[*level-1]) 2286 free_extent_buffer(path->nodes[*level-1]); 2287 path->nodes[*level-1] = next; 2288 *level = btrfs_header_level(next); 2289 path->slots[*level] = 0; 2290 } 2291 out: 2292 WARN_ON(*level < 0); 2293 WARN_ON(*level >= BTRFS_MAX_LEVEL); 2294 2295 if (path->nodes[*level] == root->node) { 2296 root_owner = root->root_key.objectid; 2297 parent = path->nodes[*level]; 2298 } else { 2299 parent = path->nodes[*level + 1]; 2300 root_owner = btrfs_header_owner(parent); 2301 } 2302 2303 root_gen = btrfs_header_generation(parent); 2304 ret = __btrfs_free_extent(trans, root, path->nodes[*level]->start, 2305 path->nodes[*level]->len, 2306 root_owner, root_gen, 0, 0, 1); 2307 free_extent_buffer(path->nodes[*level]); 2308 path->nodes[*level] = NULL; 2309 *level += 1; 2310 BUG_ON(ret); 2311 mutex_unlock(&root->fs_info->alloc_mutex); 2312 return 0; 2313 } 2314 2315 /* 2316 * helper for dropping snapshots. This walks back up the tree in the path 2317 * to find the first node higher up where we haven't yet gone through 2318 * all the slots 2319 */ 2320 static int noinline walk_up_tree(struct btrfs_trans_handle *trans, 2321 struct btrfs_root *root, 2322 struct btrfs_path *path, int *level) 2323 { 2324 u64 root_owner; 2325 u64 root_gen; 2326 struct btrfs_root_item *root_item = &root->root_item; 2327 int i; 2328 int slot; 2329 int ret; 2330 2331 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { 2332 slot = path->slots[i]; 2333 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { 2334 struct extent_buffer *node; 2335 struct btrfs_disk_key disk_key; 2336 node = path->nodes[i]; 2337 path->slots[i]++; 2338 *level = i; 2339 WARN_ON(*level == 0); 2340 btrfs_node_key(node, &disk_key, path->slots[i]); 2341 memcpy(&root_item->drop_progress, 2342 &disk_key, sizeof(disk_key)); 2343 root_item->drop_level = i; 2344 return 0; 2345 } else { 2346 if (path->nodes[*level] == root->node) { 2347 root_owner = root->root_key.objectid; 2348 root_gen = 2349 btrfs_header_generation(path->nodes[*level]); 2350 } else { 2351 struct extent_buffer *node; 2352 node = path->nodes[*level + 1]; 2353 root_owner = btrfs_header_owner(node); 2354 root_gen = btrfs_header_generation(node); 2355 } 2356 ret = btrfs_free_extent(trans, root, 2357 path->nodes[*level]->start, 2358 path->nodes[*level]->len, 2359 root_owner, root_gen, 0, 0, 1); 2360 BUG_ON(ret); 2361 free_extent_buffer(path->nodes[*level]); 2362 path->nodes[*level] = NULL; 2363 *level = i + 1; 2364 } 2365 } 2366 return 1; 2367 } 2368 2369 /* 2370 * drop the reference count on the tree rooted at 'snap'. This traverses 2371 * the tree freeing any blocks that have a ref count of zero after being 2372 * decremented. 2373 */ 2374 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 2375 *root) 2376 { 2377 int ret = 0; 2378 int wret; 2379 int level; 2380 struct btrfs_path *path; 2381 int i; 2382 int orig_level; 2383 struct btrfs_root_item *root_item = &root->root_item; 2384 2385 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); 2386 path = btrfs_alloc_path(); 2387 BUG_ON(!path); 2388 2389 level = btrfs_header_level(root->node); 2390 orig_level = level; 2391 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 2392 path->nodes[level] = root->node; 2393 extent_buffer_get(root->node); 2394 path->slots[level] = 0; 2395 } else { 2396 struct btrfs_key key; 2397 struct btrfs_disk_key found_key; 2398 struct extent_buffer *node; 2399 2400 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 2401 level = root_item->drop_level; 2402 path->lowest_level = level; 2403 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2404 if (wret < 0) { 2405 ret = wret; 2406 goto out; 2407 } 2408 node = path->nodes[level]; 2409 btrfs_node_key(node, &found_key, path->slots[level]); 2410 WARN_ON(memcmp(&found_key, &root_item->drop_progress, 2411 sizeof(found_key))); 2412 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 2413 if (path->nodes[i] && path->locks[i]) { 2414 path->locks[i] = 0; 2415 btrfs_tree_unlock(path->nodes[i]); 2416 } 2417 } 2418 } 2419 while(1) { 2420 wret = walk_down_tree(trans, root, path, &level); 2421 if (wret > 0) 2422 break; 2423 if (wret < 0) 2424 ret = wret; 2425 2426 wret = walk_up_tree(trans, root, path, &level); 2427 if (wret > 0) 2428 break; 2429 if (wret < 0) 2430 ret = wret; 2431 ret = -EAGAIN; 2432 break; 2433 } 2434 for (i = 0; i <= orig_level; i++) { 2435 if (path->nodes[i]) { 2436 free_extent_buffer(path->nodes[i]); 2437 path->nodes[i] = NULL; 2438 } 2439 } 2440 out: 2441 btrfs_free_path(path); 2442 return ret; 2443 } 2444 2445 int btrfs_free_block_groups(struct btrfs_fs_info *info) 2446 { 2447 u64 start; 2448 u64 end; 2449 u64 ptr; 2450 int ret; 2451 2452 mutex_lock(&info->alloc_mutex); 2453 while(1) { 2454 ret = find_first_extent_bit(&info->block_group_cache, 0, 2455 &start, &end, (unsigned int)-1); 2456 if (ret) 2457 break; 2458 ret = get_state_private(&info->block_group_cache, start, &ptr); 2459 if (!ret) 2460 kfree((void *)(unsigned long)ptr); 2461 clear_extent_bits(&info->block_group_cache, start, 2462 end, (unsigned int)-1, GFP_NOFS); 2463 } 2464 while(1) { 2465 ret = find_first_extent_bit(&info->free_space_cache, 0, 2466 &start, &end, EXTENT_DIRTY); 2467 if (ret) 2468 break; 2469 clear_extent_dirty(&info->free_space_cache, start, 2470 end, GFP_NOFS); 2471 } 2472 mutex_unlock(&info->alloc_mutex); 2473 return 0; 2474 } 2475 2476 static unsigned long calc_ra(unsigned long start, unsigned long last, 2477 unsigned long nr) 2478 { 2479 return min(last, start + nr - 1); 2480 } 2481 2482 static int noinline relocate_inode_pages(struct inode *inode, u64 start, 2483 u64 len) 2484 { 2485 u64 page_start; 2486 u64 page_end; 2487 unsigned long last_index; 2488 unsigned long i; 2489 struct page *page; 2490 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2491 struct file_ra_state *ra; 2492 unsigned long total_read = 0; 2493 unsigned long ra_pages; 2494 struct btrfs_trans_handle *trans; 2495 2496 ra = kzalloc(sizeof(*ra), GFP_NOFS); 2497 2498 mutex_lock(&inode->i_mutex); 2499 i = start >> PAGE_CACHE_SHIFT; 2500 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 2501 2502 ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; 2503 2504 file_ra_state_init(ra, inode->i_mapping); 2505 2506 for (; i <= last_index; i++) { 2507 if (total_read % ra_pages == 0) { 2508 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 2509 calc_ra(i, last_index, ra_pages)); 2510 } 2511 total_read++; 2512 if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size) 2513 goto truncate_racing; 2514 2515 page = grab_cache_page(inode->i_mapping, i); 2516 if (!page) { 2517 goto out_unlock; 2518 } 2519 if (!PageUptodate(page)) { 2520 btrfs_readpage(NULL, page); 2521 lock_page(page); 2522 if (!PageUptodate(page)) { 2523 unlock_page(page); 2524 page_cache_release(page); 2525 goto out_unlock; 2526 } 2527 } 2528 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) 2529 ClearPageDirty(page); 2530 #else 2531 cancel_dirty_page(page, PAGE_CACHE_SIZE); 2532 #endif 2533 wait_on_page_writeback(page); 2534 set_page_extent_mapped(page); 2535 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2536 page_end = page_start + PAGE_CACHE_SIZE - 1; 2537 2538 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 2539 2540 set_extent_delalloc(io_tree, page_start, 2541 page_end, GFP_NOFS); 2542 set_page_dirty(page); 2543 2544 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 2545 unlock_page(page); 2546 page_cache_release(page); 2547 } 2548 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 2549 total_read); 2550 2551 out_unlock: 2552 kfree(ra); 2553 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); 2554 if (trans) { 2555 btrfs_add_ordered_inode(inode); 2556 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 2557 mark_inode_dirty(inode); 2558 } 2559 mutex_unlock(&inode->i_mutex); 2560 return 0; 2561 2562 truncate_racing: 2563 vmtruncate(inode, inode->i_size); 2564 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 2565 total_read); 2566 goto out_unlock; 2567 } 2568 2569 /* 2570 * The back references tell us which tree holds a ref on a block, 2571 * but it is possible for the tree root field in the reference to 2572 * reflect the original root before a snapshot was made. In this 2573 * case we should search through all the children of a given root 2574 * to find potential holders of references on a block. 2575 * 2576 * Instead, we do something a little less fancy and just search 2577 * all the roots for a given key/block combination. 2578 */ 2579 static int find_root_for_ref(struct btrfs_root *root, 2580 struct btrfs_path *path, 2581 struct btrfs_key *key0, 2582 int level, 2583 int file_key, 2584 struct btrfs_root **found_root, 2585 u64 bytenr) 2586 { 2587 struct btrfs_key root_location; 2588 struct btrfs_root *cur_root = *found_root; 2589 struct btrfs_file_extent_item *file_extent; 2590 u64 root_search_start = BTRFS_FS_TREE_OBJECTID; 2591 u64 found_bytenr; 2592 int ret; 2593 int i; 2594 2595 root_location.offset = (u64)-1; 2596 root_location.type = BTRFS_ROOT_ITEM_KEY; 2597 path->lowest_level = level; 2598 path->reada = 0; 2599 while(1) { 2600 ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0); 2601 found_bytenr = 0; 2602 if (ret == 0 && file_key) { 2603 struct extent_buffer *leaf = path->nodes[0]; 2604 file_extent = btrfs_item_ptr(leaf, path->slots[0], 2605 struct btrfs_file_extent_item); 2606 if (btrfs_file_extent_type(leaf, file_extent) == 2607 BTRFS_FILE_EXTENT_REG) { 2608 found_bytenr = 2609 btrfs_file_extent_disk_bytenr(leaf, 2610 file_extent); 2611 } 2612 } else if (!file_key) { 2613 if (path->nodes[level]) 2614 found_bytenr = path->nodes[level]->start; 2615 } 2616 2617 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 2618 if (!path->nodes[i]) 2619 break; 2620 free_extent_buffer(path->nodes[i]); 2621 path->nodes[i] = NULL; 2622 } 2623 btrfs_release_path(cur_root, path); 2624 2625 if (found_bytenr == bytenr) { 2626 *found_root = cur_root; 2627 ret = 0; 2628 goto out; 2629 } 2630 ret = btrfs_search_root(root->fs_info->tree_root, 2631 root_search_start, &root_search_start); 2632 if (ret) 2633 break; 2634 2635 root_location.objectid = root_search_start; 2636 cur_root = btrfs_read_fs_root_no_name(root->fs_info, 2637 &root_location); 2638 if (!cur_root) { 2639 ret = 1; 2640 break; 2641 } 2642 } 2643 out: 2644 path->lowest_level = 0; 2645 return ret; 2646 } 2647 2648 /* 2649 * note, this releases the path 2650 */ 2651 static int noinline relocate_one_reference(struct btrfs_root *extent_root, 2652 struct btrfs_path *path, 2653 struct btrfs_key *extent_key, 2654 u64 *last_file_objectid, 2655 u64 *last_file_offset, 2656 u64 *last_file_root, 2657 u64 last_extent) 2658 { 2659 struct inode *inode; 2660 struct btrfs_root *found_root; 2661 struct btrfs_key root_location; 2662 struct btrfs_key found_key; 2663 struct btrfs_extent_ref *ref; 2664 u64 ref_root; 2665 u64 ref_gen; 2666 u64 ref_objectid; 2667 u64 ref_offset; 2668 int ret; 2669 int level; 2670 2671 ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 2672 struct btrfs_extent_ref); 2673 ref_root = btrfs_ref_root(path->nodes[0], ref); 2674 ref_gen = btrfs_ref_generation(path->nodes[0], ref); 2675 ref_objectid = btrfs_ref_objectid(path->nodes[0], ref); 2676 ref_offset = btrfs_ref_offset(path->nodes[0], ref); 2677 btrfs_release_path(extent_root, path); 2678 2679 root_location.objectid = ref_root; 2680 if (ref_gen == 0) 2681 root_location.offset = 0; 2682 else 2683 root_location.offset = (u64)-1; 2684 root_location.type = BTRFS_ROOT_ITEM_KEY; 2685 2686 found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, 2687 &root_location); 2688 BUG_ON(!found_root); 2689 2690 if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 2691 found_key.objectid = ref_objectid; 2692 found_key.type = BTRFS_EXTENT_DATA_KEY; 2693 found_key.offset = ref_offset; 2694 level = 0; 2695 2696 if (last_extent == extent_key->objectid && 2697 *last_file_objectid == ref_objectid && 2698 *last_file_offset == ref_offset && 2699 *last_file_root == ref_root) 2700 goto out; 2701 2702 ret = find_root_for_ref(extent_root, path, &found_key, 2703 level, 1, &found_root, 2704 extent_key->objectid); 2705 2706 if (ret) 2707 goto out; 2708 2709 if (last_extent == extent_key->objectid && 2710 *last_file_objectid == ref_objectid && 2711 *last_file_offset == ref_offset && 2712 *last_file_root == ref_root) 2713 goto out; 2714 2715 inode = btrfs_iget_locked(extent_root->fs_info->sb, 2716 ref_objectid, found_root); 2717 if (inode->i_state & I_NEW) { 2718 /* the inode and parent dir are two different roots */ 2719 BTRFS_I(inode)->root = found_root; 2720 BTRFS_I(inode)->location.objectid = ref_objectid; 2721 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; 2722 BTRFS_I(inode)->location.offset = 0; 2723 btrfs_read_locked_inode(inode); 2724 unlock_new_inode(inode); 2725 2726 } 2727 /* this can happen if the reference is not against 2728 * the latest version of the tree root 2729 */ 2730 if (is_bad_inode(inode)) { 2731 goto out; 2732 } 2733 *last_file_objectid = inode->i_ino; 2734 *last_file_root = found_root->root_key.objectid; 2735 *last_file_offset = ref_offset; 2736 2737 relocate_inode_pages(inode, ref_offset, extent_key->offset); 2738 iput(inode); 2739 } else { 2740 struct btrfs_trans_handle *trans; 2741 struct extent_buffer *eb; 2742 int i; 2743 2744 eb = read_tree_block(found_root, extent_key->objectid, 2745 extent_key->offset, 0); 2746 btrfs_tree_lock(eb); 2747 level = btrfs_header_level(eb); 2748 2749 if (level == 0) 2750 btrfs_item_key_to_cpu(eb, &found_key, 0); 2751 else 2752 btrfs_node_key_to_cpu(eb, &found_key, 0); 2753 2754 btrfs_tree_unlock(eb); 2755 free_extent_buffer(eb); 2756 2757 ret = find_root_for_ref(extent_root, path, &found_key, 2758 level, 0, &found_root, 2759 extent_key->objectid); 2760 2761 if (ret) 2762 goto out; 2763 2764 trans = btrfs_start_transaction(found_root, 1); 2765 2766 path->lowest_level = level; 2767 path->reada = 2; 2768 ret = btrfs_search_slot(trans, found_root, &found_key, path, 2769 0, 1); 2770 path->lowest_level = 0; 2771 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 2772 if (!path->nodes[i]) 2773 break; 2774 free_extent_buffer(path->nodes[i]); 2775 path->nodes[i] = NULL; 2776 } 2777 btrfs_release_path(found_root, path); 2778 if (found_root == found_root->fs_info->extent_root) 2779 btrfs_extent_post_op(trans, found_root); 2780 btrfs_end_transaction(trans, found_root); 2781 } 2782 2783 out: 2784 return 0; 2785 } 2786 2787 static int noinline del_extent_zero(struct btrfs_root *extent_root, 2788 struct btrfs_path *path, 2789 struct btrfs_key *extent_key) 2790 { 2791 int ret; 2792 struct btrfs_trans_handle *trans; 2793 2794 trans = btrfs_start_transaction(extent_root, 1); 2795 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); 2796 if (ret > 0) { 2797 ret = -EIO; 2798 goto out; 2799 } 2800 if (ret < 0) 2801 goto out; 2802 ret = btrfs_del_item(trans, extent_root, path); 2803 out: 2804 btrfs_end_transaction(trans, extent_root); 2805 return ret; 2806 } 2807 2808 static int noinline relocate_one_extent(struct btrfs_root *extent_root, 2809 struct btrfs_path *path, 2810 struct btrfs_key *extent_key) 2811 { 2812 struct btrfs_key key; 2813 struct btrfs_key found_key; 2814 struct extent_buffer *leaf; 2815 u64 last_file_objectid = 0; 2816 u64 last_file_root = 0; 2817 u64 last_file_offset = (u64)-1; 2818 u64 last_extent = 0; 2819 u32 nritems; 2820 u32 item_size; 2821 int ret = 0; 2822 2823 if (extent_key->objectid == 0) { 2824 ret = del_extent_zero(extent_root, path, extent_key); 2825 goto out; 2826 } 2827 key.objectid = extent_key->objectid; 2828 key.type = BTRFS_EXTENT_REF_KEY; 2829 key.offset = 0; 2830 2831 while(1) { 2832 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2833 2834 if (ret < 0) 2835 goto out; 2836 2837 ret = 0; 2838 leaf = path->nodes[0]; 2839 nritems = btrfs_header_nritems(leaf); 2840 if (path->slots[0] == nritems) { 2841 ret = btrfs_next_leaf(extent_root, path); 2842 if (ret > 0) { 2843 ret = 0; 2844 goto out; 2845 } 2846 if (ret < 0) 2847 goto out; 2848 leaf = path->nodes[0]; 2849 } 2850 2851 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2852 if (found_key.objectid != extent_key->objectid) { 2853 break; 2854 } 2855 2856 if (found_key.type != BTRFS_EXTENT_REF_KEY) { 2857 break; 2858 } 2859 2860 key.offset = found_key.offset + 1; 2861 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2862 2863 ret = relocate_one_reference(extent_root, path, extent_key, 2864 &last_file_objectid, 2865 &last_file_offset, 2866 &last_file_root, last_extent); 2867 if (ret) 2868 goto out; 2869 last_extent = extent_key->objectid; 2870 } 2871 ret = 0; 2872 out: 2873 btrfs_release_path(extent_root, path); 2874 return ret; 2875 } 2876 2877 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 2878 { 2879 u64 num_devices; 2880 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 2881 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 2882 2883 num_devices = root->fs_info->fs_devices->num_devices; 2884 if (num_devices == 1) { 2885 stripped |= BTRFS_BLOCK_GROUP_DUP; 2886 stripped = flags & ~stripped; 2887 2888 /* turn raid0 into single device chunks */ 2889 if (flags & BTRFS_BLOCK_GROUP_RAID0) 2890 return stripped; 2891 2892 /* turn mirroring into duplication */ 2893 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 2894 BTRFS_BLOCK_GROUP_RAID10)) 2895 return stripped | BTRFS_BLOCK_GROUP_DUP; 2896 return flags; 2897 } else { 2898 /* they already had raid on here, just return */ 2899 if (flags & stripped) 2900 return flags; 2901 2902 stripped |= BTRFS_BLOCK_GROUP_DUP; 2903 stripped = flags & ~stripped; 2904 2905 /* switch duplicated blocks with raid1 */ 2906 if (flags & BTRFS_BLOCK_GROUP_DUP) 2907 return stripped | BTRFS_BLOCK_GROUP_RAID1; 2908 2909 /* turn single device chunks into raid0 */ 2910 return stripped | BTRFS_BLOCK_GROUP_RAID0; 2911 } 2912 return flags; 2913 } 2914 2915 int __alloc_chunk_for_shrink(struct btrfs_root *root, 2916 struct btrfs_block_group_cache *shrink_block_group, 2917 int force) 2918 { 2919 struct btrfs_trans_handle *trans; 2920 u64 new_alloc_flags; 2921 u64 calc; 2922 2923 if (btrfs_block_group_used(&shrink_block_group->item) > 0) { 2924 2925 trans = btrfs_start_transaction(root, 1); 2926 new_alloc_flags = update_block_group_flags(root, 2927 shrink_block_group->flags); 2928 if (new_alloc_flags != shrink_block_group->flags) { 2929 calc = 2930 btrfs_block_group_used(&shrink_block_group->item); 2931 } else { 2932 calc = shrink_block_group->key.offset; 2933 } 2934 do_chunk_alloc(trans, root->fs_info->extent_root, 2935 calc + 2 * 1024 * 1024, new_alloc_flags, force); 2936 btrfs_end_transaction(trans, root); 2937 } 2938 return 0; 2939 } 2940 2941 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) 2942 { 2943 struct btrfs_trans_handle *trans; 2944 struct btrfs_root *tree_root = root->fs_info->tree_root; 2945 struct btrfs_path *path; 2946 u64 cur_byte; 2947 u64 total_found; 2948 u64 shrink_last_byte; 2949 struct btrfs_block_group_cache *shrink_block_group; 2950 struct btrfs_fs_info *info = root->fs_info; 2951 struct btrfs_key key; 2952 struct btrfs_key found_key; 2953 struct extent_buffer *leaf; 2954 u32 nritems; 2955 int ret; 2956 int progress; 2957 2958 mutex_lock(&root->fs_info->alloc_mutex); 2959 shrink_block_group = btrfs_lookup_block_group(root->fs_info, 2960 shrink_start); 2961 BUG_ON(!shrink_block_group); 2962 2963 shrink_last_byte = shrink_block_group->key.objectid + 2964 shrink_block_group->key.offset; 2965 2966 shrink_block_group->space_info->total_bytes -= 2967 shrink_block_group->key.offset; 2968 path = btrfs_alloc_path(); 2969 root = root->fs_info->extent_root; 2970 path->reada = 2; 2971 2972 printk("btrfs relocating block group %llu flags %llu\n", 2973 (unsigned long long)shrink_start, 2974 (unsigned long long)shrink_block_group->flags); 2975 2976 __alloc_chunk_for_shrink(root, shrink_block_group, 1); 2977 2978 again: 2979 2980 shrink_block_group->ro = 1; 2981 2982 total_found = 0; 2983 progress = 0; 2984 key.objectid = shrink_start; 2985 key.offset = 0; 2986 key.type = 0; 2987 cur_byte = key.objectid; 2988 2989 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2990 if (ret < 0) 2991 goto out; 2992 2993 ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY); 2994 if (ret < 0) 2995 goto out; 2996 2997 if (ret == 0) { 2998 leaf = path->nodes[0]; 2999 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 3000 if (found_key.objectid + found_key.offset > shrink_start && 3001 found_key.objectid < shrink_last_byte) { 3002 cur_byte = found_key.objectid; 3003 key.objectid = cur_byte; 3004 } 3005 } 3006 btrfs_release_path(root, path); 3007 3008 while(1) { 3009 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3010 if (ret < 0) 3011 goto out; 3012 3013 leaf = path->nodes[0]; 3014 nritems = btrfs_header_nritems(leaf); 3015 next: 3016 if (path->slots[0] >= nritems) { 3017 ret = btrfs_next_leaf(root, path); 3018 if (ret < 0) 3019 goto out; 3020 if (ret == 1) { 3021 ret = 0; 3022 break; 3023 } 3024 leaf = path->nodes[0]; 3025 nritems = btrfs_header_nritems(leaf); 3026 } 3027 3028 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 3029 3030 if (found_key.objectid >= shrink_last_byte) 3031 break; 3032 3033 if (progress && need_resched()) { 3034 memcpy(&key, &found_key, sizeof(key)); 3035 cond_resched(); 3036 btrfs_release_path(root, path); 3037 btrfs_search_slot(NULL, root, &key, path, 0, 0); 3038 progress = 0; 3039 goto next; 3040 } 3041 progress = 1; 3042 3043 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || 3044 found_key.objectid + found_key.offset <= cur_byte) { 3045 memcpy(&key, &found_key, sizeof(key)); 3046 key.offset++; 3047 path->slots[0]++; 3048 goto next; 3049 } 3050 3051 total_found++; 3052 cur_byte = found_key.objectid + found_key.offset; 3053 key.objectid = cur_byte; 3054 btrfs_release_path(root, path); 3055 ret = relocate_one_extent(root, path, &found_key); 3056 __alloc_chunk_for_shrink(root, shrink_block_group, 0); 3057 } 3058 3059 btrfs_release_path(root, path); 3060 3061 if (total_found > 0) { 3062 printk("btrfs relocate found %llu last extent was %llu\n", 3063 (unsigned long long)total_found, 3064 (unsigned long long)found_key.objectid); 3065 trans = btrfs_start_transaction(tree_root, 1); 3066 btrfs_commit_transaction(trans, tree_root); 3067 3068 btrfs_clean_old_snapshots(tree_root); 3069 3070 trans = btrfs_start_transaction(tree_root, 1); 3071 btrfs_commit_transaction(trans, tree_root); 3072 goto again; 3073 } 3074 3075 /* 3076 * we've freed all the extents, now remove the block 3077 * group item from the tree 3078 */ 3079 trans = btrfs_start_transaction(root, 1); 3080 memcpy(&key, &shrink_block_group->key, sizeof(key)); 3081 3082 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 3083 if (ret > 0) 3084 ret = -EIO; 3085 if (ret < 0) 3086 goto out; 3087 3088 clear_extent_bits(&info->block_group_cache, key.objectid, 3089 key.objectid + key.offset - 1, 3090 (unsigned int)-1, GFP_NOFS); 3091 3092 3093 clear_extent_bits(&info->free_space_cache, 3094 key.objectid, key.objectid + key.offset - 1, 3095 (unsigned int)-1, GFP_NOFS); 3096 3097 memset(shrink_block_group, 0, sizeof(*shrink_block_group)); 3098 kfree(shrink_block_group); 3099 3100 btrfs_del_item(trans, root, path); 3101 btrfs_commit_transaction(trans, root); 3102 3103 /* the code to unpin extents might set a few bits in the free 3104 * space cache for this range again 3105 */ 3106 clear_extent_bits(&info->free_space_cache, 3107 key.objectid, key.objectid + key.offset - 1, 3108 (unsigned int)-1, GFP_NOFS); 3109 out: 3110 btrfs_free_path(path); 3111 mutex_unlock(&root->fs_info->alloc_mutex); 3112 return ret; 3113 } 3114 3115 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, 3116 struct btrfs_key *key) 3117 { 3118 int ret = 0; 3119 struct btrfs_key found_key; 3120 struct extent_buffer *leaf; 3121 int slot; 3122 3123 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 3124 if (ret < 0) 3125 goto out; 3126 3127 while(1) { 3128 slot = path->slots[0]; 3129 leaf = path->nodes[0]; 3130 if (slot >= btrfs_header_nritems(leaf)) { 3131 ret = btrfs_next_leaf(root, path); 3132 if (ret == 0) 3133 continue; 3134 if (ret < 0) 3135 goto out; 3136 break; 3137 } 3138 btrfs_item_key_to_cpu(leaf, &found_key, slot); 3139 3140 if (found_key.objectid >= key->objectid && 3141 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 3142 ret = 0; 3143 goto out; 3144 } 3145 path->slots[0]++; 3146 } 3147 ret = -ENOENT; 3148 out: 3149 return ret; 3150 } 3151 3152 int btrfs_read_block_groups(struct btrfs_root *root) 3153 { 3154 struct btrfs_path *path; 3155 int ret; 3156 int bit; 3157 struct btrfs_block_group_cache *cache; 3158 struct btrfs_fs_info *info = root->fs_info; 3159 struct btrfs_space_info *space_info; 3160 struct extent_io_tree *block_group_cache; 3161 struct btrfs_key key; 3162 struct btrfs_key found_key; 3163 struct extent_buffer *leaf; 3164 3165 block_group_cache = &info->block_group_cache; 3166 root = info->extent_root; 3167 key.objectid = 0; 3168 key.offset = 0; 3169 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 3170 path = btrfs_alloc_path(); 3171 if (!path) 3172 return -ENOMEM; 3173 3174 mutex_lock(&root->fs_info->alloc_mutex); 3175 while(1) { 3176 ret = find_first_block_group(root, path, &key); 3177 if (ret > 0) { 3178 ret = 0; 3179 goto error; 3180 } 3181 if (ret != 0) 3182 goto error; 3183 3184 leaf = path->nodes[0]; 3185 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 3186 cache = kzalloc(sizeof(*cache), GFP_NOFS); 3187 if (!cache) { 3188 ret = -ENOMEM; 3189 break; 3190 } 3191 3192 read_extent_buffer(leaf, &cache->item, 3193 btrfs_item_ptr_offset(leaf, path->slots[0]), 3194 sizeof(cache->item)); 3195 memcpy(&cache->key, &found_key, sizeof(found_key)); 3196 3197 key.objectid = found_key.objectid + found_key.offset; 3198 btrfs_release_path(root, path); 3199 cache->flags = btrfs_block_group_flags(&cache->item); 3200 bit = 0; 3201 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) { 3202 bit = BLOCK_GROUP_DATA; 3203 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { 3204 bit = BLOCK_GROUP_SYSTEM; 3205 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { 3206 bit = BLOCK_GROUP_METADATA; 3207 } 3208 set_avail_alloc_bits(info, cache->flags); 3209 3210 ret = update_space_info(info, cache->flags, found_key.offset, 3211 btrfs_block_group_used(&cache->item), 3212 &space_info); 3213 BUG_ON(ret); 3214 cache->space_info = space_info; 3215 3216 /* use EXTENT_LOCKED to prevent merging */ 3217 set_extent_bits(block_group_cache, found_key.objectid, 3218 found_key.objectid + found_key.offset - 1, 3219 bit | EXTENT_LOCKED, GFP_NOFS); 3220 set_state_private(block_group_cache, found_key.objectid, 3221 (unsigned long)cache); 3222 3223 if (key.objectid >= 3224 btrfs_super_total_bytes(&info->super_copy)) 3225 break; 3226 } 3227 ret = 0; 3228 error: 3229 btrfs_free_path(path); 3230 mutex_unlock(&root->fs_info->alloc_mutex); 3231 return ret; 3232 } 3233 3234 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 3235 struct btrfs_root *root, u64 bytes_used, 3236 u64 type, u64 chunk_objectid, u64 chunk_offset, 3237 u64 size) 3238 { 3239 int ret; 3240 int bit = 0; 3241 struct btrfs_root *extent_root; 3242 struct btrfs_block_group_cache *cache; 3243 struct extent_io_tree *block_group_cache; 3244 3245 extent_root = root->fs_info->extent_root; 3246 block_group_cache = &root->fs_info->block_group_cache; 3247 3248 cache = kzalloc(sizeof(*cache), GFP_NOFS); 3249 BUG_ON(!cache); 3250 cache->key.objectid = chunk_offset; 3251 cache->key.offset = size; 3252 btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY); 3253 3254 btrfs_set_block_group_used(&cache->item, bytes_used); 3255 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 3256 cache->flags = type; 3257 btrfs_set_block_group_flags(&cache->item, type); 3258 3259 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 3260 &cache->space_info); 3261 BUG_ON(ret); 3262 3263 bit = block_group_state_bits(type); 3264 set_extent_bits(block_group_cache, chunk_offset, 3265 chunk_offset + size - 1, 3266 bit | EXTENT_LOCKED, GFP_NOFS); 3267 3268 set_state_private(block_group_cache, chunk_offset, 3269 (unsigned long)cache); 3270 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 3271 sizeof(cache->item)); 3272 BUG_ON(ret); 3273 3274 finish_current_insert(trans, extent_root); 3275 ret = del_pending_extents(trans, extent_root); 3276 BUG_ON(ret); 3277 set_avail_alloc_bits(extent_root->fs_info, type); 3278 3279 return 0; 3280 } 3281