1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/crc32c.h> 21 #include <linux/pagemap.h> 22 #include "hash.h" 23 #include "ctree.h" 24 #include "disk-io.h" 25 #include "print-tree.h" 26 #include "transaction.h" 27 28 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK 29 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE 30 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY 31 32 static int finish_current_insert(struct btrfs_trans_handle *trans, struct 33 btrfs_root *extent_root); 34 static int del_pending_extents(struct btrfs_trans_handle *trans, struct 35 btrfs_root *extent_root); 36 static int find_previous_extent(struct btrfs_root *root, 37 struct btrfs_path *path) 38 { 39 struct btrfs_key found_key; 40 struct extent_buffer *leaf; 41 int ret; 42 43 while(1) { 44 if (path->slots[0] == 0) { 45 ret = btrfs_prev_leaf(root, path); 46 if (ret != 0) 47 return ret; 48 } else { 49 path->slots[0]--; 50 } 51 leaf = path->nodes[0]; 52 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 53 if (found_key.type == BTRFS_EXTENT_ITEM_KEY) 54 return 0; 55 } 56 return 1; 57 } 58 59 static int cache_block_group(struct btrfs_root *root, 60 struct btrfs_block_group_cache *block_group) 61 { 62 struct btrfs_path *path; 63 int ret; 64 struct btrfs_key key; 65 struct extent_buffer *leaf; 66 struct extent_io_tree *free_space_cache; 67 int slot; 68 u64 last = 0; 69 u64 hole_size; 70 u64 first_free; 71 int found = 0; 72 73 if (!block_group) 74 return 0; 75 76 root = root->fs_info->extent_root; 77 free_space_cache = &root->fs_info->free_space_cache; 78 79 if (block_group->cached) 80 return 0; 81 82 path = btrfs_alloc_path(); 83 if (!path) 84 return -ENOMEM; 85 86 path->reada = 2; 87 first_free = block_group->key.objectid; 88 key.objectid = block_group->key.objectid; 89 key.offset = 0; 90 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 91 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 92 if (ret < 0) 93 return ret; 94 ret = find_previous_extent(root, path); 95 if (ret < 0) 96 return ret; 97 if (ret == 0) { 98 leaf = path->nodes[0]; 99 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 100 if (key.objectid + key.offset > first_free) 101 first_free = key.objectid + key.offset; 102 } 103 while(1) { 104 leaf = path->nodes[0]; 105 slot = path->slots[0]; 106 if (slot >= btrfs_header_nritems(leaf)) { 107 ret = btrfs_next_leaf(root, path); 108 if (ret < 0) 109 goto err; 110 if (ret == 0) { 111 continue; 112 } else { 113 break; 114 } 115 } 116 btrfs_item_key_to_cpu(leaf, &key, slot); 117 if (key.objectid < block_group->key.objectid) { 118 goto next; 119 } 120 if (key.objectid >= block_group->key.objectid + 121 block_group->key.offset) { 122 break; 123 } 124 125 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 126 if (!found) { 127 last = first_free; 128 found = 1; 129 } 130 if (key.objectid > last) { 131 hole_size = key.objectid - last; 132 set_extent_dirty(free_space_cache, last, 133 last + hole_size - 1, 134 GFP_NOFS); 135 } 136 last = key.objectid + key.offset; 137 } 138 next: 139 path->slots[0]++; 140 } 141 142 if (!found) 143 last = first_free; 144 if (block_group->key.objectid + 145 block_group->key.offset > last) { 146 hole_size = block_group->key.objectid + 147 block_group->key.offset - last; 148 set_extent_dirty(free_space_cache, last, 149 last + hole_size - 1, GFP_NOFS); 150 } 151 block_group->cached = 1; 152 err: 153 btrfs_free_path(path); 154 return 0; 155 } 156 157 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct 158 btrfs_fs_info *info, 159 u64 bytenr) 160 { 161 struct extent_io_tree *block_group_cache; 162 struct btrfs_block_group_cache *block_group = NULL; 163 u64 ptr; 164 u64 start; 165 u64 end; 166 int ret; 167 168 block_group_cache = &info->block_group_cache; 169 ret = find_first_extent_bit(block_group_cache, 170 bytenr, &start, &end, 171 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA); 172 if (ret) { 173 return NULL; 174 } 175 ret = get_state_private(block_group_cache, start, &ptr); 176 if (ret) 177 return NULL; 178 179 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; 180 if (block_group->key.objectid <= bytenr && bytenr < 181 block_group->key.objectid + block_group->key.offset) 182 return block_group; 183 return NULL; 184 } 185 static u64 noinline find_search_start(struct btrfs_root *root, 186 struct btrfs_block_group_cache **cache_ret, 187 u64 search_start, int num, int data) 188 { 189 int ret; 190 struct btrfs_block_group_cache *cache = *cache_ret; 191 u64 last; 192 u64 start = 0; 193 u64 end = 0; 194 u64 cache_miss = 0; 195 u64 total_fs_bytes; 196 int wrapped = 0; 197 198 if (!cache) { 199 goto out; 200 } 201 total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); 202 again: 203 ret = cache_block_group(root, cache); 204 if (ret) 205 goto out; 206 207 last = max(search_start, cache->key.objectid); 208 209 while(1) { 210 ret = find_first_extent_bit(&root->fs_info->free_space_cache, 211 last, &start, &end, EXTENT_DIRTY); 212 if (ret) { 213 if (!cache_miss) 214 cache_miss = last; 215 goto new_group; 216 } 217 218 start = max(last, start); 219 last = end + 1; 220 if (last - start < num) { 221 if (last == cache->key.objectid + cache->key.offset) 222 cache_miss = start; 223 continue; 224 } 225 if (data != BTRFS_BLOCK_GROUP_MIXED && 226 start + num > cache->key.objectid + cache->key.offset) 227 goto new_group; 228 if (start + num > total_fs_bytes) 229 goto new_group; 230 return start; 231 } 232 out: 233 cache = btrfs_lookup_block_group(root->fs_info, search_start); 234 if (!cache) { 235 printk("Unable to find block group for %Lu\n", 236 search_start); 237 WARN_ON(1); 238 return search_start; 239 } 240 return search_start; 241 242 new_group: 243 last = cache->key.objectid + cache->key.offset; 244 wrapped: 245 cache = btrfs_lookup_block_group(root->fs_info, last); 246 if (!cache || cache->key.objectid >= total_fs_bytes) { 247 no_cache: 248 if (!wrapped) { 249 wrapped = 1; 250 last = search_start; 251 data = BTRFS_BLOCK_GROUP_MIXED; 252 goto wrapped; 253 } 254 goto out; 255 } 256 if (cache_miss && !cache->cached) { 257 cache_block_group(root, cache); 258 last = cache_miss; 259 cache = btrfs_lookup_block_group(root->fs_info, last); 260 } 261 cache = btrfs_find_block_group(root, cache, last, data, 0); 262 if (!cache) 263 goto no_cache; 264 *cache_ret = cache; 265 cache_miss = 0; 266 goto again; 267 } 268 269 static u64 div_factor(u64 num, int factor) 270 { 271 if (factor == 10) 272 return num; 273 num *= factor; 274 do_div(num, 10); 275 return num; 276 } 277 278 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, 279 struct btrfs_block_group_cache 280 *hint, u64 search_start, 281 int data, int owner) 282 { 283 struct btrfs_block_group_cache *cache; 284 struct extent_io_tree *block_group_cache; 285 struct btrfs_block_group_cache *found_group = NULL; 286 struct btrfs_fs_info *info = root->fs_info; 287 u64 used; 288 u64 last = 0; 289 u64 hint_last; 290 u64 start; 291 u64 end; 292 u64 free_check; 293 u64 ptr; 294 u64 total_fs_bytes; 295 int bit; 296 int ret; 297 int full_search = 0; 298 int factor = 8; 299 int data_swap = 0; 300 301 block_group_cache = &info->block_group_cache; 302 total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); 303 304 if (!owner) 305 factor = 8; 306 307 if (data == BTRFS_BLOCK_GROUP_MIXED) { 308 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; 309 factor = 10; 310 } else if (data) 311 bit = BLOCK_GROUP_DATA; 312 else 313 bit = BLOCK_GROUP_METADATA; 314 315 if (search_start && search_start < total_fs_bytes) { 316 struct btrfs_block_group_cache *shint; 317 shint = btrfs_lookup_block_group(info, search_start); 318 if (shint && (shint->data == data || 319 shint->data == BTRFS_BLOCK_GROUP_MIXED)) { 320 used = btrfs_block_group_used(&shint->item); 321 if (used + shint->pinned < 322 div_factor(shint->key.offset, factor)) { 323 return shint; 324 } 325 } 326 } 327 if (hint && hint->key.objectid < total_fs_bytes && 328 (hint->data == data || hint->data == BTRFS_BLOCK_GROUP_MIXED)) { 329 used = btrfs_block_group_used(&hint->item); 330 if (used + hint->pinned < 331 div_factor(hint->key.offset, factor)) { 332 return hint; 333 } 334 last = hint->key.objectid + hint->key.offset; 335 hint_last = last; 336 } else { 337 if (hint) 338 hint_last = max(hint->key.objectid, search_start); 339 else 340 hint_last = search_start; 341 342 if (hint_last >= total_fs_bytes) 343 hint_last = search_start; 344 last = hint_last; 345 } 346 again: 347 while(1) { 348 ret = find_first_extent_bit(block_group_cache, last, 349 &start, &end, bit); 350 if (ret) 351 break; 352 353 ret = get_state_private(block_group_cache, start, &ptr); 354 if (ret) 355 break; 356 357 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; 358 last = cache->key.objectid + cache->key.offset; 359 used = btrfs_block_group_used(&cache->item); 360 361 if (cache->key.objectid > total_fs_bytes) 362 break; 363 364 if (full_search) 365 free_check = cache->key.offset; 366 else 367 free_check = div_factor(cache->key.offset, factor); 368 if (used + cache->pinned < free_check) { 369 found_group = cache; 370 goto found; 371 } 372 cond_resched(); 373 } 374 if (!full_search) { 375 last = search_start; 376 full_search = 1; 377 goto again; 378 } 379 if (!data_swap) { 380 data_swap = 1; 381 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; 382 last = search_start; 383 goto again; 384 } 385 found: 386 return found_group; 387 } 388 389 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation, 390 u64 owner, u64 owner_offset) 391 { 392 u32 high_crc = ~(u32)0; 393 u32 low_crc = ~(u32)0; 394 __le64 lenum; 395 396 lenum = cpu_to_le64(root_objectid); 397 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 398 lenum = cpu_to_le64(ref_generation); 399 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 400 401 #if 0 402 lenum = cpu_to_le64(owner); 403 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 404 lenum = cpu_to_le64(owner_offset); 405 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 406 #endif 407 return ((u64)high_crc << 32) | (u64)low_crc; 408 } 409 410 static int match_extent_ref(struct extent_buffer *leaf, 411 struct btrfs_extent_ref *disk_ref, 412 struct btrfs_extent_ref *cpu_ref) 413 { 414 int ret; 415 int len; 416 417 if (cpu_ref->objectid) 418 len = sizeof(*cpu_ref); 419 else 420 len = 2 * sizeof(u64); 421 ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref, 422 len); 423 return ret == 0; 424 } 425 426 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans, 427 struct btrfs_root *root, 428 struct btrfs_path *path, u64 bytenr, 429 u64 root_objectid, 430 u64 ref_generation, u64 owner, 431 u64 owner_offset, int del) 432 { 433 u64 hash; 434 struct btrfs_key key; 435 struct btrfs_key found_key; 436 struct btrfs_extent_ref ref; 437 struct extent_buffer *leaf; 438 struct btrfs_extent_ref *disk_ref; 439 int ret; 440 int ret2; 441 442 btrfs_set_stack_ref_root(&ref, root_objectid); 443 btrfs_set_stack_ref_generation(&ref, ref_generation); 444 btrfs_set_stack_ref_objectid(&ref, owner); 445 btrfs_set_stack_ref_offset(&ref, owner_offset); 446 447 hash = hash_extent_ref(root_objectid, ref_generation, owner, 448 owner_offset); 449 key.offset = hash; 450 key.objectid = bytenr; 451 key.type = BTRFS_EXTENT_REF_KEY; 452 453 while (1) { 454 ret = btrfs_search_slot(trans, root, &key, path, 455 del ? -1 : 0, del); 456 if (ret < 0) 457 goto out; 458 leaf = path->nodes[0]; 459 if (ret != 0) { 460 u32 nritems = btrfs_header_nritems(leaf); 461 if (path->slots[0] >= nritems) { 462 ret2 = btrfs_next_leaf(root, path); 463 if (ret2) 464 goto out; 465 leaf = path->nodes[0]; 466 } 467 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 468 if (found_key.objectid != bytenr || 469 found_key.type != BTRFS_EXTENT_REF_KEY) 470 goto out; 471 key.offset = found_key.offset; 472 if (del) { 473 btrfs_release_path(root, path); 474 continue; 475 } 476 } 477 disk_ref = btrfs_item_ptr(path->nodes[0], 478 path->slots[0], 479 struct btrfs_extent_ref); 480 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) { 481 ret = 0; 482 goto out; 483 } 484 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 485 key.offset = found_key.offset + 1; 486 btrfs_release_path(root, path); 487 } 488 out: 489 return ret; 490 } 491 492 /* 493 * Back reference rules. Back refs have three main goals: 494 * 495 * 1) differentiate between all holders of references to an extent so that 496 * when a reference is dropped we can make sure it was a valid reference 497 * before freeing the extent. 498 * 499 * 2) Provide enough information to quickly find the holders of an extent 500 * if we notice a given block is corrupted or bad. 501 * 502 * 3) Make it easy to migrate blocks for FS shrinking or storage pool 503 * maintenance. This is actually the same as #2, but with a slightly 504 * different use case. 505 * 506 * File extents can be referenced by: 507 * 508 * - multiple snapshots, subvolumes, or different generations in one subvol 509 * - different files inside a single subvolume (in theory, not implemented yet) 510 * - different offsets inside a file (bookend extents in file.c) 511 * 512 * The extent ref structure has fields for: 513 * 514 * - Objectid of the subvolume root 515 * - Generation number of the tree holding the reference 516 * - objectid of the file holding the reference 517 * - offset in the file corresponding to the key holding the reference 518 * 519 * When a file extent is allocated the fields are filled in: 520 * (root_key.objectid, trans->transid, inode objectid, offset in file) 521 * 522 * When a leaf is cow'd new references are added for every file extent found 523 * in the leaf. It looks the same as the create case, but trans->transid 524 * will be different when the block is cow'd. 525 * 526 * (root_key.objectid, trans->transid, inode objectid, offset in file) 527 * 528 * When a file extent is removed either during snapshot deletion or file 529 * truncation, the corresponding back reference is found 530 * by searching for: 531 * 532 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), 533 * inode objectid, offset in file) 534 * 535 * Btree extents can be referenced by: 536 * 537 * - Different subvolumes 538 * - Different generations of the same subvolume 539 * 540 * Storing sufficient information for a full reverse mapping of a btree 541 * block would require storing the lowest key of the block in the backref, 542 * and it would require updating that lowest key either before write out or 543 * every time it changed. Instead, the objectid of the lowest key is stored 544 * along with the level of the tree block. This provides a hint 545 * about where in the btree the block can be found. Searches through the 546 * btree only need to look for a pointer to that block, so they stop one 547 * level higher than the level recorded in the backref. 548 * 549 * Some btrees do not do reference counting on their extents. These 550 * include the extent tree and the tree of tree roots. Backrefs for these 551 * trees always have a generation of zero. 552 * 553 * When a tree block is created, back references are inserted: 554 * 555 * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid) 556 * 557 * When a tree block is cow'd in a reference counted root, 558 * new back references are added for all the blocks it points to. 559 * These are of the form (trans->transid will have increased since creation): 560 * 561 * (root->root_key.objectid, trans->transid, level, lowest_key_objectid) 562 * 563 * Because the lowest_key_objectid and the level are just hints 564 * they are not used when backrefs are deleted. When a backref is deleted: 565 * 566 * if backref was for a tree root: 567 * root_objectid = root->root_key.objectid 568 * else 569 * root_objectid = btrfs_header_owner(parent) 570 * 571 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0) 572 * 573 * Back Reference Key hashing: 574 * 575 * Back references have four fields, each 64 bits long. Unfortunately, 576 * This is hashed into a single 64 bit number and placed into the key offset. 577 * The key objectid corresponds to the first byte in the extent, and the 578 * key type is set to BTRFS_EXTENT_REF_KEY 579 */ 580 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, 581 struct btrfs_root *root, 582 struct btrfs_path *path, u64 bytenr, 583 u64 root_objectid, u64 ref_generation, 584 u64 owner, u64 owner_offset) 585 { 586 u64 hash; 587 struct btrfs_key key; 588 struct btrfs_extent_ref ref; 589 struct btrfs_extent_ref *disk_ref; 590 int ret; 591 592 btrfs_set_stack_ref_root(&ref, root_objectid); 593 btrfs_set_stack_ref_generation(&ref, ref_generation); 594 btrfs_set_stack_ref_objectid(&ref, owner); 595 btrfs_set_stack_ref_offset(&ref, owner_offset); 596 597 hash = hash_extent_ref(root_objectid, ref_generation, owner, 598 owner_offset); 599 key.offset = hash; 600 key.objectid = bytenr; 601 key.type = BTRFS_EXTENT_REF_KEY; 602 603 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref)); 604 while (ret == -EEXIST) { 605 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 606 struct btrfs_extent_ref); 607 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) 608 goto out; 609 key.offset++; 610 btrfs_release_path(root, path); 611 ret = btrfs_insert_empty_item(trans, root, path, &key, 612 sizeof(ref)); 613 } 614 if (ret) 615 goto out; 616 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 617 struct btrfs_extent_ref); 618 write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref, 619 sizeof(ref)); 620 btrfs_mark_buffer_dirty(path->nodes[0]); 621 out: 622 btrfs_release_path(root, path); 623 return ret; 624 } 625 626 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 627 struct btrfs_root *root, 628 u64 bytenr, u64 num_bytes, 629 u64 root_objectid, u64 ref_generation, 630 u64 owner, u64 owner_offset) 631 { 632 struct btrfs_path *path; 633 int ret; 634 struct btrfs_key key; 635 struct extent_buffer *l; 636 struct btrfs_extent_item *item; 637 u32 refs; 638 639 WARN_ON(num_bytes < root->sectorsize); 640 path = btrfs_alloc_path(); 641 if (!path) 642 return -ENOMEM; 643 644 path->reada = 0; 645 key.objectid = bytenr; 646 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 647 key.offset = num_bytes; 648 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 649 0, 1); 650 if (ret < 0) 651 return ret; 652 if (ret != 0) { 653 BUG(); 654 } 655 BUG_ON(ret != 0); 656 l = path->nodes[0]; 657 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 658 refs = btrfs_extent_refs(l, item); 659 btrfs_set_extent_refs(l, item, refs + 1); 660 btrfs_mark_buffer_dirty(path->nodes[0]); 661 662 btrfs_release_path(root->fs_info->extent_root, path); 663 664 path->reada = 0; 665 ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, 666 path, bytenr, root_objectid, 667 ref_generation, owner, owner_offset); 668 BUG_ON(ret); 669 finish_current_insert(trans, root->fs_info->extent_root); 670 del_pending_extents(trans, root->fs_info->extent_root); 671 672 btrfs_free_path(path); 673 return 0; 674 } 675 676 int btrfs_extent_post_op(struct btrfs_trans_handle *trans, 677 struct btrfs_root *root) 678 { 679 finish_current_insert(trans, root->fs_info->extent_root); 680 del_pending_extents(trans, root->fs_info->extent_root); 681 return 0; 682 } 683 684 static int lookup_extent_ref(struct btrfs_trans_handle *trans, 685 struct btrfs_root *root, u64 bytenr, 686 u64 num_bytes, u32 *refs) 687 { 688 struct btrfs_path *path; 689 int ret; 690 struct btrfs_key key; 691 struct extent_buffer *l; 692 struct btrfs_extent_item *item; 693 694 WARN_ON(num_bytes < root->sectorsize); 695 path = btrfs_alloc_path(); 696 path->reada = 0; 697 key.objectid = bytenr; 698 key.offset = num_bytes; 699 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 700 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 701 0, 0); 702 if (ret < 0) 703 goto out; 704 if (ret != 0) { 705 btrfs_print_leaf(root, path->nodes[0]); 706 printk("failed to find block number %Lu\n", bytenr); 707 BUG(); 708 } 709 l = path->nodes[0]; 710 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 711 *refs = btrfs_extent_refs(l, item); 712 out: 713 btrfs_free_path(path); 714 return 0; 715 } 716 717 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, 718 struct btrfs_path *count_path, 719 u64 first_extent) 720 { 721 struct btrfs_root *extent_root = root->fs_info->extent_root; 722 struct btrfs_path *path; 723 u64 bytenr; 724 u64 found_objectid; 725 u64 root_objectid = root->root_key.objectid; 726 u32 total_count = 0; 727 u32 cur_count; 728 u32 nritems; 729 int ret; 730 struct btrfs_key key; 731 struct btrfs_key found_key; 732 struct extent_buffer *l; 733 struct btrfs_extent_item *item; 734 struct btrfs_extent_ref *ref_item; 735 int level = -1; 736 737 path = btrfs_alloc_path(); 738 again: 739 if (level == -1) 740 bytenr = first_extent; 741 else 742 bytenr = count_path->nodes[level]->start; 743 744 cur_count = 0; 745 key.objectid = bytenr; 746 key.offset = 0; 747 748 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 749 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 750 if (ret < 0) 751 goto out; 752 BUG_ON(ret == 0); 753 754 l = path->nodes[0]; 755 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); 756 757 if (found_key.objectid != bytenr || 758 found_key.type != BTRFS_EXTENT_ITEM_KEY) { 759 goto out; 760 } 761 762 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 763 while (1) { 764 l = path->nodes[0]; 765 nritems = btrfs_header_nritems(l); 766 if (path->slots[0] >= nritems) { 767 ret = btrfs_next_leaf(extent_root, path); 768 if (ret == 0) 769 continue; 770 break; 771 } 772 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); 773 if (found_key.objectid != bytenr) 774 break; 775 776 if (found_key.type != BTRFS_EXTENT_REF_KEY) { 777 path->slots[0]++; 778 continue; 779 } 780 781 cur_count++; 782 ref_item = btrfs_item_ptr(l, path->slots[0], 783 struct btrfs_extent_ref); 784 found_objectid = btrfs_ref_root(l, ref_item); 785 786 if (found_objectid != root_objectid) { 787 total_count = 2; 788 goto out; 789 } 790 total_count = 1; 791 path->slots[0]++; 792 } 793 if (cur_count == 0) { 794 total_count = 0; 795 goto out; 796 } 797 if (level >= 0 && root->node == count_path->nodes[level]) 798 goto out; 799 level++; 800 btrfs_release_path(root, path); 801 goto again; 802 803 out: 804 btrfs_free_path(path); 805 return total_count; 806 } 807 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, 808 struct btrfs_root *root, u64 owner_objectid) 809 { 810 u64 generation; 811 u64 key_objectid; 812 u64 level; 813 u32 nritems; 814 struct btrfs_disk_key disk_key; 815 816 level = btrfs_header_level(root->node); 817 generation = trans->transid; 818 nritems = btrfs_header_nritems(root->node); 819 if (nritems > 0) { 820 if (level == 0) 821 btrfs_item_key(root->node, &disk_key, 0); 822 else 823 btrfs_node_key(root->node, &disk_key, 0); 824 key_objectid = btrfs_disk_key_objectid(&disk_key); 825 } else { 826 key_objectid = 0; 827 } 828 return btrfs_inc_extent_ref(trans, root, root->node->start, 829 root->node->len, owner_objectid, 830 generation, level, key_objectid); 831 } 832 833 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 834 struct extent_buffer *buf) 835 { 836 u64 bytenr; 837 u32 nritems; 838 struct btrfs_key key; 839 struct btrfs_file_extent_item *fi; 840 int i; 841 int level; 842 int ret; 843 int faili; 844 845 if (!root->ref_cows) 846 return 0; 847 848 level = btrfs_header_level(buf); 849 nritems = btrfs_header_nritems(buf); 850 for (i = 0; i < nritems; i++) { 851 if (level == 0) { 852 u64 disk_bytenr; 853 btrfs_item_key_to_cpu(buf, &key, i); 854 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 855 continue; 856 fi = btrfs_item_ptr(buf, i, 857 struct btrfs_file_extent_item); 858 if (btrfs_file_extent_type(buf, fi) == 859 BTRFS_FILE_EXTENT_INLINE) 860 continue; 861 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 862 if (disk_bytenr == 0) 863 continue; 864 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, 865 btrfs_file_extent_disk_num_bytes(buf, fi), 866 root->root_key.objectid, trans->transid, 867 key.objectid, key.offset); 868 if (ret) { 869 faili = i; 870 goto fail; 871 } 872 } else { 873 bytenr = btrfs_node_blockptr(buf, i); 874 btrfs_node_key_to_cpu(buf, &key, i); 875 ret = btrfs_inc_extent_ref(trans, root, bytenr, 876 btrfs_level_size(root, level - 1), 877 root->root_key.objectid, 878 trans->transid, 879 level - 1, key.objectid); 880 if (ret) { 881 faili = i; 882 goto fail; 883 } 884 } 885 } 886 return 0; 887 fail: 888 WARN_ON(1); 889 #if 0 890 for (i =0; i < faili; i++) { 891 if (level == 0) { 892 u64 disk_bytenr; 893 btrfs_item_key_to_cpu(buf, &key, i); 894 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 895 continue; 896 fi = btrfs_item_ptr(buf, i, 897 struct btrfs_file_extent_item); 898 if (btrfs_file_extent_type(buf, fi) == 899 BTRFS_FILE_EXTENT_INLINE) 900 continue; 901 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 902 if (disk_bytenr == 0) 903 continue; 904 err = btrfs_free_extent(trans, root, disk_bytenr, 905 btrfs_file_extent_disk_num_bytes(buf, 906 fi), 0); 907 BUG_ON(err); 908 } else { 909 bytenr = btrfs_node_blockptr(buf, i); 910 err = btrfs_free_extent(trans, root, bytenr, 911 btrfs_level_size(root, level - 1), 0); 912 BUG_ON(err); 913 } 914 } 915 #endif 916 return ret; 917 } 918 919 static int write_one_cache_group(struct btrfs_trans_handle *trans, 920 struct btrfs_root *root, 921 struct btrfs_path *path, 922 struct btrfs_block_group_cache *cache) 923 { 924 int ret; 925 int pending_ret; 926 struct btrfs_root *extent_root = root->fs_info->extent_root; 927 unsigned long bi; 928 struct extent_buffer *leaf; 929 930 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 931 if (ret < 0) 932 goto fail; 933 BUG_ON(ret); 934 935 leaf = path->nodes[0]; 936 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 937 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 938 btrfs_mark_buffer_dirty(leaf); 939 btrfs_release_path(extent_root, path); 940 fail: 941 finish_current_insert(trans, extent_root); 942 pending_ret = del_pending_extents(trans, extent_root); 943 if (ret) 944 return ret; 945 if (pending_ret) 946 return pending_ret; 947 return 0; 948 949 } 950 951 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 952 struct btrfs_root *root) 953 { 954 struct extent_io_tree *block_group_cache; 955 struct btrfs_block_group_cache *cache; 956 int ret; 957 int err = 0; 958 int werr = 0; 959 struct btrfs_path *path; 960 u64 last = 0; 961 u64 start; 962 u64 end; 963 u64 ptr; 964 965 block_group_cache = &root->fs_info->block_group_cache; 966 path = btrfs_alloc_path(); 967 if (!path) 968 return -ENOMEM; 969 970 while(1) { 971 ret = find_first_extent_bit(block_group_cache, last, 972 &start, &end, BLOCK_GROUP_DIRTY); 973 if (ret) 974 break; 975 976 last = end + 1; 977 ret = get_state_private(block_group_cache, start, &ptr); 978 if (ret) 979 break; 980 981 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr; 982 err = write_one_cache_group(trans, root, 983 path, cache); 984 /* 985 * if we fail to write the cache group, we want 986 * to keep it marked dirty in hopes that a later 987 * write will work 988 */ 989 if (err) { 990 werr = err; 991 continue; 992 } 993 clear_extent_bits(block_group_cache, start, end, 994 BLOCK_GROUP_DIRTY, GFP_NOFS); 995 } 996 btrfs_free_path(path); 997 return werr; 998 } 999 1000 static int update_block_group(struct btrfs_trans_handle *trans, 1001 struct btrfs_root *root, 1002 u64 bytenr, u64 num_bytes, int alloc, 1003 int mark_free, int data) 1004 { 1005 struct btrfs_block_group_cache *cache; 1006 struct btrfs_fs_info *info = root->fs_info; 1007 u64 total = num_bytes; 1008 u64 old_val; 1009 u64 byte_in_group; 1010 u64 start; 1011 u64 end; 1012 1013 while(total) { 1014 cache = btrfs_lookup_block_group(info, bytenr); 1015 if (!cache) { 1016 return -1; 1017 } 1018 byte_in_group = bytenr - cache->key.objectid; 1019 WARN_ON(byte_in_group > cache->key.offset); 1020 start = cache->key.objectid; 1021 end = start + cache->key.offset - 1; 1022 set_extent_bits(&info->block_group_cache, start, end, 1023 BLOCK_GROUP_DIRTY, GFP_NOFS); 1024 1025 old_val = btrfs_block_group_used(&cache->item); 1026 num_bytes = min(total, cache->key.offset - byte_in_group); 1027 if (alloc) { 1028 if (cache->data != data && 1029 old_val < (cache->key.offset >> 1)) { 1030 int bit_to_clear; 1031 int bit_to_set; 1032 cache->data = data; 1033 if (data) { 1034 bit_to_clear = BLOCK_GROUP_METADATA; 1035 bit_to_set = BLOCK_GROUP_DATA; 1036 cache->item.flags &= 1037 ~BTRFS_BLOCK_GROUP_MIXED; 1038 cache->item.flags |= 1039 BTRFS_BLOCK_GROUP_DATA; 1040 } else { 1041 bit_to_clear = BLOCK_GROUP_DATA; 1042 bit_to_set = BLOCK_GROUP_METADATA; 1043 cache->item.flags &= 1044 ~BTRFS_BLOCK_GROUP_MIXED; 1045 cache->item.flags &= 1046 ~BTRFS_BLOCK_GROUP_DATA; 1047 } 1048 clear_extent_bits(&info->block_group_cache, 1049 start, end, bit_to_clear, 1050 GFP_NOFS); 1051 set_extent_bits(&info->block_group_cache, 1052 start, end, bit_to_set, 1053 GFP_NOFS); 1054 } else if (cache->data != data && 1055 cache->data != BTRFS_BLOCK_GROUP_MIXED) { 1056 cache->data = BTRFS_BLOCK_GROUP_MIXED; 1057 set_extent_bits(&info->block_group_cache, 1058 start, end, 1059 BLOCK_GROUP_DATA | 1060 BLOCK_GROUP_METADATA, 1061 GFP_NOFS); 1062 } 1063 old_val += num_bytes; 1064 } else { 1065 old_val -= num_bytes; 1066 if (mark_free) { 1067 set_extent_dirty(&info->free_space_cache, 1068 bytenr, bytenr + num_bytes - 1, 1069 GFP_NOFS); 1070 } 1071 } 1072 btrfs_set_block_group_used(&cache->item, old_val); 1073 total -= num_bytes; 1074 bytenr += num_bytes; 1075 } 1076 return 0; 1077 } 1078 static int update_pinned_extents(struct btrfs_root *root, 1079 u64 bytenr, u64 num, int pin) 1080 { 1081 u64 len; 1082 struct btrfs_block_group_cache *cache; 1083 struct btrfs_fs_info *fs_info = root->fs_info; 1084 1085 if (pin) { 1086 set_extent_dirty(&fs_info->pinned_extents, 1087 bytenr, bytenr + num - 1, GFP_NOFS); 1088 } else { 1089 clear_extent_dirty(&fs_info->pinned_extents, 1090 bytenr, bytenr + num - 1, GFP_NOFS); 1091 } 1092 while (num > 0) { 1093 cache = btrfs_lookup_block_group(fs_info, bytenr); 1094 WARN_ON(!cache); 1095 len = min(num, cache->key.offset - 1096 (bytenr - cache->key.objectid)); 1097 if (pin) { 1098 cache->pinned += len; 1099 fs_info->total_pinned += len; 1100 } else { 1101 cache->pinned -= len; 1102 fs_info->total_pinned -= len; 1103 } 1104 bytenr += len; 1105 num -= len; 1106 } 1107 return 0; 1108 } 1109 1110 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 1111 { 1112 u64 last = 0; 1113 u64 start; 1114 u64 end; 1115 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; 1116 int ret; 1117 1118 while(1) { 1119 ret = find_first_extent_bit(pinned_extents, last, 1120 &start, &end, EXTENT_DIRTY); 1121 if (ret) 1122 break; 1123 set_extent_dirty(copy, start, end, GFP_NOFS); 1124 last = end + 1; 1125 } 1126 return 0; 1127 } 1128 1129 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 1130 struct btrfs_root *root, 1131 struct extent_io_tree *unpin) 1132 { 1133 u64 start; 1134 u64 end; 1135 int ret; 1136 struct extent_io_tree *free_space_cache; 1137 free_space_cache = &root->fs_info->free_space_cache; 1138 1139 while(1) { 1140 ret = find_first_extent_bit(unpin, 0, &start, &end, 1141 EXTENT_DIRTY); 1142 if (ret) 1143 break; 1144 update_pinned_extents(root, start, end + 1 - start, 0); 1145 clear_extent_dirty(unpin, start, end, GFP_NOFS); 1146 set_extent_dirty(free_space_cache, start, end, GFP_NOFS); 1147 } 1148 return 0; 1149 } 1150 1151 static int finish_current_insert(struct btrfs_trans_handle *trans, 1152 struct btrfs_root *extent_root) 1153 { 1154 u64 start; 1155 u64 end; 1156 struct btrfs_fs_info *info = extent_root->fs_info; 1157 struct extent_buffer *eb; 1158 struct btrfs_path *path; 1159 struct btrfs_key ins; 1160 struct btrfs_disk_key first; 1161 struct btrfs_extent_item extent_item; 1162 int ret; 1163 int level; 1164 int err = 0; 1165 1166 btrfs_set_stack_extent_refs(&extent_item, 1); 1167 btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); 1168 path = btrfs_alloc_path(); 1169 1170 while(1) { 1171 ret = find_first_extent_bit(&info->extent_ins, 0, &start, 1172 &end, EXTENT_LOCKED); 1173 if (ret) 1174 break; 1175 1176 ins.objectid = start; 1177 ins.offset = end + 1 - start; 1178 err = btrfs_insert_item(trans, extent_root, &ins, 1179 &extent_item, sizeof(extent_item)); 1180 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED, 1181 GFP_NOFS); 1182 eb = read_tree_block(extent_root, ins.objectid, ins.offset); 1183 level = btrfs_header_level(eb); 1184 if (level == 0) { 1185 btrfs_item_key(eb, &first, 0); 1186 } else { 1187 btrfs_node_key(eb, &first, 0); 1188 } 1189 err = btrfs_insert_extent_backref(trans, extent_root, path, 1190 start, extent_root->root_key.objectid, 1191 0, level, 1192 btrfs_disk_key_objectid(&first)); 1193 BUG_ON(err); 1194 free_extent_buffer(eb); 1195 } 1196 btrfs_free_path(path); 1197 return 0; 1198 } 1199 1200 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, 1201 int pending) 1202 { 1203 int err = 0; 1204 struct extent_buffer *buf; 1205 1206 if (!pending) { 1207 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 1208 if (buf) { 1209 if (btrfs_buffer_uptodate(buf)) { 1210 u64 transid = 1211 root->fs_info->running_transaction->transid; 1212 u64 header_transid = 1213 btrfs_header_generation(buf); 1214 if (header_transid == transid) { 1215 clean_tree_block(NULL, root, buf); 1216 free_extent_buffer(buf); 1217 return 1; 1218 } 1219 } 1220 free_extent_buffer(buf); 1221 } 1222 update_pinned_extents(root, bytenr, num_bytes, 1); 1223 } else { 1224 set_extent_bits(&root->fs_info->pending_del, 1225 bytenr, bytenr + num_bytes - 1, 1226 EXTENT_LOCKED, GFP_NOFS); 1227 } 1228 BUG_ON(err < 0); 1229 return 0; 1230 } 1231 1232 /* 1233 * remove an extent from the root, returns 0 on success 1234 */ 1235 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 1236 *root, u64 bytenr, u64 num_bytes, 1237 u64 root_objectid, u64 ref_generation, 1238 u64 owner_objectid, u64 owner_offset, int pin, 1239 int mark_free) 1240 { 1241 struct btrfs_path *path; 1242 struct btrfs_key key; 1243 struct btrfs_fs_info *info = root->fs_info; 1244 struct btrfs_root *extent_root = info->extent_root; 1245 struct extent_buffer *leaf; 1246 int ret; 1247 struct btrfs_extent_item *ei; 1248 u32 refs; 1249 1250 key.objectid = bytenr; 1251 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 1252 key.offset = num_bytes; 1253 path = btrfs_alloc_path(); 1254 if (!path) 1255 return -ENOMEM; 1256 1257 path->reada = 0; 1258 ret = lookup_extent_backref(trans, extent_root, path, 1259 bytenr, root_objectid, 1260 ref_generation, 1261 owner_objectid, owner_offset, 1); 1262 if (ret == 0) { 1263 ret = btrfs_del_item(trans, extent_root, path); 1264 } else { 1265 btrfs_print_leaf(extent_root, path->nodes[0]); 1266 WARN_ON(1); 1267 printk("Unable to find ref byte nr %Lu root %Lu " 1268 " gen %Lu owner %Lu offset %Lu\n", bytenr, 1269 root_objectid, ref_generation, owner_objectid, 1270 owner_offset); 1271 } 1272 btrfs_release_path(extent_root, path); 1273 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); 1274 if (ret < 0) 1275 return ret; 1276 BUG_ON(ret); 1277 1278 leaf = path->nodes[0]; 1279 ei = btrfs_item_ptr(leaf, path->slots[0], 1280 struct btrfs_extent_item); 1281 refs = btrfs_extent_refs(leaf, ei); 1282 BUG_ON(refs == 0); 1283 refs -= 1; 1284 btrfs_set_extent_refs(leaf, ei, refs); 1285 btrfs_mark_buffer_dirty(leaf); 1286 1287 if (refs == 0) { 1288 u64 super_used; 1289 u64 root_used; 1290 1291 if (pin) { 1292 ret = pin_down_bytes(root, bytenr, num_bytes, 0); 1293 if (ret > 0) 1294 mark_free = 1; 1295 BUG_ON(ret < 0); 1296 } 1297 1298 /* block accounting for super block */ 1299 super_used = btrfs_super_bytes_used(&info->super_copy); 1300 btrfs_set_super_bytes_used(&info->super_copy, 1301 super_used - num_bytes); 1302 1303 /* block accounting for root item */ 1304 root_used = btrfs_root_used(&root->root_item); 1305 btrfs_set_root_used(&root->root_item, 1306 root_used - num_bytes); 1307 1308 ret = btrfs_del_item(trans, extent_root, path); 1309 if (ret) { 1310 return ret; 1311 } 1312 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 1313 mark_free, 0); 1314 BUG_ON(ret); 1315 } 1316 btrfs_free_path(path); 1317 finish_current_insert(trans, extent_root); 1318 return ret; 1319 } 1320 1321 /* 1322 * find all the blocks marked as pending in the radix tree and remove 1323 * them from the extent map 1324 */ 1325 static int del_pending_extents(struct btrfs_trans_handle *trans, struct 1326 btrfs_root *extent_root) 1327 { 1328 int ret; 1329 int err = 0; 1330 u64 start; 1331 u64 end; 1332 struct extent_io_tree *pending_del; 1333 struct extent_io_tree *pinned_extents; 1334 1335 pending_del = &extent_root->fs_info->pending_del; 1336 pinned_extents = &extent_root->fs_info->pinned_extents; 1337 1338 while(1) { 1339 ret = find_first_extent_bit(pending_del, 0, &start, &end, 1340 EXTENT_LOCKED); 1341 if (ret) 1342 break; 1343 update_pinned_extents(extent_root, start, end + 1 - start, 1); 1344 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, 1345 GFP_NOFS); 1346 ret = __free_extent(trans, extent_root, 1347 start, end + 1 - start, 1348 extent_root->root_key.objectid, 1349 0, 0, 0, 0, 0); 1350 if (ret) 1351 err = ret; 1352 } 1353 return err; 1354 } 1355 1356 /* 1357 * remove an extent from the root, returns 0 on success 1358 */ 1359 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 1360 *root, u64 bytenr, u64 num_bytes, 1361 u64 root_objectid, u64 ref_generation, 1362 u64 owner_objectid, u64 owner_offset, int pin) 1363 { 1364 struct btrfs_root *extent_root = root->fs_info->extent_root; 1365 int pending_ret; 1366 int ret; 1367 1368 WARN_ON(num_bytes < root->sectorsize); 1369 if (!root->ref_cows) 1370 ref_generation = 0; 1371 1372 if (root == extent_root) { 1373 pin_down_bytes(root, bytenr, num_bytes, 1); 1374 return 0; 1375 } 1376 ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid, 1377 ref_generation, owner_objectid, owner_offset, 1378 pin, pin == 0); 1379 pending_ret = del_pending_extents(trans, root->fs_info->extent_root); 1380 return ret ? ret : pending_ret; 1381 } 1382 1383 static u64 stripe_align(struct btrfs_root *root, u64 val) 1384 { 1385 u64 mask = ((u64)root->stripesize - 1); 1386 u64 ret = (val + mask) & ~mask; 1387 return ret; 1388 } 1389 1390 /* 1391 * walks the btree of allocated extents and find a hole of a given size. 1392 * The key ins is changed to record the hole: 1393 * ins->objectid == block start 1394 * ins->flags = BTRFS_EXTENT_ITEM_KEY 1395 * ins->offset == number of blocks 1396 * Any available blocks before search_start are skipped. 1397 */ 1398 static int noinline find_free_extent(struct btrfs_trans_handle *trans, 1399 struct btrfs_root *orig_root, 1400 u64 num_bytes, u64 empty_size, 1401 u64 search_start, u64 search_end, 1402 u64 hint_byte, struct btrfs_key *ins, 1403 u64 exclude_start, u64 exclude_nr, 1404 int data) 1405 { 1406 struct btrfs_path *path; 1407 struct btrfs_key key; 1408 u64 hole_size = 0; 1409 u64 aligned; 1410 int ret; 1411 int slot = 0; 1412 u64 last_byte = 0; 1413 u64 orig_search_start = search_start; 1414 int start_found; 1415 struct extent_buffer *l; 1416 struct btrfs_root * root = orig_root->fs_info->extent_root; 1417 struct btrfs_fs_info *info = root->fs_info; 1418 u64 total_needed = num_bytes; 1419 int level; 1420 struct btrfs_block_group_cache *block_group; 1421 int full_scan = 0; 1422 int wrapped = 0; 1423 u64 cached_start; 1424 1425 WARN_ON(num_bytes < root->sectorsize); 1426 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 1427 1428 level = btrfs_header_level(root->node); 1429 1430 if (num_bytes >= 32 * 1024 * 1024 && hint_byte) { 1431 data = BTRFS_BLOCK_GROUP_MIXED; 1432 } 1433 1434 /* for SSD, cluster allocations together as much as possible */ 1435 if (btrfs_test_opt(root, SSD)) { 1436 if (!data) { 1437 if (root->fs_info->last_alloc) 1438 hint_byte = root->fs_info->last_alloc; 1439 else { 1440 hint_byte = hint_byte & 1441 ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); 1442 empty_size += 16 * 1024 * 1024; 1443 } 1444 } 1445 } 1446 1447 search_end = min(search_end, 1448 btrfs_super_total_bytes(&info->super_copy)); 1449 if (hint_byte) { 1450 block_group = btrfs_lookup_block_group(info, hint_byte); 1451 if (!block_group) 1452 hint_byte = search_start; 1453 block_group = btrfs_find_block_group(root, block_group, 1454 hint_byte, data, 1); 1455 } else { 1456 block_group = btrfs_find_block_group(root, 1457 trans->block_group, 1458 search_start, data, 1); 1459 } 1460 1461 total_needed += empty_size; 1462 path = btrfs_alloc_path(); 1463 check_failed: 1464 if (!block_group) { 1465 block_group = btrfs_lookup_block_group(info, search_start); 1466 if (!block_group) 1467 block_group = btrfs_lookup_block_group(info, 1468 orig_search_start); 1469 } 1470 search_start = find_search_start(root, &block_group, search_start, 1471 total_needed, data); 1472 1473 if (!data && btrfs_test_opt(root, SSD) && info->last_alloc && 1474 search_start != info->last_alloc) { 1475 info->last_alloc = 0; 1476 if (!empty_size) { 1477 empty_size += 16 * 1024 * 1024; 1478 total_needed += empty_size; 1479 } 1480 search_start = find_search_start(root, &block_group, 1481 search_start, total_needed, 1482 data); 1483 } 1484 1485 search_start = stripe_align(root, search_start); 1486 cached_start = search_start; 1487 btrfs_init_path(path); 1488 ins->objectid = search_start; 1489 ins->offset = 0; 1490 start_found = 0; 1491 path->reada = 2; 1492 1493 ret = btrfs_search_slot(trans, root, ins, path, 0, 0); 1494 if (ret < 0) 1495 goto error; 1496 ret = find_previous_extent(root, path); 1497 if (ret < 0) 1498 goto error; 1499 l = path->nodes[0]; 1500 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 1501 while (1) { 1502 l = path->nodes[0]; 1503 slot = path->slots[0]; 1504 if (slot >= btrfs_header_nritems(l)) { 1505 ret = btrfs_next_leaf(root, path); 1506 if (ret == 0) 1507 continue; 1508 if (ret < 0) 1509 goto error; 1510 1511 search_start = max(search_start, 1512 block_group->key.objectid); 1513 if (!start_found) { 1514 aligned = stripe_align(root, search_start); 1515 ins->objectid = aligned; 1516 if (aligned >= search_end) { 1517 ret = -ENOSPC; 1518 goto error; 1519 } 1520 ins->offset = search_end - aligned; 1521 start_found = 1; 1522 goto check_pending; 1523 } 1524 ins->objectid = stripe_align(root, 1525 last_byte > search_start ? 1526 last_byte : search_start); 1527 if (search_end <= ins->objectid) { 1528 ret = -ENOSPC; 1529 goto error; 1530 } 1531 ins->offset = search_end - ins->objectid; 1532 BUG_ON(ins->objectid >= search_end); 1533 goto check_pending; 1534 } 1535 btrfs_item_key_to_cpu(l, &key, slot); 1536 1537 if (key.objectid >= search_start && key.objectid > last_byte && 1538 start_found) { 1539 if (last_byte < search_start) 1540 last_byte = search_start; 1541 aligned = stripe_align(root, last_byte); 1542 hole_size = key.objectid - aligned; 1543 if (key.objectid > aligned && hole_size >= num_bytes) { 1544 ins->objectid = aligned; 1545 ins->offset = hole_size; 1546 goto check_pending; 1547 } 1548 } 1549 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { 1550 if (!start_found && btrfs_key_type(&key) == 1551 BTRFS_BLOCK_GROUP_ITEM_KEY) { 1552 last_byte = key.objectid; 1553 start_found = 1; 1554 } 1555 goto next; 1556 } 1557 1558 1559 start_found = 1; 1560 last_byte = key.objectid + key.offset; 1561 1562 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && 1563 last_byte >= block_group->key.objectid + 1564 block_group->key.offset) { 1565 btrfs_release_path(root, path); 1566 search_start = block_group->key.objectid + 1567 block_group->key.offset; 1568 goto new_group; 1569 } 1570 next: 1571 path->slots[0]++; 1572 cond_resched(); 1573 } 1574 check_pending: 1575 /* we have to make sure we didn't find an extent that has already 1576 * been allocated by the map tree or the original allocation 1577 */ 1578 btrfs_release_path(root, path); 1579 BUG_ON(ins->objectid < search_start); 1580 1581 if (ins->objectid + num_bytes >= search_end) 1582 goto enospc; 1583 if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED && 1584 ins->objectid + num_bytes > block_group-> 1585 key.objectid + block_group->key.offset) { 1586 search_start = block_group->key.objectid + 1587 block_group->key.offset; 1588 goto new_group; 1589 } 1590 if (test_range_bit(&info->extent_ins, ins->objectid, 1591 ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { 1592 search_start = ins->objectid + num_bytes; 1593 goto new_group; 1594 } 1595 if (test_range_bit(&info->pinned_extents, ins->objectid, 1596 ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { 1597 search_start = ins->objectid + num_bytes; 1598 goto new_group; 1599 } 1600 if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && 1601 ins->objectid < exclude_start + exclude_nr)) { 1602 search_start = exclude_start + exclude_nr; 1603 goto new_group; 1604 } 1605 if (!data) { 1606 block_group = btrfs_lookup_block_group(info, ins->objectid); 1607 if (block_group) 1608 trans->block_group = block_group; 1609 } 1610 ins->offset = num_bytes; 1611 btrfs_free_path(path); 1612 return 0; 1613 1614 new_group: 1615 if (search_start + num_bytes >= search_end) { 1616 enospc: 1617 search_start = orig_search_start; 1618 if (full_scan) { 1619 ret = -ENOSPC; 1620 goto error; 1621 } 1622 if (wrapped) { 1623 if (!full_scan) 1624 total_needed -= empty_size; 1625 full_scan = 1; 1626 data = BTRFS_BLOCK_GROUP_MIXED; 1627 } else 1628 wrapped = 1; 1629 } 1630 block_group = btrfs_lookup_block_group(info, search_start); 1631 cond_resched(); 1632 block_group = btrfs_find_block_group(root, block_group, 1633 search_start, data, 0); 1634 goto check_failed; 1635 1636 error: 1637 btrfs_release_path(root, path); 1638 btrfs_free_path(path); 1639 if (btrfs_test_opt(root, SSD) && !ret && !data) 1640 info->last_alloc = ins->objectid + ins->offset; 1641 return ret; 1642 } 1643 /* 1644 * finds a free extent and does all the dirty work required for allocation 1645 * returns the key for the extent through ins, and a tree buffer for 1646 * the first block of the extent through buf. 1647 * 1648 * returns 0 if everything worked, non-zero otherwise. 1649 */ 1650 int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 1651 struct btrfs_root *root, 1652 u64 num_bytes, u64 root_objectid, u64 ref_generation, 1653 u64 owner, u64 owner_offset, 1654 u64 empty_size, u64 hint_byte, 1655 u64 search_end, struct btrfs_key *ins, int data) 1656 { 1657 int ret; 1658 int pending_ret; 1659 u64 super_used; 1660 u64 root_used; 1661 u64 search_start = 0; 1662 u64 new_hint; 1663 struct btrfs_fs_info *info = root->fs_info; 1664 struct btrfs_root *extent_root = info->extent_root; 1665 struct btrfs_extent_item extent_item; 1666 struct btrfs_path *path; 1667 1668 btrfs_set_stack_extent_refs(&extent_item, 1); 1669 1670 new_hint = max(hint_byte, root->fs_info->alloc_start); 1671 if (new_hint < btrfs_super_total_bytes(&info->super_copy)) 1672 hint_byte = new_hint; 1673 1674 WARN_ON(num_bytes < root->sectorsize); 1675 ret = find_free_extent(trans, root, num_bytes, empty_size, 1676 search_start, search_end, hint_byte, ins, 1677 trans->alloc_exclude_start, 1678 trans->alloc_exclude_nr, data); 1679 BUG_ON(ret); 1680 if (ret) 1681 return ret; 1682 1683 /* block accounting for super block */ 1684 super_used = btrfs_super_bytes_used(&info->super_copy); 1685 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); 1686 1687 /* block accounting for root item */ 1688 root_used = btrfs_root_used(&root->root_item); 1689 btrfs_set_root_used(&root->root_item, root_used + num_bytes); 1690 1691 clear_extent_dirty(&root->fs_info->free_space_cache, 1692 ins->objectid, ins->objectid + ins->offset - 1, 1693 GFP_NOFS); 1694 1695 if (root == extent_root) { 1696 set_extent_bits(&root->fs_info->extent_ins, ins->objectid, 1697 ins->objectid + ins->offset - 1, 1698 EXTENT_LOCKED, GFP_NOFS); 1699 WARN_ON(data == 1); 1700 goto update_block; 1701 } 1702 1703 WARN_ON(trans->alloc_exclude_nr); 1704 trans->alloc_exclude_start = ins->objectid; 1705 trans->alloc_exclude_nr = ins->offset; 1706 ret = btrfs_insert_item(trans, extent_root, ins, &extent_item, 1707 sizeof(extent_item)); 1708 1709 trans->alloc_exclude_start = 0; 1710 trans->alloc_exclude_nr = 0; 1711 BUG_ON(ret); 1712 1713 path = btrfs_alloc_path(); 1714 BUG_ON(!path); 1715 ret = btrfs_insert_extent_backref(trans, extent_root, path, 1716 ins->objectid, root_objectid, 1717 ref_generation, owner, owner_offset); 1718 1719 BUG_ON(ret); 1720 btrfs_free_path(path); 1721 finish_current_insert(trans, extent_root); 1722 pending_ret = del_pending_extents(trans, extent_root); 1723 1724 if (ret) { 1725 return ret; 1726 } 1727 if (pending_ret) { 1728 return pending_ret; 1729 } 1730 1731 update_block: 1732 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, 1733 data); 1734 BUG_ON(ret); 1735 return 0; 1736 } 1737 1738 /* 1739 * helper function to allocate a block for a given tree 1740 * returns the tree buffer or NULL. 1741 */ 1742 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 1743 struct btrfs_root *root, 1744 u32 blocksize, 1745 u64 root_objectid, u64 hint, 1746 u64 empty_size) 1747 { 1748 u64 ref_generation; 1749 1750 if (root->ref_cows) 1751 ref_generation = trans->transid; 1752 else 1753 ref_generation = 0; 1754 1755 1756 return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid, 1757 ref_generation, 0, 0, hint, empty_size); 1758 } 1759 1760 /* 1761 * helper function to allocate a block for a given tree 1762 * returns the tree buffer or NULL. 1763 */ 1764 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 1765 struct btrfs_root *root, 1766 u32 blocksize, 1767 u64 root_objectid, 1768 u64 ref_generation, 1769 u64 first_objectid, 1770 int level, 1771 u64 hint, 1772 u64 empty_size) 1773 { 1774 struct btrfs_key ins; 1775 int ret; 1776 struct extent_buffer *buf; 1777 1778 ret = btrfs_alloc_extent(trans, root, blocksize, 1779 root_objectid, ref_generation, 1780 level, first_objectid, empty_size, hint, 1781 (u64)-1, &ins, 0); 1782 if (ret) { 1783 BUG_ON(ret > 0); 1784 return ERR_PTR(ret); 1785 } 1786 buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize); 1787 if (!buf) { 1788 btrfs_free_extent(trans, root, ins.objectid, blocksize, 1789 root->root_key.objectid, ref_generation, 1790 0, 0, 0); 1791 return ERR_PTR(-ENOMEM); 1792 } 1793 btrfs_set_header_generation(buf, trans->transid); 1794 clean_tree_block(trans, root, buf); 1795 wait_on_tree_block_writeback(root, buf); 1796 btrfs_set_buffer_uptodate(buf); 1797 1798 if (PageDirty(buf->first_page)) { 1799 printk("page %lu dirty\n", buf->first_page->index); 1800 WARN_ON(1); 1801 } 1802 1803 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 1804 buf->start + buf->len - 1, GFP_NOFS); 1805 set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree, 1806 buf->start, buf->start + buf->len - 1, 1807 EXTENT_CSUM, GFP_NOFS); 1808 buf->flags |= EXTENT_CSUM; 1809 if (!btrfs_test_opt(root, SSD)) 1810 btrfs_set_buffer_defrag(buf); 1811 trans->blocks_used++; 1812 return buf; 1813 } 1814 1815 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, 1816 struct btrfs_root *root, 1817 struct extent_buffer *leaf) 1818 { 1819 u64 leaf_owner; 1820 u64 leaf_generation; 1821 struct btrfs_key key; 1822 struct btrfs_file_extent_item *fi; 1823 int i; 1824 int nritems; 1825 int ret; 1826 1827 BUG_ON(!btrfs_is_leaf(leaf)); 1828 nritems = btrfs_header_nritems(leaf); 1829 leaf_owner = btrfs_header_owner(leaf); 1830 leaf_generation = btrfs_header_generation(leaf); 1831 1832 for (i = 0; i < nritems; i++) { 1833 u64 disk_bytenr; 1834 1835 btrfs_item_key_to_cpu(leaf, &key, i); 1836 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 1837 continue; 1838 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 1839 if (btrfs_file_extent_type(leaf, fi) == 1840 BTRFS_FILE_EXTENT_INLINE) 1841 continue; 1842 /* 1843 * FIXME make sure to insert a trans record that 1844 * repeats the snapshot del on crash 1845 */ 1846 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1847 if (disk_bytenr == 0) 1848 continue; 1849 ret = btrfs_free_extent(trans, root, disk_bytenr, 1850 btrfs_file_extent_disk_num_bytes(leaf, fi), 1851 leaf_owner, leaf_generation, 1852 key.objectid, key.offset, 0); 1853 BUG_ON(ret); 1854 } 1855 return 0; 1856 } 1857 1858 static void noinline reada_walk_down(struct btrfs_root *root, 1859 struct extent_buffer *node) 1860 { 1861 int i; 1862 u32 nritems; 1863 u64 bytenr; 1864 int ret; 1865 u32 refs; 1866 int level; 1867 u32 blocksize; 1868 1869 nritems = btrfs_header_nritems(node); 1870 level = btrfs_header_level(node); 1871 for (i = 0; i < nritems; i++) { 1872 bytenr = btrfs_node_blockptr(node, i); 1873 blocksize = btrfs_level_size(root, level - 1); 1874 ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs); 1875 BUG_ON(ret); 1876 if (refs != 1) 1877 continue; 1878 mutex_unlock(&root->fs_info->fs_mutex); 1879 ret = readahead_tree_block(root, bytenr, blocksize); 1880 cond_resched(); 1881 mutex_lock(&root->fs_info->fs_mutex); 1882 if (ret) 1883 break; 1884 } 1885 } 1886 1887 /* 1888 * helper function for drop_snapshot, this walks down the tree dropping ref 1889 * counts as it goes. 1890 */ 1891 static int noinline walk_down_tree(struct btrfs_trans_handle *trans, 1892 struct btrfs_root *root, 1893 struct btrfs_path *path, int *level) 1894 { 1895 u64 root_owner; 1896 u64 root_gen; 1897 u64 bytenr; 1898 struct extent_buffer *next; 1899 struct extent_buffer *cur; 1900 struct extent_buffer *parent; 1901 u32 blocksize; 1902 int ret; 1903 u32 refs; 1904 1905 WARN_ON(*level < 0); 1906 WARN_ON(*level >= BTRFS_MAX_LEVEL); 1907 ret = lookup_extent_ref(trans, root, 1908 path->nodes[*level]->start, 1909 path->nodes[*level]->len, &refs); 1910 BUG_ON(ret); 1911 if (refs > 1) 1912 goto out; 1913 1914 /* 1915 * walk down to the last node level and free all the leaves 1916 */ 1917 while(*level >= 0) { 1918 WARN_ON(*level < 0); 1919 WARN_ON(*level >= BTRFS_MAX_LEVEL); 1920 cur = path->nodes[*level]; 1921 1922 if (*level > 0 && path->slots[*level] == 0) 1923 reada_walk_down(root, cur); 1924 1925 if (btrfs_header_level(cur) != *level) 1926 WARN_ON(1); 1927 1928 if (path->slots[*level] >= 1929 btrfs_header_nritems(cur)) 1930 break; 1931 if (*level == 0) { 1932 ret = drop_leaf_ref(trans, root, cur); 1933 BUG_ON(ret); 1934 break; 1935 } 1936 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 1937 blocksize = btrfs_level_size(root, *level - 1); 1938 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); 1939 BUG_ON(ret); 1940 if (refs != 1) { 1941 parent = path->nodes[*level]; 1942 root_owner = btrfs_header_owner(parent); 1943 root_gen = btrfs_header_generation(parent); 1944 path->slots[*level]++; 1945 ret = btrfs_free_extent(trans, root, bytenr, 1946 blocksize, root_owner, 1947 root_gen, 0, 0, 1); 1948 BUG_ON(ret); 1949 continue; 1950 } 1951 next = btrfs_find_tree_block(root, bytenr, blocksize); 1952 if (!next || !btrfs_buffer_uptodate(next)) { 1953 free_extent_buffer(next); 1954 mutex_unlock(&root->fs_info->fs_mutex); 1955 next = read_tree_block(root, bytenr, blocksize); 1956 mutex_lock(&root->fs_info->fs_mutex); 1957 1958 /* we dropped the lock, check one more time */ 1959 ret = lookup_extent_ref(trans, root, bytenr, 1960 blocksize, &refs); 1961 BUG_ON(ret); 1962 if (refs != 1) { 1963 parent = path->nodes[*level]; 1964 root_owner = btrfs_header_owner(parent); 1965 root_gen = btrfs_header_generation(parent); 1966 1967 path->slots[*level]++; 1968 free_extent_buffer(next); 1969 ret = btrfs_free_extent(trans, root, bytenr, 1970 blocksize, 1971 root_owner, 1972 root_gen, 0, 0, 1); 1973 BUG_ON(ret); 1974 continue; 1975 } 1976 } 1977 WARN_ON(*level <= 0); 1978 if (path->nodes[*level-1]) 1979 free_extent_buffer(path->nodes[*level-1]); 1980 path->nodes[*level-1] = next; 1981 *level = btrfs_header_level(next); 1982 path->slots[*level] = 0; 1983 } 1984 out: 1985 WARN_ON(*level < 0); 1986 WARN_ON(*level >= BTRFS_MAX_LEVEL); 1987 1988 if (path->nodes[*level] == root->node) { 1989 root_owner = root->root_key.objectid; 1990 parent = path->nodes[*level]; 1991 } else { 1992 parent = path->nodes[*level + 1]; 1993 root_owner = btrfs_header_owner(parent); 1994 } 1995 1996 root_gen = btrfs_header_generation(parent); 1997 ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, 1998 path->nodes[*level]->len, 1999 root_owner, root_gen, 0, 0, 1); 2000 free_extent_buffer(path->nodes[*level]); 2001 path->nodes[*level] = NULL; 2002 *level += 1; 2003 BUG_ON(ret); 2004 return 0; 2005 } 2006 2007 /* 2008 * helper for dropping snapshots. This walks back up the tree in the path 2009 * to find the first node higher up where we haven't yet gone through 2010 * all the slots 2011 */ 2012 static int noinline walk_up_tree(struct btrfs_trans_handle *trans, 2013 struct btrfs_root *root, 2014 struct btrfs_path *path, int *level) 2015 { 2016 u64 root_owner; 2017 u64 root_gen; 2018 struct btrfs_root_item *root_item = &root->root_item; 2019 int i; 2020 int slot; 2021 int ret; 2022 2023 for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { 2024 slot = path->slots[i]; 2025 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { 2026 struct extent_buffer *node; 2027 struct btrfs_disk_key disk_key; 2028 node = path->nodes[i]; 2029 path->slots[i]++; 2030 *level = i; 2031 WARN_ON(*level == 0); 2032 btrfs_node_key(node, &disk_key, path->slots[i]); 2033 memcpy(&root_item->drop_progress, 2034 &disk_key, sizeof(disk_key)); 2035 root_item->drop_level = i; 2036 return 0; 2037 } else { 2038 if (path->nodes[*level] == root->node) { 2039 root_owner = root->root_key.objectid; 2040 root_gen = 2041 btrfs_header_generation(path->nodes[*level]); 2042 } else { 2043 struct extent_buffer *node; 2044 node = path->nodes[*level + 1]; 2045 root_owner = btrfs_header_owner(node); 2046 root_gen = btrfs_header_generation(node); 2047 } 2048 ret = btrfs_free_extent(trans, root, 2049 path->nodes[*level]->start, 2050 path->nodes[*level]->len, 2051 root_owner, root_gen, 0, 0, 1); 2052 BUG_ON(ret); 2053 free_extent_buffer(path->nodes[*level]); 2054 path->nodes[*level] = NULL; 2055 *level = i + 1; 2056 } 2057 } 2058 return 1; 2059 } 2060 2061 /* 2062 * drop the reference count on the tree rooted at 'snap'. This traverses 2063 * the tree freeing any blocks that have a ref count of zero after being 2064 * decremented. 2065 */ 2066 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 2067 *root) 2068 { 2069 int ret = 0; 2070 int wret; 2071 int level; 2072 struct btrfs_path *path; 2073 int i; 2074 int orig_level; 2075 struct btrfs_root_item *root_item = &root->root_item; 2076 2077 path = btrfs_alloc_path(); 2078 BUG_ON(!path); 2079 2080 level = btrfs_header_level(root->node); 2081 orig_level = level; 2082 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 2083 path->nodes[level] = root->node; 2084 extent_buffer_get(root->node); 2085 path->slots[level] = 0; 2086 } else { 2087 struct btrfs_key key; 2088 struct btrfs_disk_key found_key; 2089 struct extent_buffer *node; 2090 2091 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 2092 level = root_item->drop_level; 2093 path->lowest_level = level; 2094 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2095 if (wret < 0) { 2096 ret = wret; 2097 goto out; 2098 } 2099 node = path->nodes[level]; 2100 btrfs_node_key(node, &found_key, path->slots[level]); 2101 WARN_ON(memcmp(&found_key, &root_item->drop_progress, 2102 sizeof(found_key))); 2103 } 2104 while(1) { 2105 wret = walk_down_tree(trans, root, path, &level); 2106 if (wret > 0) 2107 break; 2108 if (wret < 0) 2109 ret = wret; 2110 2111 wret = walk_up_tree(trans, root, path, &level); 2112 if (wret > 0) 2113 break; 2114 if (wret < 0) 2115 ret = wret; 2116 ret = -EAGAIN; 2117 break; 2118 } 2119 for (i = 0; i <= orig_level; i++) { 2120 if (path->nodes[i]) { 2121 free_extent_buffer(path->nodes[i]); 2122 path->nodes[i] = NULL; 2123 } 2124 } 2125 out: 2126 btrfs_free_path(path); 2127 return ret; 2128 } 2129 2130 int btrfs_free_block_groups(struct btrfs_fs_info *info) 2131 { 2132 u64 start; 2133 u64 end; 2134 u64 ptr; 2135 int ret; 2136 while(1) { 2137 ret = find_first_extent_bit(&info->block_group_cache, 0, 2138 &start, &end, (unsigned int)-1); 2139 if (ret) 2140 break; 2141 ret = get_state_private(&info->block_group_cache, start, &ptr); 2142 if (!ret) 2143 kfree((void *)(unsigned long)ptr); 2144 clear_extent_bits(&info->block_group_cache, start, 2145 end, (unsigned int)-1, GFP_NOFS); 2146 } 2147 while(1) { 2148 ret = find_first_extent_bit(&info->free_space_cache, 0, 2149 &start, &end, EXTENT_DIRTY); 2150 if (ret) 2151 break; 2152 clear_extent_dirty(&info->free_space_cache, start, 2153 end, GFP_NOFS); 2154 } 2155 return 0; 2156 } 2157 2158 static int noinline relocate_inode_pages(struct inode *inode, u64 start, 2159 u64 len) 2160 { 2161 u64 page_start; 2162 u64 page_end; 2163 u64 delalloc_start; 2164 u64 existing_delalloc; 2165 unsigned long last_index; 2166 unsigned long i; 2167 struct page *page; 2168 struct btrfs_root *root = BTRFS_I(inode)->root; 2169 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2170 struct file_ra_state *ra; 2171 2172 ra = kzalloc(sizeof(*ra), GFP_NOFS); 2173 2174 mutex_lock(&inode->i_mutex); 2175 i = start >> PAGE_CACHE_SHIFT; 2176 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 2177 2178 file_ra_state_init(ra, inode->i_mapping); 2179 btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index); 2180 kfree(ra); 2181 2182 for (; i <= last_index; i++) { 2183 page = grab_cache_page(inode->i_mapping, i); 2184 if (!page) 2185 goto out_unlock; 2186 if (!PageUptodate(page)) { 2187 btrfs_readpage(NULL, page); 2188 lock_page(page); 2189 if (!PageUptodate(page)) { 2190 unlock_page(page); 2191 page_cache_release(page); 2192 goto out_unlock; 2193 } 2194 } 2195 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2196 page_end = page_start + PAGE_CACHE_SIZE - 1; 2197 2198 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 2199 2200 delalloc_start = page_start; 2201 existing_delalloc = count_range_bits(io_tree, 2202 &delalloc_start, page_end, 2203 PAGE_CACHE_SIZE, EXTENT_DELALLOC); 2204 2205 set_extent_delalloc(io_tree, page_start, 2206 page_end, GFP_NOFS); 2207 2208 spin_lock(&root->fs_info->delalloc_lock); 2209 root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - 2210 existing_delalloc; 2211 spin_unlock(&root->fs_info->delalloc_lock); 2212 2213 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 2214 set_page_dirty(page); 2215 unlock_page(page); 2216 page_cache_release(page); 2217 } 2218 2219 out_unlock: 2220 mutex_unlock(&inode->i_mutex); 2221 return 0; 2222 } 2223 2224 /* 2225 * note, this releases the path 2226 */ 2227 static int noinline relocate_one_reference(struct btrfs_root *extent_root, 2228 struct btrfs_path *path, 2229 struct btrfs_key *extent_key) 2230 { 2231 struct inode *inode; 2232 struct btrfs_root *found_root; 2233 struct btrfs_key *root_location; 2234 struct btrfs_extent_ref *ref; 2235 u64 ref_root; 2236 u64 ref_gen; 2237 u64 ref_objectid; 2238 u64 ref_offset; 2239 int ret; 2240 2241 ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 2242 struct btrfs_extent_ref); 2243 ref_root = btrfs_ref_root(path->nodes[0], ref); 2244 ref_gen = btrfs_ref_generation(path->nodes[0], ref); 2245 ref_objectid = btrfs_ref_objectid(path->nodes[0], ref); 2246 ref_offset = btrfs_ref_offset(path->nodes[0], ref); 2247 btrfs_release_path(extent_root, path); 2248 2249 root_location = kmalloc(sizeof(*root_location), GFP_NOFS); 2250 root_location->objectid = ref_root; 2251 if (ref_gen == 0) 2252 root_location->offset = 0; 2253 else 2254 root_location->offset = (u64)-1; 2255 root_location->type = BTRFS_ROOT_ITEM_KEY; 2256 2257 found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, 2258 root_location); 2259 BUG_ON(!found_root); 2260 kfree(root_location); 2261 2262 if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 2263 mutex_unlock(&extent_root->fs_info->fs_mutex); 2264 inode = btrfs_iget_locked(extent_root->fs_info->sb, 2265 ref_objectid, found_root); 2266 if (inode->i_state & I_NEW) { 2267 /* the inode and parent dir are two different roots */ 2268 BTRFS_I(inode)->root = found_root; 2269 BTRFS_I(inode)->location.objectid = ref_objectid; 2270 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; 2271 BTRFS_I(inode)->location.offset = 0; 2272 btrfs_read_locked_inode(inode); 2273 unlock_new_inode(inode); 2274 2275 } 2276 /* this can happen if the reference is not against 2277 * the latest version of the tree root 2278 */ 2279 if (is_bad_inode(inode)) { 2280 mutex_lock(&extent_root->fs_info->fs_mutex); 2281 goto out; 2282 } 2283 relocate_inode_pages(inode, ref_offset, extent_key->offset); 2284 /* FIXME, data=ordered will help get rid of this */ 2285 filemap_fdatawrite(inode->i_mapping); 2286 iput(inode); 2287 mutex_lock(&extent_root->fs_info->fs_mutex); 2288 } else { 2289 struct btrfs_trans_handle *trans; 2290 struct btrfs_key found_key; 2291 struct extent_buffer *eb; 2292 int level; 2293 int i; 2294 2295 trans = btrfs_start_transaction(found_root, 1); 2296 eb = read_tree_block(found_root, extent_key->objectid, 2297 extent_key->offset); 2298 level = btrfs_header_level(eb); 2299 2300 if (level == 0) 2301 btrfs_item_key_to_cpu(eb, &found_key, 0); 2302 else 2303 btrfs_node_key_to_cpu(eb, &found_key, 0); 2304 2305 free_extent_buffer(eb); 2306 2307 path->lowest_level = level; 2308 path->reada = 2; 2309 ret = btrfs_search_slot(trans, found_root, &found_key, path, 2310 0, 1); 2311 path->lowest_level = 0; 2312 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 2313 if (!path->nodes[i]) 2314 break; 2315 free_extent_buffer(path->nodes[i]); 2316 path->nodes[i] = NULL; 2317 } 2318 btrfs_release_path(found_root, path); 2319 btrfs_end_transaction(trans, found_root); 2320 } 2321 2322 out: 2323 return 0; 2324 } 2325 2326 static int noinline relocate_one_extent(struct btrfs_root *extent_root, 2327 struct btrfs_path *path, 2328 struct btrfs_key *extent_key) 2329 { 2330 struct btrfs_key key; 2331 struct btrfs_key found_key; 2332 struct extent_buffer *leaf; 2333 u32 nritems; 2334 u32 item_size; 2335 int ret = 0; 2336 2337 key.objectid = extent_key->objectid; 2338 key.type = BTRFS_EXTENT_REF_KEY; 2339 key.offset = 0; 2340 2341 while(1) { 2342 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2343 2344 if (ret < 0) 2345 goto out; 2346 2347 ret = 0; 2348 leaf = path->nodes[0]; 2349 nritems = btrfs_header_nritems(leaf); 2350 if (path->slots[0] == nritems) 2351 goto out; 2352 2353 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2354 if (found_key.objectid != extent_key->objectid) 2355 break; 2356 2357 if (found_key.type != BTRFS_EXTENT_REF_KEY) 2358 break; 2359 2360 key.offset = found_key.offset + 1; 2361 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2362 2363 ret = relocate_one_reference(extent_root, path, extent_key); 2364 if (ret) 2365 goto out; 2366 } 2367 ret = 0; 2368 out: 2369 btrfs_release_path(extent_root, path); 2370 return ret; 2371 } 2372 2373 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) 2374 { 2375 struct btrfs_trans_handle *trans; 2376 struct btrfs_root *tree_root = root->fs_info->tree_root; 2377 struct btrfs_path *path; 2378 u64 cur_byte; 2379 u64 total_found; 2380 struct btrfs_fs_info *info = root->fs_info; 2381 struct extent_io_tree *block_group_cache; 2382 struct btrfs_key key; 2383 struct btrfs_key found_key; 2384 struct extent_buffer *leaf; 2385 u32 nritems; 2386 int ret; 2387 int progress = 0; 2388 2389 btrfs_set_super_total_bytes(&info->super_copy, new_size); 2390 clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, 2391 GFP_NOFS); 2392 block_group_cache = &info->block_group_cache; 2393 path = btrfs_alloc_path(); 2394 root = root->fs_info->extent_root; 2395 path->reada = 2; 2396 2397 again: 2398 total_found = 0; 2399 key.objectid = new_size; 2400 key.offset = 0; 2401 key.type = 0; 2402 cur_byte = key.objectid; 2403 2404 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2405 if (ret < 0) 2406 goto out; 2407 2408 ret = find_previous_extent(root, path); 2409 if (ret < 0) 2410 goto out; 2411 if (ret == 0) { 2412 leaf = path->nodes[0]; 2413 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2414 if (found_key.objectid + found_key.offset > new_size) { 2415 cur_byte = found_key.objectid; 2416 key.objectid = cur_byte; 2417 } 2418 } 2419 btrfs_release_path(root, path); 2420 2421 while(1) { 2422 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2423 if (ret < 0) 2424 goto out; 2425 2426 leaf = path->nodes[0]; 2427 nritems = btrfs_header_nritems(leaf); 2428 next: 2429 if (path->slots[0] >= nritems) { 2430 ret = btrfs_next_leaf(root, path); 2431 if (ret < 0) 2432 goto out; 2433 if (ret == 1) { 2434 ret = 0; 2435 break; 2436 } 2437 leaf = path->nodes[0]; 2438 nritems = btrfs_header_nritems(leaf); 2439 } 2440 2441 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2442 2443 if (progress && need_resched()) { 2444 memcpy(&key, &found_key, sizeof(key)); 2445 mutex_unlock(&root->fs_info->fs_mutex); 2446 cond_resched(); 2447 mutex_lock(&root->fs_info->fs_mutex); 2448 btrfs_release_path(root, path); 2449 btrfs_search_slot(NULL, root, &key, path, 0, 0); 2450 progress = 0; 2451 goto next; 2452 } 2453 progress = 1; 2454 2455 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || 2456 found_key.objectid + found_key.offset <= cur_byte) { 2457 path->slots[0]++; 2458 goto next; 2459 } 2460 2461 total_found++; 2462 cur_byte = found_key.objectid + found_key.offset; 2463 key.objectid = cur_byte; 2464 btrfs_release_path(root, path); 2465 ret = relocate_one_extent(root, path, &found_key); 2466 } 2467 2468 btrfs_release_path(root, path); 2469 2470 if (total_found > 0) { 2471 trans = btrfs_start_transaction(tree_root, 1); 2472 btrfs_commit_transaction(trans, tree_root); 2473 2474 mutex_unlock(&root->fs_info->fs_mutex); 2475 btrfs_clean_old_snapshots(tree_root); 2476 mutex_lock(&root->fs_info->fs_mutex); 2477 2478 trans = btrfs_start_transaction(tree_root, 1); 2479 btrfs_commit_transaction(trans, tree_root); 2480 goto again; 2481 } 2482 2483 trans = btrfs_start_transaction(root, 1); 2484 key.objectid = new_size; 2485 key.offset = 0; 2486 key.type = 0; 2487 while(1) { 2488 u64 ptr; 2489 2490 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2491 if (ret < 0) 2492 goto out; 2493 2494 leaf = path->nodes[0]; 2495 nritems = btrfs_header_nritems(leaf); 2496 bg_next: 2497 if (path->slots[0] >= nritems) { 2498 ret = btrfs_next_leaf(root, path); 2499 if (ret < 0) 2500 break; 2501 if (ret == 1) { 2502 ret = 0; 2503 break; 2504 } 2505 leaf = path->nodes[0]; 2506 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2507 2508 /* 2509 * btrfs_next_leaf doesn't cow buffers, we have to 2510 * do the search again 2511 */ 2512 memcpy(&key, &found_key, sizeof(key)); 2513 btrfs_release_path(root, path); 2514 goto resched_check; 2515 } 2516 2517 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2518 if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) { 2519 printk("shrinker found key %Lu %u %Lu\n", 2520 found_key.objectid, found_key.type, 2521 found_key.offset); 2522 path->slots[0]++; 2523 goto bg_next; 2524 } 2525 ret = get_state_private(&info->block_group_cache, 2526 found_key.objectid, &ptr); 2527 if (!ret) 2528 kfree((void *)(unsigned long)ptr); 2529 2530 clear_extent_bits(&info->block_group_cache, found_key.objectid, 2531 found_key.objectid + found_key.offset - 1, 2532 (unsigned int)-1, GFP_NOFS); 2533 2534 key.objectid = found_key.objectid + 1; 2535 btrfs_del_item(trans, root, path); 2536 btrfs_release_path(root, path); 2537 resched_check: 2538 if (need_resched()) { 2539 mutex_unlock(&root->fs_info->fs_mutex); 2540 cond_resched(); 2541 mutex_lock(&root->fs_info->fs_mutex); 2542 } 2543 } 2544 clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, 2545 GFP_NOFS); 2546 btrfs_commit_transaction(trans, root); 2547 out: 2548 btrfs_free_path(path); 2549 return ret; 2550 } 2551 2552 int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, 2553 struct btrfs_root *root, u64 new_size) 2554 { 2555 struct btrfs_path *path; 2556 u64 nr = 0; 2557 u64 cur_byte; 2558 u64 old_size; 2559 unsigned long rem; 2560 struct btrfs_block_group_cache *cache; 2561 struct btrfs_block_group_item *item; 2562 struct btrfs_fs_info *info = root->fs_info; 2563 struct extent_io_tree *block_group_cache; 2564 struct btrfs_key key; 2565 struct extent_buffer *leaf; 2566 int ret; 2567 int bit; 2568 2569 old_size = btrfs_super_total_bytes(&info->super_copy); 2570 block_group_cache = &info->block_group_cache; 2571 2572 root = info->extent_root; 2573 2574 cache = btrfs_lookup_block_group(root->fs_info, old_size - 1); 2575 2576 cur_byte = cache->key.objectid + cache->key.offset; 2577 if (cur_byte >= new_size) 2578 goto set_size; 2579 2580 key.offset = BTRFS_BLOCK_GROUP_SIZE; 2581 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 2582 2583 path = btrfs_alloc_path(); 2584 if (!path) 2585 return -ENOMEM; 2586 2587 while(cur_byte < new_size) { 2588 key.objectid = cur_byte; 2589 ret = btrfs_insert_empty_item(trans, root, path, &key, 2590 sizeof(struct btrfs_block_group_item)); 2591 BUG_ON(ret); 2592 leaf = path->nodes[0]; 2593 item = btrfs_item_ptr(leaf, path->slots[0], 2594 struct btrfs_block_group_item); 2595 2596 btrfs_set_disk_block_group_used(leaf, item, 0); 2597 div_long_long_rem(nr, 3, &rem); 2598 if (rem) { 2599 btrfs_set_disk_block_group_flags(leaf, item, 2600 BTRFS_BLOCK_GROUP_DATA); 2601 } else { 2602 btrfs_set_disk_block_group_flags(leaf, item, 0); 2603 } 2604 nr++; 2605 2606 cache = kmalloc(sizeof(*cache), GFP_NOFS); 2607 BUG_ON(!cache); 2608 2609 read_extent_buffer(leaf, &cache->item, (unsigned long)item, 2610 sizeof(cache->item)); 2611 2612 memcpy(&cache->key, &key, sizeof(key)); 2613 cache->cached = 0; 2614 cache->pinned = 0; 2615 cur_byte = key.objectid + key.offset; 2616 btrfs_release_path(root, path); 2617 2618 if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { 2619 bit = BLOCK_GROUP_DATA; 2620 cache->data = BTRFS_BLOCK_GROUP_DATA; 2621 } else { 2622 bit = BLOCK_GROUP_METADATA; 2623 cache->data = 0; 2624 } 2625 2626 /* use EXTENT_LOCKED to prevent merging */ 2627 set_extent_bits(block_group_cache, key.objectid, 2628 key.objectid + key.offset - 1, 2629 bit | EXTENT_LOCKED, GFP_NOFS); 2630 set_state_private(block_group_cache, key.objectid, 2631 (unsigned long)cache); 2632 } 2633 btrfs_free_path(path); 2634 set_size: 2635 btrfs_set_super_total_bytes(&info->super_copy, new_size); 2636 return 0; 2637 } 2638 2639 int btrfs_read_block_groups(struct btrfs_root *root) 2640 { 2641 struct btrfs_path *path; 2642 int ret; 2643 int err = 0; 2644 int bit; 2645 struct btrfs_block_group_cache *cache; 2646 struct btrfs_fs_info *info = root->fs_info; 2647 struct extent_io_tree *block_group_cache; 2648 struct btrfs_key key; 2649 struct btrfs_key found_key; 2650 struct extent_buffer *leaf; 2651 2652 block_group_cache = &info->block_group_cache; 2653 2654 root = info->extent_root; 2655 key.objectid = 0; 2656 key.offset = BTRFS_BLOCK_GROUP_SIZE; 2657 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 2658 2659 path = btrfs_alloc_path(); 2660 if (!path) 2661 return -ENOMEM; 2662 2663 while(1) { 2664 ret = btrfs_search_slot(NULL, info->extent_root, 2665 &key, path, 0, 0); 2666 if (ret != 0) { 2667 err = ret; 2668 break; 2669 } 2670 leaf = path->nodes[0]; 2671 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2672 cache = kmalloc(sizeof(*cache), GFP_NOFS); 2673 if (!cache) { 2674 err = -1; 2675 break; 2676 } 2677 2678 read_extent_buffer(leaf, &cache->item, 2679 btrfs_item_ptr_offset(leaf, path->slots[0]), 2680 sizeof(cache->item)); 2681 memcpy(&cache->key, &found_key, sizeof(found_key)); 2682 cache->cached = 0; 2683 cache->pinned = 0; 2684 key.objectid = found_key.objectid + found_key.offset; 2685 btrfs_release_path(root, path); 2686 2687 if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) { 2688 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; 2689 cache->data = BTRFS_BLOCK_GROUP_MIXED; 2690 } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { 2691 bit = BLOCK_GROUP_DATA; 2692 cache->data = BTRFS_BLOCK_GROUP_DATA; 2693 } else { 2694 bit = BLOCK_GROUP_METADATA; 2695 cache->data = 0; 2696 } 2697 2698 /* use EXTENT_LOCKED to prevent merging */ 2699 set_extent_bits(block_group_cache, found_key.objectid, 2700 found_key.objectid + found_key.offset - 1, 2701 bit | EXTENT_LOCKED, GFP_NOFS); 2702 set_state_private(block_group_cache, found_key.objectid, 2703 (unsigned long)cache); 2704 2705 if (key.objectid >= 2706 btrfs_super_total_bytes(&info->super_copy)) 2707 break; 2708 } 2709 2710 btrfs_free_path(path); 2711 return 0; 2712 } 2713