1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/crc32c.h> 21 #include <linux/pagemap.h> 22 #include "hash.h" 23 #include "ctree.h" 24 #include "disk-io.h" 25 #include "print-tree.h" 26 #include "transaction.h" 27 28 #define BLOCK_GROUP_DATA EXTENT_WRITEBACK 29 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE 30 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY 31 32 static int finish_current_insert(struct btrfs_trans_handle *trans, struct 33 btrfs_root *extent_root); 34 static int del_pending_extents(struct btrfs_trans_handle *trans, struct 35 btrfs_root *extent_root); 36 static int find_previous_extent(struct btrfs_root *root, 37 struct btrfs_path *path) 38 { 39 struct btrfs_key found_key; 40 struct extent_buffer *leaf; 41 int ret; 42 43 while(1) { 44 if (path->slots[0] == 0) { 45 ret = btrfs_prev_leaf(root, path); 46 if (ret != 0) 47 return ret; 48 } else { 49 path->slots[0]--; 50 } 51 leaf = path->nodes[0]; 52 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 53 if (found_key.type == BTRFS_EXTENT_ITEM_KEY) 54 return 0; 55 } 56 return 1; 57 } 58 59 static int cache_block_group(struct btrfs_root *root, 60 struct btrfs_block_group_cache *block_group) 61 { 62 struct btrfs_path *path; 63 int ret; 64 struct btrfs_key key; 65 struct extent_buffer *leaf; 66 
struct extent_map_tree *free_space_cache; 67 int slot; 68 u64 last = 0; 69 u64 hole_size; 70 u64 first_free; 71 int found = 0; 72 73 if (!block_group) 74 return 0; 75 76 root = root->fs_info->extent_root; 77 free_space_cache = &root->fs_info->free_space_cache; 78 79 if (block_group->cached) 80 return 0; 81 82 path = btrfs_alloc_path(); 83 if (!path) 84 return -ENOMEM; 85 86 path->reada = 2; 87 first_free = block_group->key.objectid; 88 key.objectid = block_group->key.objectid; 89 key.offset = 0; 90 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 91 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 92 if (ret < 0) 93 return ret; 94 ret = find_previous_extent(root, path); 95 if (ret < 0) 96 return ret; 97 if (ret == 0) { 98 leaf = path->nodes[0]; 99 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 100 if (key.objectid + key.offset > first_free) 101 first_free = key.objectid + key.offset; 102 } 103 while(1) { 104 leaf = path->nodes[0]; 105 slot = path->slots[0]; 106 if (slot >= btrfs_header_nritems(leaf)) { 107 ret = btrfs_next_leaf(root, path); 108 if (ret < 0) 109 goto err; 110 if (ret == 0) { 111 continue; 112 } else { 113 break; 114 } 115 } 116 btrfs_item_key_to_cpu(leaf, &key, slot); 117 if (key.objectid < block_group->key.objectid) { 118 goto next; 119 } 120 if (key.objectid >= block_group->key.objectid + 121 block_group->key.offset) { 122 break; 123 } 124 125 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 126 if (!found) { 127 last = first_free; 128 found = 1; 129 } 130 if (key.objectid > last) { 131 hole_size = key.objectid - last; 132 set_extent_dirty(free_space_cache, last, 133 last + hole_size - 1, 134 GFP_NOFS); 135 } 136 last = key.objectid + key.offset; 137 } 138 next: 139 path->slots[0]++; 140 } 141 142 if (!found) 143 last = first_free; 144 if (block_group->key.objectid + 145 block_group->key.offset > last) { 146 hole_size = block_group->key.objectid + 147 block_group->key.offset - last; 148 set_extent_dirty(free_space_cache, last, 149 
last + hole_size - 1, GFP_NOFS); 150 } 151 block_group->cached = 1; 152 err: 153 btrfs_free_path(path); 154 return 0; 155 } 156 157 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct 158 btrfs_fs_info *info, 159 u64 bytenr) 160 { 161 struct extent_map_tree *block_group_cache; 162 struct btrfs_block_group_cache *block_group = NULL; 163 u64 ptr; 164 u64 start; 165 u64 end; 166 int ret; 167 168 block_group_cache = &info->block_group_cache; 169 ret = find_first_extent_bit(block_group_cache, 170 bytenr, &start, &end, 171 BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA); 172 if (ret) { 173 return NULL; 174 } 175 ret = get_state_private(block_group_cache, start, &ptr); 176 if (ret) 177 return NULL; 178 179 block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr; 180 if (block_group->key.objectid <= bytenr && bytenr < 181 block_group->key.objectid + block_group->key.offset) 182 return block_group; 183 return NULL; 184 } 185 static u64 noinline find_search_start(struct btrfs_root *root, 186 struct btrfs_block_group_cache **cache_ret, 187 u64 search_start, int num, int data) 188 { 189 int ret; 190 struct btrfs_block_group_cache *cache = *cache_ret; 191 u64 last; 192 u64 start = 0; 193 u64 end = 0; 194 u64 cache_miss = 0; 195 u64 total_fs_bytes; 196 int wrapped = 0; 197 198 if (!cache) { 199 goto out; 200 } 201 total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); 202 again: 203 ret = cache_block_group(root, cache); 204 if (ret) 205 goto out; 206 207 last = max(search_start, cache->key.objectid); 208 209 while(1) { 210 ret = find_first_extent_bit(&root->fs_info->free_space_cache, 211 last, &start, &end, EXTENT_DIRTY); 212 if (ret) { 213 if (!cache_miss) 214 cache_miss = last; 215 goto new_group; 216 } 217 218 start = max(last, start); 219 last = end + 1; 220 if (last - start < num) { 221 if (last == cache->key.objectid + cache->key.offset) 222 cache_miss = start; 223 continue; 224 } 225 if (data != BTRFS_BLOCK_GROUP_MIXED && 226 start + num > 
cache->key.objectid + cache->key.offset)
			goto new_group;
		if (start + num > total_fs_bytes)
			goto new_group;
		return start;
	}
out:
	/* no usable cache: report where the caller asked us to look */
	cache = btrfs_lookup_block_group(root->fs_info, search_start);
	if (!cache) {
		printk("Unable to find block group for %Lu\n",
		       search_start);
		WARN_ON(1);
		return search_start;
	}
	return search_start;

new_group:
	/* current group exhausted: move to the group after it */
	last = cache->key.objectid + cache->key.offset;
wrapped:
	cache = btrfs_lookup_block_group(root->fs_info, last);
	if (!cache || cache->key.objectid >= total_fs_bytes) {
no_cache:
		/* one retry from search_start in mixed mode before giving up */
		if (!wrapped) {
			wrapped = 1;
			last = search_start;
			data = BTRFS_BLOCK_GROUP_MIXED;
			goto wrapped;
		}
		goto out;
	}
	if (cache_miss && !cache->cached) {
		cache_block_group(root, cache);
		last = cache_miss;
		cache = btrfs_lookup_block_group(root->fs_info, last);
	}
	cache = btrfs_find_block_group(root, cache, last, data, 0);
	if (!cache)
		goto no_cache;
	*cache_ret = cache;
	cache_miss = 0;
	goto again;
}

/*
 * Return num * factor / 10; factor == 10 is the identity and skips the
 * division entirely.
 */
static u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	do_div(num, 10);
	return num;
}

/*
 * Pick a block group to allocate from.  A group qualifies when its used +
 * pinned bytes are below a fullness threshold (div_factor of its size).
 * Tries, in order: the group at @search_start, the caller's @hint, then a
 * full scan of groups tagged with the wanted type bit; after that it
 * relaxes the threshold (full_search) and finally the type (data_swap).
 * Returns NULL when nothing qualifies.
 */
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
						       struct btrfs_block_group_cache
						       *hint, u64 search_start,
						       int data, int owner)
{
	struct btrfs_block_group_cache *cache;
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *found_group = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
	u64 last = 0;
	u64 hint_last;
	u64 start;
	u64 end;
	u64 free_check;
	u64 ptr;
	u64 total_fs_bytes;
	int bit;
	int ret;
	int full_search = 0;
	int factor = 8;
	int data_swap = 0;

	block_group_cache = &info->block_group_cache;
	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);

	if (!owner)
		factor = 8;

	if (data == BTRFS_BLOCK_GROUP_MIXED) {
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
		factor = 10;
	} else if (data)
		bit = BLOCK_GROUP_DATA;
	else
		bit = BLOCK_GROUP_METADATA;

	if (search_start && search_start < total_fs_bytes) {
		struct btrfs_block_group_cache *shint;
		shint = btrfs_lookup_block_group(info, search_start);
		if (shint && (shint->data == data ||
			      shint->data == BTRFS_BLOCK_GROUP_MIXED)) {
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned <
			    div_factor(shint->key.offset, factor)) {
				return shint;
			}
		}
	}
	if (hint && hint->key.objectid < total_fs_bytes &&
	    (hint->data == data || hint->data == BTRFS_BLOCK_GROUP_MIXED)) {
		used = btrfs_block_group_used(&hint->item);
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
			return hint;
		}
		last = hint->key.objectid + hint->key.offset;
		hint_last = last;
	} else {
		if (hint)
			hint_last = max(hint->key.objectid, search_start);
		else
			hint_last = search_start;

		if (hint_last >= total_fs_bytes)
			hint_last = search_start;
		last = hint_last;
	}
again:
	while(1) {
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, bit);
		if (ret)
			break;

		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (cache->key.objectid > total_fs_bytes)
			break;

		if (full_search)
			free_check = cache->key.offset;
		else
			free_check = div_factor(cache->key.offset, factor);
		if (used + cache->pinned < free_check) {
			found_group = cache;
			goto found;
		}
		cond_resched();
	}
	if (!full_search) {
		/* retry from the top with the relaxed (full-size) threshold */
		last = search_start;
		full_search = 1;
		goto again;
	}
	if (!data_swap) {
		/* last resort: accept either group type */
		data_swap = 1;
		bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA;
		last = search_start;
		goto again;
	}
found:
	return found_group;
}

/*
 * Hash the backref fields into the 64 bit value used as the key offset of
 * a BTRFS_EXTENT_REF_KEY item: crc32c of root_objectid in the high 32 bits,
 * crc32c of ref_generation in the low 32 bits.  owner/owner_offset are
 * currently excluded from the hash (see the #if 0 block).
 */
static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
			   u64 owner, u64 owner_offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(ref_generation);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

#if 0
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner_offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
#endif
	return ((u64)high_crc << 32) | (u64)low_crc;
}

/*
 * Compare an on-disk extent ref against a stack one.  When cpu_ref has no
 * objectid only the first two u64 fields (root, generation) are compared.
 * Returns 1 on match, 0 otherwise.
 */
static int match_extent_ref(struct extent_buffer *leaf,
			    struct btrfs_extent_ref *disk_ref,
			    struct btrfs_extent_ref *cpu_ref)
{
	int ret;
	int len;

	if (cpu_ref->objectid)
		len = sizeof(*cpu_ref);
	else
		len = 2 * sizeof(u64);
	ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
				   len);
	return ret == 0;
}

/*
 * Find the backref item for @bytenr matching the given (root, generation,
 * owner, owner_offset) tuple, probing successive hash values on collision.
 * With @del set the path is readied for deletion (cow, -1).
 * Returns 0 with the path on the item, non-zero when not found.
 */
static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, u64 bytenr,
					  u64 root_objectid,
					  u64 ref_generation, u64 owner,
					  u64 owner_offset, int del)
{
	u64 hash;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_extent_ref ref;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *disk_ref;
	int ret;
	int ret2;

	btrfs_set_stack_ref_root(&ref, root_objectid);
	btrfs_set_stack_ref_generation(&ref, ref_generation);
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

	hash = hash_extent_ref(root_objectid, ref_generation, owner,
			       owner_offset);
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path,
					del ? -1 : 0, del);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		if (ret != 0) {
			/* no exact hash hit: look at the next ref item */
			u32 nritems = btrfs_header_nritems(leaf);
			if (path->slots[0] >= nritems) {
				ret2 = btrfs_next_leaf(root, path);
				if (ret2)
					goto out;
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
			if (found_key.objectid != bytenr ||
			    found_key.type != BTRFS_EXTENT_REF_KEY)
				goto out;
			key.offset = found_key.offset;
			if (del) {
				btrfs_release_path(root, path);
				continue;
			}
		}
		disk_ref = btrfs_item_ptr(path->nodes[0],
					  path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
			ret = 0;
			goto out;
		}
		/* hash collision: probe the next key offset */
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		key.offset = found_key.offset + 1;
		btrfs_release_path(root, path);
	}
out:
	return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume (in theory, not implemented yet)
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure has fields for:
 *
 * - Objectid of the subvolume root
 * - Generation number of the tree holding the reference
 * - objectid of the file holding the reference
 * - offset in the file corresponding to the key holding the reference
 *
 * When a file extent is allocated the fields are filled in:
 *     (root_key.objectid, trans->transid, inode objectid, offset in file)
 *
 * When a leaf is cow'd new references are added for every file extent found
 * in the leaf.  It looks the same as the create case, but trans->transid
 * will be different when the block is cow'd.
 *
 *     (root_key.objectid, trans->transid, inode objectid, offset in file)
 *
 * When a file extent is removed either during snapshot deletion or file
 * truncation, the corresponding back reference is found by searching for:
 *
 *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
 *      inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 * - Different generations of the same subvolume
 *
 * Storing sufficient information for a full reverse mapping of a btree
 * block would require storing the lowest key of the block in the backref,
 * and it would require updating that lowest key either before write out or
 * every time it changed.  Instead, the objectid of the lowest key is stored
 * along with the level of the tree block.  This provides a hint about where
 * in the btree the block can be found.  Searches through the btree only
 * need to look for a pointer to that block, so they stop one level higher
 * than the level recorded in the backref.
 *
 * Some btrees do not do reference counting on their extents.  These
 * include the extent tree and the tree of tree roots.  Backrefs for these
 * trees always have a generation of zero.
 *
 * When a tree block is created, back references are inserted:
 *
 * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
 *
 * When a tree block is cow'd in a reference counted root, new back
 * references are added for all the blocks it points to.  These are of the
 * form (trans->transid will have increased since creation):
 *
 * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
 *
 * Because the lowest_key_objectid and the level are just hints they are not
 * used when backrefs are deleted.  When a backref is deleted:
 *
 * if backref was for a tree root:
 *     root_objectid = root->root_key.objectid
 * else
 *     root_objectid = btrfs_header_owner(parent)
 *
 * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
 *
 * Back Reference Key hashing:
 *
 * Back references have four fields, each 64 bits long.  Unfortunately,
 * this is hashed into a single 64 bit number and placed into the key offset.
577 * The key objectid corresponds to the first byte in the extent, and the 578 * key type is set to BTRFS_EXTENT_REF_KEY 579 */ 580 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, 581 struct btrfs_root *root, 582 struct btrfs_path *path, u64 bytenr, 583 u64 root_objectid, u64 ref_generation, 584 u64 owner, u64 owner_offset) 585 { 586 u64 hash; 587 struct btrfs_key key; 588 struct btrfs_extent_ref ref; 589 struct btrfs_extent_ref *disk_ref; 590 int ret; 591 592 btrfs_set_stack_ref_root(&ref, root_objectid); 593 btrfs_set_stack_ref_generation(&ref, ref_generation); 594 btrfs_set_stack_ref_objectid(&ref, owner); 595 btrfs_set_stack_ref_offset(&ref, owner_offset); 596 597 hash = hash_extent_ref(root_objectid, ref_generation, owner, 598 owner_offset); 599 key.offset = hash; 600 key.objectid = bytenr; 601 key.type = BTRFS_EXTENT_REF_KEY; 602 603 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref)); 604 while (ret == -EEXIST) { 605 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 606 struct btrfs_extent_ref); 607 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) 608 goto out; 609 key.offset++; 610 btrfs_release_path(root, path); 611 ret = btrfs_insert_empty_item(trans, root, path, &key, 612 sizeof(ref)); 613 } 614 if (ret) 615 goto out; 616 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 617 struct btrfs_extent_ref); 618 write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref, 619 sizeof(ref)); 620 btrfs_mark_buffer_dirty(path->nodes[0]); 621 out: 622 btrfs_release_path(root, path); 623 return ret; 624 } 625 626 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 627 struct btrfs_root *root, 628 u64 bytenr, u64 num_bytes, 629 u64 root_objectid, u64 ref_generation, 630 u64 owner, u64 owner_offset) 631 { 632 struct btrfs_path *path; 633 int ret; 634 struct btrfs_key key; 635 struct extent_buffer *l; 636 struct btrfs_extent_item *item; 637 u32 refs; 638 639 WARN_ON(num_bytes < root->sectorsize); 640 
path = btrfs_alloc_path(); 641 if (!path) 642 return -ENOMEM; 643 644 path->reada = 0; 645 key.objectid = bytenr; 646 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 647 key.offset = num_bytes; 648 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 649 0, 1); 650 if (ret < 0) 651 return ret; 652 if (ret != 0) { 653 BUG(); 654 } 655 BUG_ON(ret != 0); 656 l = path->nodes[0]; 657 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 658 refs = btrfs_extent_refs(l, item); 659 btrfs_set_extent_refs(l, item, refs + 1); 660 btrfs_mark_buffer_dirty(path->nodes[0]); 661 662 btrfs_release_path(root->fs_info->extent_root, path); 663 664 path->reada = 0; 665 ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root, 666 path, bytenr, root_objectid, 667 ref_generation, owner, owner_offset); 668 BUG_ON(ret); 669 finish_current_insert(trans, root->fs_info->extent_root); 670 del_pending_extents(trans, root->fs_info->extent_root); 671 672 btrfs_free_path(path); 673 return 0; 674 } 675 676 int btrfs_extent_post_op(struct btrfs_trans_handle *trans, 677 struct btrfs_root *root) 678 { 679 finish_current_insert(trans, root->fs_info->extent_root); 680 del_pending_extents(trans, root->fs_info->extent_root); 681 return 0; 682 } 683 684 static int lookup_extent_ref(struct btrfs_trans_handle *trans, 685 struct btrfs_root *root, u64 bytenr, 686 u64 num_bytes, u32 *refs) 687 { 688 struct btrfs_path *path; 689 int ret; 690 struct btrfs_key key; 691 struct extent_buffer *l; 692 struct btrfs_extent_item *item; 693 694 WARN_ON(num_bytes < root->sectorsize); 695 path = btrfs_alloc_path(); 696 path->reada = 0; 697 key.objectid = bytenr; 698 key.offset = num_bytes; 699 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 700 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 701 0, 0); 702 if (ret < 0) 703 goto out; 704 if (ret != 0) { 705 btrfs_print_leaf(root, path->nodes[0]); 706 printk("failed to find block number %Lu\n", bytenr); 707 
BUG(); 708 } 709 l = path->nodes[0]; 710 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 711 *refs = btrfs_extent_refs(l, item); 712 out: 713 btrfs_free_path(path); 714 return 0; 715 } 716 717 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, 718 struct btrfs_path *count_path, 719 u64 first_extent) 720 { 721 struct btrfs_root *extent_root = root->fs_info->extent_root; 722 struct btrfs_path *path; 723 u64 bytenr; 724 u64 found_objectid; 725 u64 root_objectid = root->root_key.objectid; 726 u32 total_count = 0; 727 u32 cur_count; 728 u32 nritems; 729 int ret; 730 struct btrfs_key key; 731 struct btrfs_key found_key; 732 struct extent_buffer *l; 733 struct btrfs_extent_item *item; 734 struct btrfs_extent_ref *ref_item; 735 int level = -1; 736 737 path = btrfs_alloc_path(); 738 again: 739 if (level == -1) 740 bytenr = first_extent; 741 else 742 bytenr = count_path->nodes[level]->start; 743 744 cur_count = 0; 745 key.objectid = bytenr; 746 key.offset = 0; 747 748 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 749 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 750 if (ret < 0) 751 goto out; 752 BUG_ON(ret == 0); 753 754 l = path->nodes[0]; 755 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); 756 757 if (found_key.objectid != bytenr || 758 found_key.type != BTRFS_EXTENT_ITEM_KEY) { 759 goto out; 760 } 761 762 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 763 while (1) { 764 l = path->nodes[0]; 765 nritems = btrfs_header_nritems(l); 766 if (path->slots[0] >= nritems) { 767 ret = btrfs_next_leaf(extent_root, path); 768 if (ret == 0) 769 continue; 770 break; 771 } 772 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); 773 if (found_key.objectid != bytenr) 774 break; 775 776 if (found_key.type != BTRFS_EXTENT_REF_KEY) { 777 path->slots[0]++; 778 continue; 779 } 780 781 cur_count++; 782 ref_item = btrfs_item_ptr(l, path->slots[0], 783 struct btrfs_extent_ref); 784 found_objectid = btrfs_ref_root(l, 
ref_item); 785 786 if (found_objectid != root_objectid) { 787 total_count = 2; 788 goto out; 789 } 790 total_count = 1; 791 path->slots[0]++; 792 } 793 if (cur_count == 0) { 794 total_count = 0; 795 goto out; 796 } 797 if (level >= 0 && root->node == count_path->nodes[level]) 798 goto out; 799 level++; 800 btrfs_release_path(root, path); 801 goto again; 802 803 out: 804 btrfs_free_path(path); 805 return total_count; 806 } 807 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, 808 struct btrfs_root *root, u64 owner_objectid) 809 { 810 u64 generation; 811 u64 key_objectid; 812 u64 level; 813 u32 nritems; 814 struct btrfs_disk_key disk_key; 815 816 level = btrfs_header_level(root->node); 817 generation = trans->transid; 818 nritems = btrfs_header_nritems(root->node); 819 if (nritems > 0) { 820 if (level == 0) 821 btrfs_item_key(root->node, &disk_key, 0); 822 else 823 btrfs_node_key(root->node, &disk_key, 0); 824 key_objectid = btrfs_disk_key_objectid(&disk_key); 825 } else { 826 key_objectid = 0; 827 } 828 return btrfs_inc_extent_ref(trans, root, root->node->start, 829 root->node->len, owner_objectid, 830 generation, level, key_objectid); 831 } 832 833 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 834 struct extent_buffer *buf) 835 { 836 u64 bytenr; 837 u32 nritems; 838 struct btrfs_key key; 839 struct btrfs_file_extent_item *fi; 840 int i; 841 int level; 842 int ret; 843 int faili; 844 845 if (!root->ref_cows) 846 return 0; 847 848 level = btrfs_header_level(buf); 849 nritems = btrfs_header_nritems(buf); 850 for (i = 0; i < nritems; i++) { 851 if (level == 0) { 852 u64 disk_bytenr; 853 btrfs_item_key_to_cpu(buf, &key, i); 854 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 855 continue; 856 fi = btrfs_item_ptr(buf, i, 857 struct btrfs_file_extent_item); 858 if (btrfs_file_extent_type(buf, fi) == 859 BTRFS_FILE_EXTENT_INLINE) 860 continue; 861 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 862 if (disk_bytenr == 0) 863 
continue; 864 ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, 865 btrfs_file_extent_disk_num_bytes(buf, fi), 866 root->root_key.objectid, trans->transid, 867 key.objectid, key.offset); 868 if (ret) { 869 faili = i; 870 goto fail; 871 } 872 } else { 873 bytenr = btrfs_node_blockptr(buf, i); 874 btrfs_node_key_to_cpu(buf, &key, i); 875 ret = btrfs_inc_extent_ref(trans, root, bytenr, 876 btrfs_level_size(root, level - 1), 877 root->root_key.objectid, 878 trans->transid, 879 level - 1, key.objectid); 880 if (ret) { 881 faili = i; 882 goto fail; 883 } 884 } 885 } 886 return 0; 887 fail: 888 WARN_ON(1); 889 #if 0 890 for (i =0; i < faili; i++) { 891 if (level == 0) { 892 u64 disk_bytenr; 893 btrfs_item_key_to_cpu(buf, &key, i); 894 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 895 continue; 896 fi = btrfs_item_ptr(buf, i, 897 struct btrfs_file_extent_item); 898 if (btrfs_file_extent_type(buf, fi) == 899 BTRFS_FILE_EXTENT_INLINE) 900 continue; 901 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 902 if (disk_bytenr == 0) 903 continue; 904 err = btrfs_free_extent(trans, root, disk_bytenr, 905 btrfs_file_extent_disk_num_bytes(buf, 906 fi), 0); 907 BUG_ON(err); 908 } else { 909 bytenr = btrfs_node_blockptr(buf, i); 910 err = btrfs_free_extent(trans, root, bytenr, 911 btrfs_level_size(root, level - 1), 0); 912 BUG_ON(err); 913 } 914 } 915 #endif 916 return ret; 917 } 918 919 static int write_one_cache_group(struct btrfs_trans_handle *trans, 920 struct btrfs_root *root, 921 struct btrfs_path *path, 922 struct btrfs_block_group_cache *cache) 923 { 924 int ret; 925 int pending_ret; 926 struct btrfs_root *extent_root = root->fs_info->extent_root; 927 unsigned long bi; 928 struct extent_buffer *leaf; 929 930 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 931 if (ret < 0) 932 goto fail; 933 BUG_ON(ret); 934 935 leaf = path->nodes[0]; 936 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 937 write_extent_buffer(leaf, &cache->item, bi, 
			    sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(extent_root, path);
fail:
	/* flush queued extent-tree work even when the write itself failed */
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;

}

/*
 * Write every block group marked BLOCK_GROUP_DIRTY back to disk.  A group
 * whose write fails keeps its dirty bit so a later pass can retry; the
 * first error seen is returned after the loop finishes.
 */
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct extent_map_tree *block_group_cache;
	struct btrfs_block_group_cache *cache;
	int ret;
	int err = 0;
	int werr = 0;
	struct btrfs_path *path;
	u64 last = 0;
	u64 start;
	u64 end;
	u64 ptr;

	block_group_cache = &root->fs_info->block_group_cache;
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while(1) {
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, BLOCK_GROUP_DIRTY);
		if (ret)
			break;

		last = end + 1;
		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;

		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
		err = write_one_cache_group(trans, root,
					    path, cache);
		/*
		 * if we fail to write the cache group, we want
		 * to keep it marked dirty in hopes that a later
		 * write will work
		 */
		if (err) {
			werr = err;
			continue;
		}
		clear_extent_bits(block_group_cache, start, end,
				  BLOCK_GROUP_DIRTY, GFP_NOFS);
	}
	btrfs_free_path(path);
	return werr;
}

/*
 * Adjust the used-bytes accounting of the block group(s) covering
 * [bytenr, bytenr + num_bytes), crossing group boundaries as needed.
 * @alloc selects add vs subtract; on free, @mark_free also returns the
 * range to the free space cache.  On allocation the group's data/metadata
 * type may be retagged or degraded to MIXED to match @data.
 * Returns 0, or -1 when no block group covers @bytenr.
 */
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free, int data)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;
	u64 start;
	u64 end;

	while(total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache) {
			return -1;
		}
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);
		start = cache->key.objectid;
		end = start + cache->key.offset - 1;
		set_extent_bits(&info->block_group_cache, start, end,
				BLOCK_GROUP_DIRTY, GFP_NOFS);

		old_val = btrfs_block_group_used(&cache->item);
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			/*
			 * a group less than half full may be retagged to the
			 * requested type; otherwise it becomes MIXED
			 */
			if (cache->data != data &&
			    old_val < (cache->key.offset >> 1)) {
				int bit_to_clear;
				int bit_to_set;
				cache->data = data;
				if (data) {
					bit_to_clear = BLOCK_GROUP_METADATA;
					bit_to_set = BLOCK_GROUP_DATA;
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
					cache->item.flags |=
						BTRFS_BLOCK_GROUP_DATA;
				} else {
					bit_to_clear = BLOCK_GROUP_DATA;
					bit_to_set = BLOCK_GROUP_METADATA;
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_MIXED;
					cache->item.flags &=
						~BTRFS_BLOCK_GROUP_DATA;
				}
				clear_extent_bits(&info->block_group_cache,
						  start, end, bit_to_clear,
						  GFP_NOFS);
				set_extent_bits(&info->block_group_cache,
						start, end, bit_to_set,
						GFP_NOFS);
			} else if (cache->data != data &&
				   cache->data != BTRFS_BLOCK_GROUP_MIXED) {
				cache->data = BTRFS_BLOCK_GROUP_MIXED;
				set_extent_bits(&info->block_group_cache,
						start, end,
						BLOCK_GROUP_DATA |
						BLOCK_GROUP_METADATA,
						GFP_NOFS);
			}
			old_val += num_bytes;
		} else {
			old_val -= num_bytes;
			if (mark_free) {
				set_extent_dirty(&info->free_space_cache,
						 bytenr, bytenr + num_bytes - 1,
						 GFP_NOFS);
			}
		}
		btrfs_set_block_group_used(&cache->item, old_val);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}

/*
 * Pin or unpin [bytenr, bytenr + num) in the pinned_extents tree and keep
 * the per-group and filesystem-wide pinned byte counters in sync.
 */
static int update_pinned_extents(struct btrfs_root *root,
				 u64 bytenr, u64 num, int pin)
{
	u64 len;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (pin) {
		set_extent_dirty(&fs_info->pinned_extents,
				 bytenr, bytenr + num - 1, GFP_NOFS);
	} else {
		clear_extent_dirty(&fs_info->pinned_extents,
				   bytenr, bytenr + num - 1, GFP_NOFS);
	}
	while (num > 0) {
		cache = btrfs_lookup_block_group(fs_info, bytenr);
		WARN_ON(!cache);
		len = min(num, cache->key.offset -
			  (bytenr - cache->key.objectid));
		if (pin) {
			cache->pinned += len;
			fs_info->total_pinned += len;
		} else {
			cache->pinned -= len;
			fs_info->total_pinned -= len;
		}
		bytenr += len;
		num -= len;
	}
	return 0;
}

/*
 * Copy every currently pinned range into @copy (as EXTENT_DIRTY bits),
 * e.g. to snapshot the pinned set for later unpinning.
 */
int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy)
{
	u64 last = 0;
	u64 start;
	u64 end;
	struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents;
	int ret;

	while(1) {
		ret = find_first_extent_bit(pinned_extents, last,
					    &start, &end, EXTENT_DIRTY);
		if (ret)
			break;
		set_extent_dirty(copy, start, end, GFP_NOFS);
		last = end + 1;
	}
	return 0;
}

/*
 * At commit time, unpin every range recorded in @unpin and hand it back to
 * the free space cache.
 */
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct extent_map_tree *unpin)
{
	u64 start;
	u64 end;
	int ret;
	struct extent_map_tree *free_space_cache;
	free_space_cache = &root->fs_info->free_space_cache;

	while(1) {
		ret = find_first_extent_bit(unpin, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;
		update_pinned_extents(root, start, end + 1 - start, 0);
		clear_extent_dirty(unpin, start, end, GFP_NOFS);
		set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
	}
	return 0;
}

/*
 * Turn the ranges queued in info->extent_ins (EXTENT_LOCKED) into real
 * extent items plus backrefs in the extent tree.  The backref hint (level
 * and lowest key objectid) is read from the block itself.
 *
 * NOTE(review): the btrfs_insert_item() error and a failed
 * btrfs_alloc_path() are not checked here; only the backref insert is
 * BUG_ON verified.
 */
static int finish_current_insert(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root)
{
	u64 start;
	u64 end;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct extent_buffer *eb;
	struct btrfs_path *path;
	struct btrfs_key ins;
	struct btrfs_disk_key first;
	struct btrfs_extent_item extent_item;
	int ret;
	int level;
	int err = 0;

	btrfs_set_stack_extent_refs(&extent_item, 1);
	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
	path = btrfs_alloc_path();

	while(1) {
		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
					    &end, EXTENT_LOCKED);
		if (ret)
			break;

		ins.objectid = start;
		ins.offset = end + 1 - start;
		err = btrfs_insert_item(trans, extent_root, &ins,
					&extent_item, sizeof(extent_item));
		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
				  GFP_NOFS);
		eb = read_tree_block(extent_root, ins.objectid, ins.offset);
		level = btrfs_header_level(eb);
		if (level == 0) {
			btrfs_item_key(eb, &first, 0);
		} else {
			btrfs_node_key(eb, &first, 0);
		}
		err = btrfs_insert_extent_backref(trans, extent_root, path,
					start, extent_root->root_key.objectid,
					0, level,
					btrfs_disk_key_objectid(&first));
		BUG_ON(err);
		free_extent_buffer(eb);
	}
	btrfs_free_path(path);
	return 0;
}

/*
 * Pin a freed range, or queue it for deferred deletion when @pending.
 * Returns 1 (without pinning) when the block is a tree block created in
 * the currently running transaction -- it was never on disk in a committed
 * state, so it can be reused immediately; otherwise returns 0.
 */
static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
			  int pending)
{
	int err = 0;
	struct extent_buffer *buf;

	if (!pending) {
		buf = btrfs_find_tree_block(root, bytenr, num_bytes);
		if (buf) {
			if (btrfs_buffer_uptodate(buf)) {
				u64 transid =
				    root->fs_info->running_transaction->transid;
				u64 header_transid =
					btrfs_header_generation(buf);
				if (header_transid == transid) {
					free_extent_buffer(buf);
					return 1;
				}
			}
			free_extent_buffer(buf);
		}
		update_pinned_extents(root, bytenr, num_bytes, 1);
	} else {
		set_extent_bits(&root->fs_info->pending_del,
				bytenr, bytenr + num_bytes - 1,
				EXTENT_LOCKED, GFP_NOFS);
	}
	BUG_ON(err < 0);
	return 0;
}

/*
 * remove an extent from the
root, returns 0 on success 1233 */ 1234 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 1235 *root, u64 bytenr, u64 num_bytes, 1236 u64 root_objectid, u64 ref_generation, 1237 u64 owner_objectid, u64 owner_offset, int pin, 1238 int mark_free) 1239 { 1240 struct btrfs_path *path; 1241 struct btrfs_key key; 1242 struct btrfs_fs_info *info = root->fs_info; 1243 struct btrfs_root *extent_root = info->extent_root; 1244 struct extent_buffer *leaf; 1245 int ret; 1246 struct btrfs_extent_item *ei; 1247 u32 refs; 1248 1249 key.objectid = bytenr; 1250 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 1251 key.offset = num_bytes; 1252 1253 path = btrfs_alloc_path(); 1254 if (!path) 1255 return -ENOMEM; 1256 1257 path->reada = 0; 1258 ret = lookup_extent_backref(trans, extent_root, path, 1259 bytenr, root_objectid, 1260 ref_generation, 1261 owner_objectid, owner_offset, 1); 1262 if (ret == 0) { 1263 ret = btrfs_del_item(trans, extent_root, path); 1264 } else { 1265 btrfs_print_leaf(extent_root, path->nodes[0]); 1266 WARN_ON(1); 1267 printk("Unable to find ref byte nr %Lu root %Lu " 1268 " gen %Lu owner %Lu offset %Lu\n", bytenr, 1269 root_objectid, ref_generation, owner_objectid, 1270 owner_offset); 1271 } 1272 btrfs_release_path(extent_root, path); 1273 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); 1274 if (ret < 0) 1275 return ret; 1276 BUG_ON(ret); 1277 1278 leaf = path->nodes[0]; 1279 ei = btrfs_item_ptr(leaf, path->slots[0], 1280 struct btrfs_extent_item); 1281 refs = btrfs_extent_refs(leaf, ei); 1282 BUG_ON(refs == 0); 1283 refs -= 1; 1284 btrfs_set_extent_refs(leaf, ei, refs); 1285 btrfs_mark_buffer_dirty(leaf); 1286 1287 if (refs == 0) { 1288 u64 super_used; 1289 u64 root_used; 1290 1291 if (pin) { 1292 ret = pin_down_bytes(root, bytenr, num_bytes, 0); 1293 if (ret > 0) 1294 mark_free = 1; 1295 BUG_ON(ret < 0); 1296 } 1297 1298 /* block accounting for super block */ 1299 super_used = 
btrfs_super_bytes_used(&info->super_copy); 1300 btrfs_set_super_bytes_used(&info->super_copy, 1301 super_used - num_bytes); 1302 1303 /* block accounting for root item */ 1304 root_used = btrfs_root_used(&root->root_item); 1305 btrfs_set_root_used(&root->root_item, 1306 root_used - num_bytes); 1307 1308 ret = btrfs_del_item(trans, extent_root, path); 1309 if (ret) { 1310 return ret; 1311 } 1312 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 1313 mark_free, 0); 1314 BUG_ON(ret); 1315 } 1316 btrfs_free_path(path); 1317 finish_current_insert(trans, extent_root); 1318 return ret; 1319 } 1320 1321 /* 1322 * find all the blocks marked as pending in the radix tree and remove 1323 * them from the extent map 1324 */ 1325 static int del_pending_extents(struct btrfs_trans_handle *trans, struct 1326 btrfs_root *extent_root) 1327 { 1328 int ret; 1329 int err = 0; 1330 u64 start; 1331 u64 end; 1332 struct extent_map_tree *pending_del; 1333 struct extent_map_tree *pinned_extents; 1334 1335 pending_del = &extent_root->fs_info->pending_del; 1336 pinned_extents = &extent_root->fs_info->pinned_extents; 1337 1338 while(1) { 1339 ret = find_first_extent_bit(pending_del, 0, &start, &end, 1340 EXTENT_LOCKED); 1341 if (ret) 1342 break; 1343 update_pinned_extents(extent_root, start, end + 1 - start, 1); 1344 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED, 1345 GFP_NOFS); 1346 ret = __free_extent(trans, extent_root, 1347 start, end + 1 - start, 1348 extent_root->root_key.objectid, 1349 0, 0, 0, 0, 0); 1350 if (ret) 1351 err = ret; 1352 } 1353 return err; 1354 } 1355 1356 /* 1357 * remove an extent from the root, returns 0 on success 1358 */ 1359 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root 1360 *root, u64 bytenr, u64 num_bytes, 1361 u64 root_objectid, u64 ref_generation, 1362 u64 owner_objectid, u64 owner_offset, int pin) 1363 { 1364 struct btrfs_root *extent_root = root->fs_info->extent_root; 1365 int pending_ret; 1366 int ret; 1367 1368 
	WARN_ON(num_bytes < root->sectorsize);
	if (!root->ref_cows)
		ref_generation = 0;

	/* frees against the extent tree itself are deferred to avoid
	 * recursing; they are queued in pending_del instead
	 */
	if (root == extent_root) {
		pin_down_bytes(root, bytenr, num_bytes, 1);
		return 0;
	}
	ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
			    ref_generation, owner_objectid, owner_offset,
			    pin, pin == 0);
	pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
	return ret ? ret : pending_ret;
}

/* round 'val' up to the next stripesize boundary */
static u64 stripe_align(struct btrfs_root *root, u64 val)
{
	u64 mask = ((u64)root->stripesize - 1);
	u64 ret = (val + mask) & ~mask;
	return ret;
}

/*
 * walks the btree of allocated extents and find a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == block start
 * ins->flags = BTRFS_EXTENT_ITEM_KEY
 * ins->offset == number of blocks
 * Any available blocks before search_start are skipped.
 */
static int noinline find_free_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *orig_root,
				     u64 num_bytes, u64 empty_size,
				     u64 search_start, u64 search_end,
				     u64 hint_byte, struct btrfs_key *ins,
				     u64 exclude_start, u64 exclude_nr,
				     int data)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	u64 hole_size = 0;
	u64 aligned;
	int ret;
	int slot = 0;
	u64 last_byte = 0;
	u64 orig_search_start = search_start;
	int start_found;
	struct extent_buffer *l;
	struct btrfs_root * root = orig_root->fs_info->extent_root;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total_needed = num_bytes;
	int level;
	struct btrfs_block_group_cache *block_group;
	int full_scan = 0;
	int wrapped = 0;
	u64 cached_start;

	WARN_ON(num_bytes < root->sectorsize);
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

	level = btrfs_header_level(root->node);

	/* very large allocations may use any block group type */
	if (num_bytes >= 32 * 1024 * 1024 && hint_byte) {
		data = BTRFS_BLOCK_GROUP_MIXED;
	}

	search_end = min(search_end,
			 btrfs_super_total_bytes(&info->super_copy));
	/* pick a starting block group from the hint, or from the
	 * transaction's last-used group
	 */
	if (hint_byte) {
		block_group = btrfs_lookup_block_group(info, hint_byte);
		if (!block_group)
			hint_byte = search_start;
		block_group = btrfs_find_block_group(root, block_group,
						     hint_byte, data, 1);
	} else {
		block_group = btrfs_find_block_group(root,
						     trans->block_group,
						     search_start, data, 1);
	}

	total_needed += empty_size;
	/* NOTE(review): btrfs_alloc_path() result is not checked for
	 * NULL here -- confirm against later versions
	 */
	path = btrfs_alloc_path();
check_failed:
	if (!block_group) {
		block_group = btrfs_lookup_block_group(info, search_start);
		if (!block_group)
			block_group = btrfs_lookup_block_group(info,
						       orig_search_start);
	}
	search_start = find_search_start(root, &block_group, search_start,
					 total_needed, data);
	search_start = stripe_align(root, search_start);
	cached_start = search_start;
	btrfs_init_path(path);
	ins->objectid = search_start;
	ins->offset = 0;
	start_found = 0;
	path->reada = 2;

	ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
	if (ret < 0)
		goto error;
	ret = find_previous_extent(root, path);
	if (ret < 0)
		goto error;
	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
	/* scan extent items forward looking for a hole >= num_bytes */
	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;

			/* ran off the end of the tree: everything from
			 * here to search_end is free
			 */
			search_start = max(search_start,
					   block_group->key.objectid);
			if (!start_found) {
				aligned = stripe_align(root, search_start);
				ins->objectid = aligned;
				if (aligned >= search_end) {
					ret = -ENOSPC;
					goto error;
				}
				ins->offset = search_end - aligned;
				start_found = 1;
				goto check_pending;
			}
			ins->objectid = stripe_align(root,
						     last_byte > search_start ?
						     last_byte : search_start);
			if (search_end <= ins->objectid) {
				ret = -ENOSPC;
				goto error;
			}
			ins->offset = search_end - ins->objectid;
			BUG_ON(ins->objectid >= search_end);
			goto check_pending;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		/* gap between last_byte and this item: big enough? */
		if (key.objectid >= search_start && key.objectid > last_byte &&
		    start_found) {
			if (last_byte < search_start)
				last_byte = search_start;
			aligned = stripe_align(root, last_byte);
			hole_size = key.objectid - aligned;
			if (key.objectid > aligned && hole_size >= num_bytes) {
				ins->objectid = aligned;
				ins->offset = hole_size;
				goto check_pending;
			}
		}
		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) {
			if (!start_found && btrfs_key_type(&key) ==
			    BTRFS_BLOCK_GROUP_ITEM_KEY) {
				last_byte = key.objectid;
				start_found = 1;
			}
			goto next;
		}


		start_found = 1;
		last_byte = key.objectid + key.offset;

		/* walked past the end of the preferred block group:
		 * move on to the next one
		 */
		if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
		    last_byte >= block_group->key.objectid +
		    block_group->key.offset) {
			btrfs_release_path(root, path);
			search_start = block_group->key.objectid +
				block_group->key.offset;
			goto new_group;
		}
next:
		path->slots[0]++;
		cond_resched();
	}
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
	btrfs_release_path(root, path);
	BUG_ON(ins->objectid < search_start);

	if (ins->objectid + num_bytes >= search_end)
		goto enospc;
	if (!full_scan && data != BTRFS_BLOCK_GROUP_MIXED &&
	    ins->objectid + num_bytes > block_group->
	    key.objectid + block_group->key.offset) {
		search_start = block_group->key.objectid +
			block_group->key.offset;
		goto new_group;
	}
	/* reject ranges with a pending insert or still pinned */
	if (test_range_bit(&info->extent_ins, ins->objectid,
			   ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
		search_start = ins->objectid + num_bytes;
		goto new_group;
	}
	if (test_range_bit(&info->pinned_extents, ins->objectid,
			   ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
		search_start = ins->objectid + num_bytes;
		goto new_group;
	}
	/* respect the caller's excluded window */
	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
	    ins->objectid < exclude_start + exclude_nr)) {
		search_start = exclude_start + exclude_nr;
		goto new_group;
	}
	if (!data) {
		block_group = btrfs_lookup_block_group(info, ins->objectid);
		if (block_group)
			trans->block_group = block_group;
	}
	ins->offset = num_bytes;
	btrfs_free_path(path);
	return 0;

new_group:
	/* wrapped the whole device once already; second wrap escalates
	 * to a full scan with mixed block groups before giving up
	 */
	if (search_start + num_bytes >= search_end) {
enospc:
		search_start = orig_search_start;
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
		if (wrapped) {
			if (!full_scan)
				total_needed -= empty_size;
			full_scan = 1;
			data = BTRFS_BLOCK_GROUP_MIXED;
		} else
			wrapped = 1;
	}
	block_group = btrfs_lookup_block_group(info, search_start);
	cond_resched();
	block_group = btrfs_find_block_group(root, block_group,
					     search_start, data, 0);
	goto check_failed;

error:
	btrfs_release_path(root, path);
	btrfs_free_path(path);
	return ret;
}
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the extent through ins, and a tree buffer for
 * the first block of the extent through buf.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root,
		       u64 num_bytes, u64 root_objectid, u64 ref_generation,
		       u64 owner, u64 owner_offset,
		       u64 empty_size, u64 hint_byte,
		       u64 search_end, struct btrfs_key *ins, int data)
{
	int ret;
	int pending_ret;
	u64 super_used;
	u64 root_used;
	u64 search_start = 0;
	u64 new_hint;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct btrfs_extent_item extent_item;
	struct btrfs_path *path;

	btrfs_set_stack_extent_refs(&extent_item, 1);

	/* honour the mount-time alloc_start if it is inside the device */
	new_hint = max(hint_byte, root->fs_info->alloc_start);
	if (new_hint < btrfs_super_total_bytes(&info->super_copy))
		hint_byte = new_hint;

	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(trans, root, num_bytes, empty_size,
			       search_start, search_end, hint_byte, ins,
			       trans->alloc_exclude_start,
			       trans->alloc_exclude_nr, data);
	if (ret)
		printk("find free extent returns %d\n", ret);
	BUG_ON(ret);
	/* dead with BUG_ON above, kept for when BUG_ON is relaxed */
	if (ret)
		return ret;

	/* block accounting for super block */
	super_used = btrfs_super_bytes_used(&info->super_copy);
	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);

	/* block accounting for root item */
	root_used = btrfs_root_used(&root->root_item);
	btrfs_set_root_used(&root->root_item, root_used + num_bytes);

	/* the range is no longer free space */
	clear_extent_dirty(&root->fs_info->free_space_cache,
			   ins->objectid, ins->objectid + ins->offset - 1,
			   GFP_NOFS);

	/* allocations against the extent tree itself cannot insert into
	 * it right now; queue them for finish_current_insert()
	 */
	if (root == extent_root) {
		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
				ins->objectid + ins->offset - 1,
				EXTENT_LOCKED, GFP_NOFS);
		WARN_ON(data == 1);
		goto update_block;
	}

	/* keep find_free_extent from handing this range out again while
	 * the extent item insert may temporarily release the path
	 */
	WARN_ON(trans->alloc_exclude_nr);
	trans->alloc_exclude_start = ins->objectid;
	trans->alloc_exclude_nr = ins->offset;
	ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
				sizeof(extent_item));

	trans->alloc_exclude_start = 0;
	trans->alloc_exclude_nr = 0;
	BUG_ON(ret);

	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_insert_extent_backref(trans, extent_root, path,
					  ins->objectid, root_objectid,
					  ref_generation, owner, owner_offset);

	BUG_ON(ret);
	btrfs_free_path(path);
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);

	if (ret) {
		return ret;
	}
	if (pending_ret) {
		return pending_ret;
	}

update_block:
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
				 data);
	BUG_ON(ret);
	return 0;
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     u32 blocksize,
					     u64 root_objectid, u64 hint,
					     u64 empty_size)
{
	u64 ref_generation;

	/* only cow-able roots track the generation in their backrefs */
	if (root->ref_cows)
		ref_generation = trans->transid;
	else
		ref_generation = 0;


	return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
					ref_generation, 0, 0, hint, empty_size);
}

/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or NULL.
 */
struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
					       struct btrfs_root *root,
					       u32 blocksize,
					       u64 root_objectid,
					       u64 ref_generation,
					       u64 first_objectid,
					       int level,
					       u64 hint,
					       u64 empty_size)
{
	struct btrfs_key ins;
	int ret;
	struct extent_buffer *buf;

	/* tree block backrefs use level as owner and the first objectid
	 * as the owner offset
	 */
	ret = btrfs_alloc_extent(trans, root, blocksize,
				 root_objectid, ref_generation,
				 level, first_objectid, empty_size, hint,
				 (u64)-1, &ins, 0);
	if (ret) {
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
	}
	buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
	if (!buf) {
		/* give the extent back before reporting the failure */
		btrfs_free_extent(trans, root, ins.objectid, blocksize,
				  root->root_key.objectid, ref_generation,
				  0, 0, 0);
		return ERR_PTR(-ENOMEM);
	}
	btrfs_set_buffer_uptodate(buf);
	/* new block must reach disk with this transaction's commit */
	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
			 buf->start + buf->len - 1, GFP_NOFS);
	set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree,
			buf->start, buf->start + buf->len - 1,
			EXTENT_CSUM, GFP_NOFS);
	buf->flags |= EXTENT_CSUM;
	btrfs_set_buffer_defrag(buf);
	trans->blocks_used++;
	return buf;
}

/*
 * Drop one reference on every data extent pointed to by the items of
 * 'leaf'.  Called from walk_down_tree when the leaf itself is going
 * away.  Inline extents and holes (disk_bytenr == 0) are skipped.
 */
static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct extent_buffer *leaf)
{
	u64 leaf_owner;
	u64 leaf_generation;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

	BUG_ON(!btrfs_is_leaf(leaf));
	nritems = btrfs_header_nritems(leaf);
	leaf_owner = btrfs_header_owner(leaf);
	leaf_generation = btrfs_header_generation(leaf);

	for (i = 0; i < nritems; i++) {
		u64 disk_bytenr;

		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		if (disk_bytenr == 0)
			continue;
		ret = btrfs_free_extent(trans, root, disk_bytenr,
				btrfs_file_extent_disk_num_bytes(leaf, fi),
				leaf_owner, leaf_generation,
				key.objectid, key.offset, 0);
		BUG_ON(ret);
	}
	return 0;
}

/*
 * Issue readahead for every single-referenced child of 'node' so the
 * upcoming walk_down_tree pass does not read them synchronously.
 * Drops and retakes fs_mutex around the readahead I/O.
 */
static void noinline reada_walk_down(struct btrfs_root *root,
				     struct extent_buffer *node)
{
	int i;
	u32 nritems;
	u64 bytenr;
	int ret;
	u32 refs;
	int level;
	u32 blocksize;

	nritems = btrfs_header_nritems(node);
	level = btrfs_header_level(node);
	for (i = 0; i < nritems; i++) {
		bytenr = btrfs_node_blockptr(node, i);
		blocksize = btrfs_level_size(root, level - 1);
		ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs);
		BUG_ON(ret);
		/* shared blocks won't be descended into; skip them */
		if (refs != 1)
			continue;
		mutex_unlock(&root->fs_info->fs_mutex);
		ret = readahead_tree_block(root, bytenr, blocksize);
		cond_resched();
		mutex_lock(&root->fs_info->fs_mutex);
		if (ret)
			break;
	}
}

/*
 * helper function for drop_snapshot, this walks down the tree dropping ref
 * counts as it goes.
 */
static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path, int *level)
{
	u64 root_owner;
	u64 root_gen;
	u64 bytenr;
	struct extent_buffer *next;
	struct extent_buffer *cur;
	struct extent_buffer *parent;
	u32 blocksize;
	int ret;
	u32 refs;

	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);
	ret = lookup_extent_ref(trans, root,
				path->nodes[*level]->start,
				path->nodes[*level]->len, &refs);
	BUG_ON(ret);
	/* shared with another root: drop only our ref, do not descend */
	if (refs > 1)
		goto out;

	/*
	 * walk down to the last node level and free all the leaves
	 */
	while(*level >= 0) {
		WARN_ON(*level < 0);
		WARN_ON(*level >= BTRFS_MAX_LEVEL);
		cur = path->nodes[*level];

		/* prefetch children the first time we enter a node */
		if (*level > 0 && path->slots[*level] == 0)
			reada_walk_down(root, cur);

		if (btrfs_header_level(cur) != *level)
			WARN_ON(1);

		if (path->slots[*level] >=
		    btrfs_header_nritems(cur))
			break;
		if (*level == 0) {
			ret = drop_leaf_ref(trans, root, cur);
			BUG_ON(ret);
			break;
		}
		bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
		blocksize = btrfs_level_size(root, *level - 1);
		ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
		BUG_ON(ret);
		/* shared child: just drop our ref and move on */
		if (refs != 1) {
			parent = path->nodes[*level];
			root_owner = btrfs_header_owner(parent);
			root_gen = btrfs_header_generation(parent);
			path->slots[*level]++;
			ret = btrfs_free_extent(trans, root, bytenr,
						blocksize, root_owner,
						root_gen, 0, 0, 1);
			BUG_ON(ret);
			continue;
		}
		next = btrfs_find_tree_block(root, bytenr, blocksize);
		if (!next || !btrfs_buffer_uptodate(next)) {
			/* NOTE(review): free_extent_buffer(next) here may
			 * see next == NULL -- confirm it tolerates NULL
			 */
			free_extent_buffer(next);
			mutex_unlock(&root->fs_info->fs_mutex);
			next = read_tree_block(root, bytenr, blocksize);
			mutex_lock(&root->fs_info->fs_mutex);

			/* we dropped the lock, check one more time */
			ret = lookup_extent_ref(trans, root, bytenr,
						blocksize, &refs);
			BUG_ON(ret);
			if (refs != 1) {
				parent = path->nodes[*level];
				root_owner = btrfs_header_owner(parent);
				root_gen = btrfs_header_generation(parent);

				path->slots[*level]++;
				free_extent_buffer(next);
				ret = btrfs_free_extent(trans, root, bytenr,
							blocksize,
							root_owner,
							root_gen, 0, 0, 1);
				BUG_ON(ret);
				continue;
			}
		}
		WARN_ON(*level <= 0);
		/* descend: make 'next' the new current node */
		if (path->nodes[*level-1])
			free_extent_buffer(path->nodes[*level-1]);
		path->nodes[*level-1] = next;
		*level = btrfs_header_level(next);
		path->slots[*level] = 0;
	}
out:
	WARN_ON(*level < 0);
	WARN_ON(*level >= BTRFS_MAX_LEVEL);

	/* free the block we just finished and pop up one level */
	if (path->nodes[*level] == root->node) {
		root_owner = root->root_key.objectid;
		parent = path->nodes[*level];
	} else {
		parent = path->nodes[*level + 1];
		root_owner = btrfs_header_owner(parent);
	}

	root_gen = btrfs_header_generation(parent);
	ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
				path->nodes[*level]->len,
				root_owner, root_gen, 0, 0, 1);
	free_extent_buffer(path->nodes[*level]);
	path->nodes[*level] = NULL;
	*level += 1;
	BUG_ON(ret);
	return 0;
}

/*
 * helper for dropping snapshots.  This walks back up the tree in the path
 * to find the first node higher up where we haven't yet gone through
 * all the slots
 */
static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path, int *level)
{
	u64 root_owner;
	u64 root_gen;
	struct btrfs_root_item *root_item = &root->root_item;
	int i;
	int slot;
	int ret;

	for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
		slot = path->slots[i];
		if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
			struct extent_buffer *node;
			struct btrfs_disk_key disk_key;
			/* more slots left at this level: advance and
			 * record progress so the drop can be resumed
			 */
			node = path->nodes[i];
			path->slots[i]++;
			*level = i;
			WARN_ON(*level == 0);
			btrfs_node_key(node, &disk_key, path->slots[i]);
			memcpy(&root_item->drop_progress,
			       &disk_key, sizeof(disk_key));
			root_item->drop_level = i;
			return 0;
		} else {
			/* level exhausted: free its block and keep rising */
			if (path->nodes[*level] == root->node) {
				root_owner = root->root_key.objectid;
				root_gen =
				   btrfs_header_generation(path->nodes[*level]);
			} else {
				struct extent_buffer *node;
				node = path->nodes[*level + 1];
				root_owner = btrfs_header_owner(node);
				root_gen = btrfs_header_generation(node);
			}
			ret = btrfs_free_extent(trans, root,
						path->nodes[*level]->start,
						path->nodes[*level]->len,
						root_owner, root_gen, 0, 0, 1);
			BUG_ON(ret);
			free_extent_buffer(path->nodes[*level]);
			path->nodes[*level] = NULL;
			*level = i + 1;
		}
	}
	return 1;
}

/*
 * drop the reference count on the tree rooted at 'snap'.  This traverses
 * the tree freeing any blocks that have a ref count of zero after being
 * decremented.
 */
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
			*root)
{
	int ret = 0;
	int wret;
	int level;
	struct btrfs_path *path;
	int i;
	int orig_level;
	struct btrfs_root_item *root_item = &root->root_item;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	level = btrfs_header_level(root->node);
	orig_level = level;
	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		/* fresh drop: start from the root node */
		path->nodes[level] = root->node;
		extent_buffer_get(root->node);
		path->slots[level] = 0;
	} else {
		/* resuming: seek back to the recorded drop_progress key */
		struct btrfs_key key;
		struct btrfs_disk_key found_key;
		struct extent_buffer *node;

		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		level = root_item->drop_level;
		path->lowest_level = level;
		wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (wret < 0) {
			ret = wret;
			goto out;
		}
		node = path->nodes[level];
		btrfs_node_key(node, &found_key, path->slots[level]);
		WARN_ON(memcmp(&found_key, &root_item->drop_progress,
			       sizeof(found_key)));
	}
	/* alternate walking down (freeing) and back up until either the
	 * tree is gone (wret > 0 from walk_up) or progress was saved
	 * and we return -EAGAIN for the caller to continue later
	 */
	while(1) {
		wret = walk_down_tree(trans, root, path, &level);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;

		wret = walk_up_tree(trans, root, path, &level);
		if (wret > 0)
			break;
		if (wret < 0)
			ret = wret;
		ret = -EAGAIN;
		break;
	}
	for (i = 0; i <= orig_level; i++) {
		if (path->nodes[i]) {
			free_extent_buffer(path->nodes[i]);
			path->nodes[i] = NULL;
		}
	}
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Tear down the in-memory block group cache and free space cache at
 * unmount time.  Always returns 0.
 */
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
	u64 start;
	u64 end;
	u64 ptr;
	int ret;
	while(1) {
		ret = find_first_extent_bit(&info->block_group_cache, 0,
					    &start, &end, (unsigned int)-1);
		if (ret)
			break;
		/* the private pointer holds the block group struct */
		ret = get_state_private(&info->block_group_cache, start, &ptr);
		if (!ret)
			kfree((void *)(unsigned long)ptr);
		clear_extent_bits(&info->block_group_cache, start,
				  end, (unsigned int)-1, GFP_NOFS);
	}
	while(1) {
		ret = find_first_extent_bit(&info->free_space_cache, 0,
					    &start, &end, EXTENT_DIRTY);
		if (ret)
			break;
		clear_extent_dirty(&info->free_space_cache, start,
				   end, GFP_NOFS);
	}
	return 0;
}

/*
 * Re-dirty the pages of 'inode' covering [start, start + len) so the
 * data gets written out again (to a new location) by delalloc.
 * Used while relocating extents for a shrink.  Always returns 0, even
 * when pages could not be read.
 */
static int noinline relocate_inode_pages(struct inode *inode, u64 start,
					 u64 len)
{
	u64 page_start;
	u64 page_end;
	u64 delalloc_start;
	u64 existing_delalloc;
	unsigned long last_index;
	unsigned long i;
	struct page *page;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct file_ra_state *ra;

	/* NOTE(review): kzalloc result is used unchecked below --
	 * confirm against later versions
	 */
	ra = kzalloc(sizeof(*ra), GFP_NOFS);

	mutex_lock(&inode->i_mutex);
	i = start >> PAGE_CACHE_SHIFT;
	last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;

	/* warm the page cache for the whole range up front */
	file_ra_state_init(ra, inode->i_mapping);
	btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index);
	kfree(ra);

	for (; i <= last_index; i++) {
		page = grab_cache_page(inode->i_mapping, i);
		if (!page)
			goto out_unlock;
		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				page_cache_release(page);
				goto out_unlock;
			}
		}
		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
		page_end = page_start + PAGE_CACHE_SIZE - 1;

		lock_extent(em_tree, page_start, page_end, GFP_NOFS);

		/* only account bytes not already marked delalloc */
		delalloc_start = page_start;
		existing_delalloc =
			count_range_bits(&BTRFS_I(inode)->extent_tree,
					 &delalloc_start, page_end,
					 PAGE_CACHE_SIZE, EXTENT_DELALLOC);

		set_extent_delalloc(em_tree, page_start,
				    page_end, GFP_NOFS);

		spin_lock(&root->fs_info->delalloc_lock);
		root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE -
						 existing_delalloc;
		spin_unlock(&root->fs_info->delalloc_lock);

		unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
		set_page_dirty(page);
		unlock_page(page);
		page_cache_release(page);
	}

out_unlock:
	mutex_unlock(&inode->i_mutex);
	return 0;
}

/*
 * note, this releases the path
 */
static int noinline relocate_one_reference(struct btrfs_root *extent_root,
					   struct btrfs_path *path,
					   struct btrfs_key *extent_key)
{
	struct inode *inode;
	struct btrfs_root *found_root;
	struct btrfs_key *root_location;
	struct btrfs_extent_ref *ref;
	u64 ref_root;
	u64 ref_gen;
	u64 ref_objectid;
	u64 ref_offset;
	int ret;

	/* pull the backref details out before releasing the path */
	ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
			     struct btrfs_extent_ref);
	ref_root = btrfs_ref_root(path->nodes[0], ref);
	ref_gen = btrfs_ref_generation(path->nodes[0], ref);
	ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
	ref_offset = btrfs_ref_offset(path->nodes[0], ref);
	btrfs_release_path(extent_root, path);

	/* NOTE(review): kmalloc result is dereferenced unchecked --
	 * confirm against later versions
	 */
	root_location = kmalloc(sizeof(*root_location), GFP_NOFS);
	root_location->objectid = ref_root;
	if (ref_gen == 0)
		root_location->offset = 0;
	else
		root_location->offset = (u64)-1;
	root_location->type = BTRFS_ROOT_ITEM_KEY;

	found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
						root_location);
	BUG_ON(!found_root);
	kfree(root_location);

	if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
		/* data extent: rewrite the file pages that point at it */
		mutex_unlock(&extent_root->fs_info->fs_mutex);
		inode = btrfs_iget_locked(extent_root->fs_info->sb,
					  ref_objectid, found_root);
		if (inode->i_state & I_NEW) {
			/* the inode and parent dir are two different roots */
			BTRFS_I(inode)->root = found_root;
			BTRFS_I(inode)->location.objectid = ref_objectid;
			BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
			BTRFS_I(inode)->location.offset = 0;
			btrfs_read_locked_inode(inode);
			unlock_new_inode(inode);

		}
		/* this can happen if the reference is not against
		 * the latest version of the tree root
		 */
		if (is_bad_inode(inode)) {
			mutex_lock(&extent_root->fs_info->fs_mutex);
			goto out;
		}
		relocate_inode_pages(inode, ref_offset, extent_key->offset);
		/* FIXME, data=ordered will help get rid of this */
		filemap_fdatawrite(inode->i_mapping);
		iput(inode);
		mutex_lock(&extent_root->fs_info->fs_mutex);
	} else {
		/* tree block: cow it (and its path) in the owning root so
		 * it gets reallocated elsewhere
		 */
		struct btrfs_trans_handle *trans;
		struct btrfs_key found_key;
		struct extent_buffer *eb;
		int level;
		int i;

		trans = btrfs_start_transaction(found_root, 1);
		eb = read_tree_block(found_root, extent_key->objectid,
				     extent_key->offset);
		level = btrfs_header_level(eb);

		if (level == 0)
			btrfs_item_key_to_cpu(eb, &found_key, 0);
		else
			btrfs_node_key_to_cpu(eb, &found_key, 0);

		free_extent_buffer(eb);

		/* searching with cow=1 down to this level forces the
		 * block to be rewritten
		 */
		path->lowest_level = level;
		path->reada = 2;
		ret = btrfs_search_slot(trans, found_root, &found_key, path,
					0, 1);
		path->lowest_level = 0;
		for (i = level; i < BTRFS_MAX_LEVEL; i++) {
			if (!path->nodes[i])
				break;
			free_extent_buffer(path->nodes[i]);
			path->nodes[i] = NULL;
		}
		btrfs_release_path(found_root, path);
		btrfs_end_transaction(trans, found_root);
	}

out:
	return 0;
}

/*
 * Relocate every reference to 'extent_key' by walking its backref
 * items and calling relocate_one_reference on each.  Returns 0 on
 * success or a negative error from the search.
 */
static int noinline relocate_one_extent(struct btrfs_root *extent_root,
					struct btrfs_path *path,
					struct btrfs_key *extent_key)
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	u32 nritems;
	u32 item_size;
	int ret = 0;

	key.objectid = extent_key->objectid;
	key.type = BTRFS_EXTENT_REF_KEY;
	key.offset = 0;

	while(1) {
		ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);

		if (ret < 0)
			goto out;

		ret = 0;
		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] == nritems)
			goto out;

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != extent_key->objectid)
			break;

		if (found_key.type != BTRFS_EXTENT_REF_KEY)
			break;

		/* resume after this backref on the next pass; the
		 * helper below releases the path
		 */
		key.offset = found_key.offset + 1;
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);

		ret = relocate_one_reference(extent_root, path, extent_key);
		if (ret)
			goto out;
	}
	ret = 0;
out:
	btrfs_release_path(extent_root, path);
	return ret;
}

/*
 * Shrink the space used by the extent tree to 'new_size' bytes by
 * relocating every extent that lives beyond the new end of the
 * device.  (Continues beyond this chunk.)
 */
int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	struct btrfs_path *path;
	u64 cur_byte;
	u64 total_found;
	struct btrfs_fs_info *info = root->fs_info;
	struct extent_map_tree *block_group_cache;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int progress = 0;

	btrfs_set_super_total_bytes(&info->super_copy, new_size);
	clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
			   GFP_NOFS);
	block_group_cache = &info->block_group_cache;
	path = btrfs_alloc_path();
	root = root->fs_info->extent_root;
	path->reada = 2;

again:
	total_found = 0;
	key.objectid = new_size;
	key.offset = 0;
	key.type = 0;
	cur_byte = key.objectid;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	ret = find_previous_extent(root, path);
	if (ret < 0)
		goto out;
	if (ret == 0) {
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid + found_key.offset > new_size) {
			cur_byte =
found_key.objectid; 2381 key.objectid = cur_byte; 2382 } 2383 } 2384 btrfs_release_path(root, path); 2385 2386 while(1) { 2387 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2388 if (ret < 0) 2389 goto out; 2390 2391 leaf = path->nodes[0]; 2392 nritems = btrfs_header_nritems(leaf); 2393 next: 2394 if (path->slots[0] >= nritems) { 2395 ret = btrfs_next_leaf(root, path); 2396 if (ret < 0) 2397 goto out; 2398 if (ret == 1) { 2399 ret = 0; 2400 break; 2401 } 2402 leaf = path->nodes[0]; 2403 nritems = btrfs_header_nritems(leaf); 2404 } 2405 2406 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2407 2408 if (progress && need_resched()) { 2409 memcpy(&key, &found_key, sizeof(key)); 2410 mutex_unlock(&root->fs_info->fs_mutex); 2411 cond_resched(); 2412 mutex_lock(&root->fs_info->fs_mutex); 2413 btrfs_release_path(root, path); 2414 btrfs_search_slot(NULL, root, &key, path, 0, 0); 2415 progress = 0; 2416 goto next; 2417 } 2418 progress = 1; 2419 2420 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY || 2421 found_key.objectid + found_key.offset <= cur_byte) { 2422 path->slots[0]++; 2423 goto next; 2424 } 2425 2426 total_found++; 2427 cur_byte = found_key.objectid + found_key.offset; 2428 key.objectid = cur_byte; 2429 btrfs_release_path(root, path); 2430 ret = relocate_one_extent(root, path, &found_key); 2431 } 2432 2433 btrfs_release_path(root, path); 2434 2435 if (total_found > 0) { 2436 trans = btrfs_start_transaction(tree_root, 1); 2437 btrfs_commit_transaction(trans, tree_root); 2438 2439 mutex_unlock(&root->fs_info->fs_mutex); 2440 btrfs_clean_old_snapshots(tree_root); 2441 mutex_lock(&root->fs_info->fs_mutex); 2442 2443 trans = btrfs_start_transaction(tree_root, 1); 2444 btrfs_commit_transaction(trans, tree_root); 2445 goto again; 2446 } 2447 2448 trans = btrfs_start_transaction(root, 1); 2449 key.objectid = new_size; 2450 key.offset = 0; 2451 key.type = 0; 2452 while(1) { 2453 u64 ptr; 2454 2455 ret = btrfs_search_slot(trans, root, &key, path, 
-1, 1); 2456 if (ret < 0) 2457 goto out; 2458 2459 leaf = path->nodes[0]; 2460 nritems = btrfs_header_nritems(leaf); 2461 bg_next: 2462 if (path->slots[0] >= nritems) { 2463 ret = btrfs_next_leaf(root, path); 2464 if (ret < 0) 2465 break; 2466 if (ret == 1) { 2467 ret = 0; 2468 break; 2469 } 2470 leaf = path->nodes[0]; 2471 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2472 2473 /* 2474 * btrfs_next_leaf doesn't cow buffers, we have to 2475 * do the search again 2476 */ 2477 memcpy(&key, &found_key, sizeof(key)); 2478 btrfs_release_path(root, path); 2479 goto resched_check; 2480 } 2481 2482 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2483 if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) { 2484 printk("shrinker found key %Lu %u %Lu\n", 2485 found_key.objectid, found_key.type, 2486 found_key.offset); 2487 path->slots[0]++; 2488 goto bg_next; 2489 } 2490 ret = get_state_private(&info->block_group_cache, 2491 found_key.objectid, &ptr); 2492 if (!ret) 2493 kfree((void *)(unsigned long)ptr); 2494 2495 clear_extent_bits(&info->block_group_cache, found_key.objectid, 2496 found_key.objectid + found_key.offset - 1, 2497 (unsigned int)-1, GFP_NOFS); 2498 2499 key.objectid = found_key.objectid + 1; 2500 btrfs_del_item(trans, root, path); 2501 btrfs_release_path(root, path); 2502 resched_check: 2503 if (need_resched()) { 2504 mutex_unlock(&root->fs_info->fs_mutex); 2505 cond_resched(); 2506 mutex_lock(&root->fs_info->fs_mutex); 2507 } 2508 } 2509 clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, 2510 GFP_NOFS); 2511 btrfs_commit_transaction(trans, root); 2512 out: 2513 btrfs_free_path(path); 2514 return ret; 2515 } 2516 2517 int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, 2518 struct btrfs_root *root, u64 new_size) 2519 { 2520 struct btrfs_path *path; 2521 u64 nr = 0; 2522 u64 cur_byte; 2523 u64 old_size; 2524 unsigned long rem; 2525 struct btrfs_block_group_cache *cache; 2526 struct btrfs_block_group_item *item; 
2527 struct btrfs_fs_info *info = root->fs_info; 2528 struct extent_map_tree *block_group_cache; 2529 struct btrfs_key key; 2530 struct extent_buffer *leaf; 2531 int ret; 2532 int bit; 2533 2534 old_size = btrfs_super_total_bytes(&info->super_copy); 2535 block_group_cache = &info->block_group_cache; 2536 2537 root = info->extent_root; 2538 2539 cache = btrfs_lookup_block_group(root->fs_info, old_size - 1); 2540 2541 cur_byte = cache->key.objectid + cache->key.offset; 2542 if (cur_byte >= new_size) 2543 goto set_size; 2544 2545 key.offset = BTRFS_BLOCK_GROUP_SIZE; 2546 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 2547 2548 path = btrfs_alloc_path(); 2549 if (!path) 2550 return -ENOMEM; 2551 2552 while(cur_byte < new_size) { 2553 key.objectid = cur_byte; 2554 ret = btrfs_insert_empty_item(trans, root, path, &key, 2555 sizeof(struct btrfs_block_group_item)); 2556 BUG_ON(ret); 2557 leaf = path->nodes[0]; 2558 item = btrfs_item_ptr(leaf, path->slots[0], 2559 struct btrfs_block_group_item); 2560 2561 btrfs_set_disk_block_group_used(leaf, item, 0); 2562 div_long_long_rem(nr, 3, &rem); 2563 if (rem) { 2564 btrfs_set_disk_block_group_flags(leaf, item, 2565 BTRFS_BLOCK_GROUP_DATA); 2566 } else { 2567 btrfs_set_disk_block_group_flags(leaf, item, 0); 2568 } 2569 nr++; 2570 2571 cache = kmalloc(sizeof(*cache), GFP_NOFS); 2572 BUG_ON(!cache); 2573 2574 read_extent_buffer(leaf, &cache->item, (unsigned long)item, 2575 sizeof(cache->item)); 2576 2577 memcpy(&cache->key, &key, sizeof(key)); 2578 cache->cached = 0; 2579 cache->pinned = 0; 2580 cur_byte = key.objectid + key.offset; 2581 btrfs_release_path(root, path); 2582 2583 if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { 2584 bit = BLOCK_GROUP_DATA; 2585 cache->data = BTRFS_BLOCK_GROUP_DATA; 2586 } else { 2587 bit = BLOCK_GROUP_METADATA; 2588 cache->data = 0; 2589 } 2590 2591 /* use EXTENT_LOCKED to prevent merging */ 2592 set_extent_bits(block_group_cache, key.objectid, 2593 key.objectid + key.offset - 1, 2594 bit | 
EXTENT_LOCKED, GFP_NOFS); 2595 set_state_private(block_group_cache, key.objectid, 2596 (unsigned long)cache); 2597 } 2598 btrfs_free_path(path); 2599 set_size: 2600 btrfs_set_super_total_bytes(&info->super_copy, new_size); 2601 return 0; 2602 } 2603 2604 int btrfs_read_block_groups(struct btrfs_root *root) 2605 { 2606 struct btrfs_path *path; 2607 int ret; 2608 int err = 0; 2609 int bit; 2610 struct btrfs_block_group_cache *cache; 2611 struct btrfs_fs_info *info = root->fs_info; 2612 struct extent_map_tree *block_group_cache; 2613 struct btrfs_key key; 2614 struct btrfs_key found_key; 2615 struct extent_buffer *leaf; 2616 2617 block_group_cache = &info->block_group_cache; 2618 2619 root = info->extent_root; 2620 key.objectid = 0; 2621 key.offset = BTRFS_BLOCK_GROUP_SIZE; 2622 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 2623 2624 path = btrfs_alloc_path(); 2625 if (!path) 2626 return -ENOMEM; 2627 2628 while(1) { 2629 ret = btrfs_search_slot(NULL, info->extent_root, 2630 &key, path, 0, 0); 2631 if (ret != 0) { 2632 err = ret; 2633 break; 2634 } 2635 leaf = path->nodes[0]; 2636 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2637 cache = kmalloc(sizeof(*cache), GFP_NOFS); 2638 if (!cache) { 2639 err = -1; 2640 break; 2641 } 2642 2643 read_extent_buffer(leaf, &cache->item, 2644 btrfs_item_ptr_offset(leaf, path->slots[0]), 2645 sizeof(cache->item)); 2646 memcpy(&cache->key, &found_key, sizeof(found_key)); 2647 cache->cached = 0; 2648 cache->pinned = 0; 2649 key.objectid = found_key.objectid + found_key.offset; 2650 btrfs_release_path(root, path); 2651 2652 if (cache->item.flags & BTRFS_BLOCK_GROUP_MIXED) { 2653 bit = BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA; 2654 cache->data = BTRFS_BLOCK_GROUP_MIXED; 2655 } else if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { 2656 bit = BLOCK_GROUP_DATA; 2657 cache->data = BTRFS_BLOCK_GROUP_DATA; 2658 } else { 2659 bit = BLOCK_GROUP_METADATA; 2660 cache->data = 0; 2661 } 2662 2663 /* use EXTENT_LOCKED to prevent 
merging */ 2664 set_extent_bits(block_group_cache, found_key.objectid, 2665 found_key.objectid + found_key.offset - 1, 2666 bit | EXTENT_LOCKED, GFP_NOFS); 2667 set_state_private(block_group_cache, found_key.objectid, 2668 (unsigned long)cache); 2669 2670 if (key.objectid >= 2671 btrfs_super_total_bytes(&info->super_copy)) 2672 break; 2673 } 2674 2675 btrfs_free_path(path); 2676 return 0; 2677 } 2678