/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}

static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

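/*
 * For illustration (assuming 4 KiB pages and a 4 KiB sectorsize): each
 * bitmap covers BITS_PER_BITMAP * sectorsize = 32768 * 4096 bytes = 128 MiB
 * of the block group, and offset_to_bitmap() rounds an offset down to the
 * start of the 128 MiB chunk it falls in, relative to the start of the
 * block group.  Bitmap entries are indexed by that rounded-down start.
 */
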
/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we
 * just want a section that is at least 'bytes' in size and comes at or after
 * the given offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}

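/*
 * Worked example (for illustration, assuming PAGE_CACHE_SIZE is 4 KiB): a
 * 1 GiB block group gets max_bytes = 32 KiB of cache memory.  With one
 * bitmap already in use, bitmap_bytes = (1 + 1) * 4 KiB = 8 KiB, leaving
 * extent_bytes = min(32 KiB - 8 KiB, 32 KiB / 2) = 16 KiB, so extents_thresh
 * ends up as 16 KiB / sizeof(struct btrfs_free_space) extent entries before
 * new free space starts going into bitmaps (see insert_into_bitmap() below).
 */
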
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		block_group->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	block_group->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}

static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}

static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

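	/*
	 * The search key is rounded down to the start of the bitmap that
	 * would cover *offset, since a bitmap entry covering *offset is
	 * indexed at that lower offset; fuzzy is set so the first entry at
	 * or after the key is also acceptable.
	 */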
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);
	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	info->bytes = 0;
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}

static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

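		/*
		 * For illustration (assuming 4 KiB pages and sectors): one
		 * bitmap only covers BITS_PER_BITMAP * sectorsize = 128 MiB,
		 * so a free range larger than that, or one that straddles a
		 * 128 MiB boundary, is tracked by more than one bitmap entry
		 * and has to be cleared piece by piece.
		 */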
		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}

static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}

int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

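	/*
	 * Example (hypothetical values, for illustration): if [7MB, 8MB) and
	 * [9MB, 10MB) are already free as extent entries and we now add
	 * [8MB, 9MB), the two neighbours are unlinked and merged into this
	 * entry below, so a single [7MB, 10MB) extent entry ends up in the
	 * tree.
	 */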
	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is, remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
			goto out;
		}
	}

	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}

int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			       " trying to use %llu\n",
			       (unsigned long long)info->offset,
			       (unsigned long long)info->bytes,
			       (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
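		/*
		 * Example (hypothetical values, for illustration): if this
		 * entry covers [0, 8MB) and we remove [1MB, 2MB), the entry
		 * is trimmed and re-linked as [2MB, 8MB) below, and the
		 * btrfs_add_free_space() call further down re-adds [0, 1MB).
		 */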
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root.rb_node = NULL;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}

u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it, don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it may
	 * be a good idea to point to the actual start of the free area in the
	 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
	 * to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);

	while (1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

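		/*
		 * the allocation was taken off the front of this entry; if
		 * nothing is left, drop the entry from the cluster's rbtree
		 */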
		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}

static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}

/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least 'bytes' free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocations with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs; it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node) {
			ret = -ENOSPC;
			goto out;
		}
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* our window is just right, let's fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  Reset and try again.
		 */
		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
		    next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = 0;
		} else {
			last = next;
			window_free += next->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		node = rb_next(&entry->offset_index);
		if (entry->bitmap && node) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		} else if (entry->bitmap && !node) {
			break;
		}

		rb_erase(&entry->offset_index, &block_group->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	cluster->max_size = max_extent;
got_it:
	ret = 0;
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root.rb_node = NULL;
	cluster->max_size = 0;
	cluster->points_to_bitmap = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}