/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

/* number of bits held by one bitmap entry (one page worth of bits) */
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
/* memory budget for free space tracking, per 1GiB of block group space */
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)

/*
 * Convert a byte offset inside the bitmap starting at @bitmap_start into
 * the index of the corresponding bit.  @offset must be >= @bitmap_start.
 */
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}

/* convert a byte length into a number of bits, one bit per sector */
static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}

/*
 * Round @offset down to the start offset of the bitmap entry that would
 * cover it.  Bitmaps are laid out on fixed BITS_PER_BITMAP * sectorsize
 * boundaries relative to the block group's start (key.objectid).
 */
static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}

/*
 * Insert @node into the offset-sorted rbtree @root.  @bitmap says whether
 * the entry being inserted is a bitmap entry; it is used to order a bitmap
 * entry after an extent entry that shares the same offset (see comment
 * below).  Always returns 0 in this version.
 */
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset. If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap. So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically. If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				/* two bitmaps at one offset would be a bug */
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				/* likewise two extents at one offset */
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * bitmap_only - If this is set, only return a bitmap entry that starts at
 * exactly the given (bitmap-aligned) offset, or NULL.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		/* exact-offset hit */
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			/* no entry before offset; fuzzy search may use it */
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		/*
		 * an extent entry just before the bitmap may extend over
		 * the requested offset; prefer it if so
		 */
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	/* fuzzy: walk forward to the first entry covering or after offset */
	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

/*
 * Remove @info from the free space tree and subtract its accounting.
 * Caller holds block_group->tree_lock and owns freeing @info.
 */
static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}

/*
 * Insert @info into the free space tree and add its accounting.
 * Caller holds block_group->tree_lock.
 */
static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	/* an extent entry with zero bytes should never be linked */
	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}

/*
 * Recompute extents_thresh, the number of extent entries we allow before
 * new free space gets pushed into bitmaps instead.
 */
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		/* bitmaps alone exhaust the budget: allow no extent entries */
		block_group->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	block_group->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

/*
 * Clear @bytes worth of bits starting at @offset in bitmap entry @info and
 * update byte accounting.  The range must lie entirely inside this bitmap.
 */
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}

/*
 * Set @bytes worth of bits starting at @offset in bitmap entry @info and
 * update byte accounting.  The range must lie entirely inside this bitmap.
 */
static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}

/*
 * Search @bitmap_info for a run of set bits at least *bytes long, starting
 * at or after *offset.  On success returns 0 and updates *offset/*bytes to
 * the start and full length of the run found; returns -1 if none exists.
 */
static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}

/*
 * Find a free space entry (extent or bitmap) that can satisfy at least
 * *bytes at or after *offset.  On success *offset/*bytes are updated to the
 * usable range and the entry is returned; NULL if nothing fits.
 *
 * NOTE(review): the @debug parameter is unused in this version.
 */
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	/*
	 * start from the bitmap-aligned position so a bitmap covering
	 * *offset is not skipped; fuzzy=1 walks forward as needed
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			/* bitmap had bytes total but no long-enough run */
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

/*
 * Turn the pre-allocated entry @info (which already carries an allocated
 * bitmap page) into a new, empty bitmap entry covering @offset, link it
 * into the tree and refresh the thresholds.
 */
static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	/* ceiling division: maximum bitmaps this block group can hold */
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);
	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	info->bytes = 0;
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}

/*
 * Remove the range *offset/*bytes from bitmap entries, starting with
 * @bitmap_info and walking into following bitmaps when the range spans
 * more than one.  Returns 0 on success, -EAGAIN if the caller should
 * restart the whole removal, -EINVAL if the space isn't actually there.
 */
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	/* last byte covered by this bitmap entry */
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working. Search the bitmap for the space we are trying to use to
	 * make sure its actually there. If its not there then we need to stop
	 * because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	/*
	 * NOTE(review): the straddle test uses '>' on bitmap_info->offset;
	 * an *offset exactly at the bitmap start that straddles the end
	 * falls through to BUG-free no-op territory — confirm intended.
	 */
	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		/* range straddles the end of this bitmap: clear our part */
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		/* range fully inside this bitmap */
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			/* bitmap is now empty; tear it down */
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		/* done removing and the bitmap emptied out: tear it down */
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}

/*
 * Try to store the free range described by @info in bitmap form.
 * Returns 0 if the caller should add it as an extent entry instead,
 * 1 if it was fully absorbed into bitmaps, or a negative errno.
 * Consumes @info (and its bitmap, if any) on success or error.
 * Called with block_group->tree_lock held; may drop and retake it
 * around allocations.
 */
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		/* after adding a fresh bitmap it must be findable */
		BUG_ON(added);
		goto new_bitmap;
	}

	/* first byte past this bitmap's coverage */
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		/* range spills into the next bitmap; set what fits here */
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		/* reuse the caller's entry, it already has a bitmap page */
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		/* must allocate; drop the spinlock around GFP_NOFS allocs */
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}

/*
 * Add the free range [offset, offset + bytes) to the block group's cache,
 * merging with directly adjacent extent entries or spilling into bitmaps
 * as the thresholds dictate.  Returns 0 on success or a negative errno.
 */
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			/* fully absorbed by a bitmap; info was consumed */
			ret = 0;
			goto out;
		}
	}

	/* merge with the extent starting exactly at our end */
	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	/* merge with the extent ending exactly at our start */
	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}

/*
 * Remove the range [offset, offset + bytes) from the free space cache.
 * The range may be covered by an extent entry, one or more bitmaps, or a
 * mix; used by tree-log replay.  Returns 0 on success or a negative errno.
 */
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		/* the entry found is too small; see if the next one covers us */
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		/* exact match: drop the whole entry */
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		/* removing from the front of an extent entry */
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}

/*
 * Debug helper: dump every free space entry in the block group and count
 * how many are at least @bytes large.
 */
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}

/*
 * Sum the bytes of every entry in the tree.
 * NOTE(review): iterates without taking tree_lock — presumably callers
 * serialize externally; confirm against call sites.
 */
u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache. If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already. In that case, we just return without changing anything
 *
 * Drops one reference on @block_group.  Caller holds tree_lock.
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	/* bitmap-backed clusters never moved entries out of the tree */
	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

/*
 * Tear down the whole free space cache for @block_group: return any
 * clusters, then free every entry (and bitmap page) in the tree.
 */
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			/* long teardown: give the scheduler a chance */
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}

/*
 * Allocate @bytes (searching for bytes + empty_size) from the free space
 * cache at or after @offset.  Returns the logical start of the allocation,
 * or 0 if nothing suitable was found.
 */
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		/* only @bytes are consumed, not the empty_size padding */
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		/* trim the front of the extent entry */
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache. If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	/* ref consumed by __btrfs_return_cluster_to_free_space */
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

/*
 * Allocate @bytes from a cluster that points at a bitmap entry.
 * Returns the logical start of the allocation, or 0 on failure.
 */
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it may
	 * be a good idea to point to the actual start of the free area in the
	 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
	 * to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);

	while(1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		/* carve bytes off the front of this cluster extent */
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}

/*
 * Try to build a cluster window inside the bitmap entry @entry: find runs
 * of at least min_bytes totalling bytes, without the window growing past
 * twice the requested size.  On success points the cluster at the bitmap
 * and returns 0; returns -1 if no suitable window exists.
 */
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		/* remember where the window begins */
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			/* window got too sparse; restart it from here */
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}

/*
 * here we try to find a cluster of blocks in a block group. The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -enospc
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocates with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node) {
			ret = -ENOSPC;
			goto out;
		}
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* out window is just right, lets fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.
reset and try again 1293 */ 1294 if (next->offset - (last->offset + last->bytes) > 128 * 1024 || 1295 next->offset - window_start > (bytes + empty_size) * 2) { 1296 entry = next; 1297 window_start = entry->offset; 1298 window_free = entry->bytes; 1299 last = entry; 1300 max_extent = entry->bytes; 1301 } else { 1302 last = next; 1303 window_free += next->bytes; 1304 if (entry->bytes > max_extent) 1305 max_extent = entry->bytes; 1306 } 1307 } 1308 1309 cluster->window_start = entry->offset; 1310 1311 /* 1312 * now we've found our entries, pull them out of the free space 1313 * cache and put them into the cluster rbtree 1314 * 1315 * The cluster includes an rbtree, but only uses the offset index 1316 * of each free space cache entry. 1317 */ 1318 while (1) { 1319 node = rb_next(&entry->offset_index); 1320 if (entry->bitmap && node) { 1321 entry = rb_entry(node, struct btrfs_free_space, 1322 offset_index); 1323 continue; 1324 } else if (entry->bitmap && !node) { 1325 break; 1326 } 1327 1328 rb_erase(&entry->offset_index, &block_group->free_space_offset); 1329 ret = tree_insert_offset(&cluster->root, entry->offset, 1330 &entry->offset_index, 0); 1331 BUG_ON(ret); 1332 1333 if (!node || entry == last) 1334 break; 1335 1336 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1337 } 1338 1339 cluster->max_size = max_extent; 1340 got_it: 1341 ret = 0; 1342 atomic_inc(&block_group->count); 1343 list_add_tail(&cluster->block_group_list, &block_group->cluster_list); 1344 cluster->block_group = block_group; 1345 out: 1346 spin_unlock(&cluster->lock); 1347 spin_unlock(&block_group->tree_lock); 1348 1349 return ret; 1350 } 1351 1352 /* 1353 * simple code to zero out a cluster 1354 */ 1355 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 1356 { 1357 spin_lock_init(&cluster->lock); 1358 spin_lock_init(&cluster->refill_lock); 1359 cluster->root = RB_ROOT; 1360 cluster->max_size = 0; 1361 cluster->points_to_bitmap = false; 1362 
INIT_LIST_HEAD(&cluster->block_group_list); 1363 cluster->block_group = NULL; 1364 } 1365 1366