1 /* 2 * Copyright (C) 2008 Red Hat. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/pagemap.h> 20 #include <linux/sched.h> 21 #include <linux/slab.h> 22 #include <linux/math64.h> 23 #include <linux/ratelimit.h> 24 #include "ctree.h" 25 #include "free-space-cache.h" 26 #include "transaction.h" 27 #include "disk-io.h" 28 #include "extent_io.h" 29 #include "inode-map.h" 30 #include "volumes.h" 31 32 #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 33 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 34 35 struct btrfs_trim_range { 36 u64 start; 37 u64 bytes; 38 struct list_head list; 39 }; 40 41 static int link_free_space(struct btrfs_free_space_ctl *ctl, 42 struct btrfs_free_space *info); 43 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 44 struct btrfs_free_space *info); 45 46 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 47 struct btrfs_path *path, 48 u64 offset) 49 { 50 struct btrfs_key key; 51 struct btrfs_key location; 52 struct btrfs_disk_key disk_key; 53 struct btrfs_free_space_header *header; 54 struct extent_buffer *leaf; 55 struct inode *inode = NULL; 56 int ret; 57 58 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 59 key.offset = offset; 60 key.type = 0; 61 62 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 63 if (ret < 0) 64 return ERR_PTR(ret); 65 if (ret > 0) { 66 btrfs_release_path(path); 67 return ERR_PTR(-ENOENT); 68 } 69 70 leaf = path->nodes[0]; 71 header = btrfs_item_ptr(leaf, path->slots[0], 72 struct btrfs_free_space_header); 73 btrfs_free_space_key(leaf, header, &disk_key); 74 btrfs_disk_key_to_cpu(&location, &disk_key); 75 btrfs_release_path(path); 76 77 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); 78 if (!inode) 79 return ERR_PTR(-ENOENT); 80 if (IS_ERR(inode)) 81 return inode; 82 if (is_bad_inode(inode)) { 83 iput(inode); 84 return ERR_PTR(-ENOENT); 85 } 86 87 mapping_set_gfp_mask(inode->i_mapping, 88 mapping_gfp_mask(inode->i_mapping) & 89 ~(__GFP_FS | __GFP_HIGHMEM)); 90 91 return inode; 92 } 93 94 struct inode *lookup_free_space_inode(struct btrfs_root *root, 95 struct btrfs_block_group_cache 96 *block_group, struct btrfs_path *path) 97 { 98 struct inode *inode = NULL; 99 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 100 101 spin_lock(&block_group->lock); 102 if (block_group->inode) 103 inode = igrab(block_group->inode); 104 spin_unlock(&block_group->lock); 105 if (inode) 106 return inode; 107 108 inode = __lookup_free_space_inode(root, path, 109 block_group->key.objectid); 110 if (IS_ERR(inode)) 111 return inode; 112 113 spin_lock(&block_group->lock); 114 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 115 btrfs_info(root->fs_info, 116 "Old style space inode found, converting."); 117 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 118 BTRFS_INODE_NODATACOW; 119 block_group->disk_cache_state = BTRFS_DC_CLEAR; 120 } 121 122 if 
(!block_group->iref) { 123 block_group->inode = igrab(inode); 124 block_group->iref = 1; 125 } 126 spin_unlock(&block_group->lock); 127 128 return inode; 129 } 130 131 static int __create_free_space_inode(struct btrfs_root *root, 132 struct btrfs_trans_handle *trans, 133 struct btrfs_path *path, 134 u64 ino, u64 offset) 135 { 136 struct btrfs_key key; 137 struct btrfs_disk_key disk_key; 138 struct btrfs_free_space_header *header; 139 struct btrfs_inode_item *inode_item; 140 struct extent_buffer *leaf; 141 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; 142 int ret; 143 144 ret = btrfs_insert_empty_inode(trans, root, path, ino); 145 if (ret) 146 return ret; 147 148 /* We inline crc's for the free disk space cache */ 149 if (ino != BTRFS_FREE_INO_OBJECTID) 150 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 151 152 leaf = path->nodes[0]; 153 inode_item = btrfs_item_ptr(leaf, path->slots[0], 154 struct btrfs_inode_item); 155 btrfs_item_key(leaf, &disk_key, path->slots[0]); 156 memset_extent_buffer(leaf, 0, (unsigned long)inode_item, 157 sizeof(*inode_item)); 158 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 159 btrfs_set_inode_size(leaf, inode_item, 0); 160 btrfs_set_inode_nbytes(leaf, inode_item, 0); 161 btrfs_set_inode_uid(leaf, inode_item, 0); 162 btrfs_set_inode_gid(leaf, inode_item, 0); 163 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 164 btrfs_set_inode_flags(leaf, inode_item, flags); 165 btrfs_set_inode_nlink(leaf, inode_item, 1); 166 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 167 btrfs_set_inode_block_group(leaf, inode_item, offset); 168 btrfs_mark_buffer_dirty(leaf); 169 btrfs_release_path(path); 170 171 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 172 key.offset = offset; 173 key.type = 0; 174 ret = btrfs_insert_empty_item(trans, root, path, &key, 175 sizeof(struct btrfs_free_space_header)); 176 if (ret < 0) { 177 btrfs_release_path(path); 178 return ret; 179 } 180 181 leaf = path->nodes[0]; 182 header = btrfs_item_ptr(leaf, path->slots[0], 183 struct btrfs_free_space_header); 184 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); 185 btrfs_set_free_space_key(leaf, header, &disk_key); 186 btrfs_mark_buffer_dirty(leaf); 187 btrfs_release_path(path); 188 189 return 0; 190 } 191 192 int create_free_space_inode(struct btrfs_root *root, 193 struct btrfs_trans_handle *trans, 194 struct btrfs_block_group_cache *block_group, 195 struct btrfs_path *path) 196 { 197 int ret; 198 u64 ino; 199 200 ret = btrfs_find_free_objectid(root, &ino); 201 if (ret < 0) 202 return ret; 203 204 return __create_free_space_inode(root, trans, path, ino, 205 block_group->key.objectid); 206 } 207 208 int btrfs_check_trunc_cache_free_space(struct btrfs_root *root, 209 struct btrfs_block_rsv *rsv) 210 { 211 u64 needed_bytes; 212 int ret; 213 214 /* 1 for slack space, 1 for updating the inode */ 215 needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + 216 btrfs_calc_trans_metadata_size(root, 1); 217 218 spin_lock(&rsv->lock); 219 if (rsv->reserved < needed_bytes) 220 ret = -ENOSPC; 221 else 222 ret = 0; 223 spin_unlock(&rsv->lock); 224 return ret; 225 } 226 227 int btrfs_truncate_free_space_cache(struct btrfs_root *root, 228 struct btrfs_trans_handle *trans, 229 struct btrfs_block_group_cache *block_group, 230 struct inode *inode) 231 { 232 int ret = 0; 233 struct btrfs_path *path = btrfs_alloc_path(); 234 235 if (!path) { 236 ret = -ENOMEM; 237 goto fail; 238 } 239 240 if (block_group) { 241 
mutex_lock(&trans->transaction->cache_write_mutex); 242 if (!list_empty(&block_group->io_list)) { 243 list_del_init(&block_group->io_list); 244 245 btrfs_wait_cache_io(root, trans, block_group, 246 &block_group->io_ctl, path, 247 block_group->key.objectid); 248 btrfs_put_block_group(block_group); 249 } 250 251 /* 252 * now that we've truncated the cache away, its no longer 253 * setup or written 254 */ 255 spin_lock(&block_group->lock); 256 block_group->disk_cache_state = BTRFS_DC_CLEAR; 257 spin_unlock(&block_group->lock); 258 } 259 btrfs_free_path(path); 260 261 btrfs_i_size_write(inode, 0); 262 truncate_pagecache(inode, 0); 263 264 /* 265 * We don't need an orphan item because truncating the free space cache 266 * will never be split across transactions. 267 * We don't need to check for -EAGAIN because we're a free space 268 * cache inode 269 */ 270 ret = btrfs_truncate_inode_items(trans, root, inode, 271 0, BTRFS_EXTENT_DATA_KEY); 272 if (ret) { 273 mutex_unlock(&trans->transaction->cache_write_mutex); 274 btrfs_abort_transaction(trans, root, ret); 275 return ret; 276 } 277 278 ret = btrfs_update_inode(trans, root, inode); 279 280 if (block_group) 281 mutex_unlock(&trans->transaction->cache_write_mutex); 282 283 fail: 284 if (ret) 285 btrfs_abort_transaction(trans, root, ret); 286 287 return ret; 288 } 289 290 static int readahead_cache(struct inode *inode) 291 { 292 struct file_ra_state *ra; 293 unsigned long last_index; 294 295 ra = kzalloc(sizeof(*ra), GFP_NOFS); 296 if (!ra) 297 return -ENOMEM; 298 299 file_ra_state_init(ra, inode->i_mapping); 300 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 301 302 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 303 304 kfree(ra); 305 306 return 0; 307 } 308 309 static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode, 310 struct btrfs_root *root, int write) 311 { 312 int num_pages; 313 int check_crcs = 0; 314 315 num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); 316 317 if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) 318 check_crcs = 1; 319 320 /* Make sure we can fit our crcs into the first page */ 321 if (write && check_crcs && 322 (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) 323 return -ENOSPC; 324 325 memset(io_ctl, 0, sizeof(struct btrfs_io_ctl)); 326 327 io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); 328 if (!io_ctl->pages) 329 return -ENOMEM; 330 331 io_ctl->num_pages = num_pages; 332 io_ctl->root = root; 333 io_ctl->check_crcs = check_crcs; 334 io_ctl->inode = inode; 335 336 return 0; 337 } 338 339 static void io_ctl_free(struct btrfs_io_ctl *io_ctl) 340 { 341 kfree(io_ctl->pages); 342 io_ctl->pages = NULL; 343 } 344 345 static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl) 346 { 347 if (io_ctl->cur) { 348 io_ctl->cur = NULL; 349 io_ctl->orig = NULL; 350 } 351 } 352 353 static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear) 354 { 355 ASSERT(io_ctl->index < io_ctl->num_pages); 356 io_ctl->page = io_ctl->pages[io_ctl->index++]; 357 io_ctl->cur = page_address(io_ctl->page); 358 io_ctl->orig = io_ctl->cur; 359 io_ctl->size = PAGE_CACHE_SIZE; 360 if (clear) 361 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); 362 } 363 364 static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl) 365 { 366 int i; 367 368 io_ctl_unmap_page(io_ctl); 369 370 for (i = 0; i < io_ctl->num_pages; i++) { 371 if (io_ctl->pages[i]) { 372 ClearPageChecked(io_ctl->pages[i]); 373 unlock_page(io_ctl->pages[i]); 374 page_cache_release(io_ctl->pages[i]); 375 } 376 } 377 } 378 
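/*
 * Pin the pages that back the free space cache file: each page is looked up
 * or created in the inode's mapping and returned locked.  When @uptodate is
 * set (the load path) every page is also read and verified to be uptodate.
 * All pages then have their dirty bits cleared and are marked extent mapped.
 * On failure, the pages taken so far are unlocked and released.
 */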
379 static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode, 380 int uptodate) 381 { 382 struct page *page; 383 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 384 int i; 385 386 for (i = 0; i < io_ctl->num_pages; i++) { 387 page = find_or_create_page(inode->i_mapping, i, mask); 388 if (!page) { 389 io_ctl_drop_pages(io_ctl); 390 return -ENOMEM; 391 } 392 io_ctl->pages[i] = page; 393 if (uptodate && !PageUptodate(page)) { 394 btrfs_readpage(NULL, page); 395 lock_page(page); 396 if (!PageUptodate(page)) { 397 btrfs_err(BTRFS_I(inode)->root->fs_info, 398 "error reading free space cache"); 399 io_ctl_drop_pages(io_ctl); 400 return -EIO; 401 } 402 } 403 } 404 405 for (i = 0; i < io_ctl->num_pages; i++) { 406 clear_page_dirty_for_io(io_ctl->pages[i]); 407 set_page_extent_mapped(io_ctl->pages[i]); 408 } 409 410 return 0; 411 } 412 413 static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 414 { 415 __le64 *val; 416 417 io_ctl_map_page(io_ctl, 1); 418 419 /* 420 * Skip the csum areas. If we don't check crcs then we just have a 421 * 64bit chunk at the front of the first page. 422 */ 423 if (io_ctl->check_crcs) { 424 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 425 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 426 } else { 427 io_ctl->cur += sizeof(u64); 428 io_ctl->size -= sizeof(u64) * 2; 429 } 430 431 val = io_ctl->cur; 432 *val = cpu_to_le64(generation); 433 io_ctl->cur += sizeof(u64); 434 } 435 436 static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation) 437 { 438 __le64 *gen; 439 440 /* 441 * Skip the crc area. If we don't check crcs then we just have a 64bit 442 * chunk at the front of the first page. 443 */ 444 if (io_ctl->check_crcs) { 445 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 446 io_ctl->size -= sizeof(u64) + 447 (sizeof(u32) * io_ctl->num_pages); 448 } else { 449 io_ctl->cur += sizeof(u64); 450 io_ctl->size -= sizeof(u64) * 2; 451 } 452 453 gen = io_ctl->cur; 454 if (le64_to_cpu(*gen) != generation) { 455 printk_ratelimited(KERN_ERR "BTRFS: space cache generation " 456 "(%Lu) does not match inode (%Lu)\n", *gen, 457 generation); 458 io_ctl_unmap_page(io_ctl); 459 return -EIO; 460 } 461 io_ctl->cur += sizeof(u64); 462 return 0; 463 } 464 465 static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index) 466 { 467 u32 *tmp; 468 u32 crc = ~(u32)0; 469 unsigned offset = 0; 470 471 if (!io_ctl->check_crcs) { 472 io_ctl_unmap_page(io_ctl); 473 return; 474 } 475 476 if (index == 0) 477 offset = sizeof(u32) * io_ctl->num_pages; 478 479 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 480 PAGE_CACHE_SIZE - offset); 481 btrfs_csum_final(crc, (char *)&crc); 482 io_ctl_unmap_page(io_ctl); 483 tmp = page_address(io_ctl->pages[0]); 484 tmp += index; 485 *tmp = crc; 486 } 487 488 static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index) 489 { 490 u32 *tmp, val; 491 u32 crc = ~(u32)0; 492 unsigned offset = 0; 493 494 if (!io_ctl->check_crcs) { 495 io_ctl_map_page(io_ctl, 0); 496 return 0; 497 } 498 499 if (index == 0) 500 offset = sizeof(u32) * io_ctl->num_pages; 501 502 tmp = page_address(io_ctl->pages[0]); 503 tmp += index; 504 val = *tmp; 505 506 io_ctl_map_page(io_ctl, 0); 507 crc = btrfs_csum_data(io_ctl->orig + offset, crc, 508 PAGE_CACHE_SIZE - offset); 509 btrfs_csum_final(crc, (char *)&crc); 510 if (val != crc) { 511 printk_ratelimited(KERN_ERR "BTRFS: csum mismatch on free " 512 "space cache\n"); 513 io_ctl_unmap_page(io_ctl); 514 return -EIO; 515 } 516 517 return 0; 
518 } 519 520 static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes, 521 void *bitmap) 522 { 523 struct btrfs_free_space_entry *entry; 524 525 if (!io_ctl->cur) 526 return -ENOSPC; 527 528 entry = io_ctl->cur; 529 entry->offset = cpu_to_le64(offset); 530 entry->bytes = cpu_to_le64(bytes); 531 entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP : 532 BTRFS_FREE_SPACE_EXTENT; 533 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 534 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 535 536 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 537 return 0; 538 539 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 540 541 /* No more pages to map */ 542 if (io_ctl->index >= io_ctl->num_pages) 543 return 0; 544 545 /* map the next page */ 546 io_ctl_map_page(io_ctl, 1); 547 return 0; 548 } 549 550 static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap) 551 { 552 if (!io_ctl->cur) 553 return -ENOSPC; 554 555 /* 556 * If we aren't at the start of the current page, unmap this one and 557 * map the next one if there is any left. 558 */ 559 if (io_ctl->cur != io_ctl->orig) { 560 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 561 if (io_ctl->index >= io_ctl->num_pages) 562 return -ENOSPC; 563 io_ctl_map_page(io_ctl, 0); 564 } 565 566 memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE); 567 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 568 if (io_ctl->index < io_ctl->num_pages) 569 io_ctl_map_page(io_ctl, 0); 570 return 0; 571 } 572 573 static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl) 574 { 575 /* 576 * If we're not on the boundary we know we've modified the page and we 577 * need to crc the page. 578 */ 579 if (io_ctl->cur != io_ctl->orig) 580 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 581 else 582 io_ctl_unmap_page(io_ctl); 583 584 while (io_ctl->index < io_ctl->num_pages) { 585 io_ctl_map_page(io_ctl, 1); 586 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 587 } 588 } 589 590 static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl, 591 struct btrfs_free_space *entry, u8 *type) 592 { 593 struct btrfs_free_space_entry *e; 594 int ret; 595 596 if (!io_ctl->cur) { 597 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 598 if (ret) 599 return ret; 600 } 601 602 e = io_ctl->cur; 603 entry->offset = le64_to_cpu(e->offset); 604 entry->bytes = le64_to_cpu(e->bytes); 605 *type = e->type; 606 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 607 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 608 609 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 610 return 0; 611 612 io_ctl_unmap_page(io_ctl); 613 614 return 0; 615 } 616 617 static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl, 618 struct btrfs_free_space *entry) 619 { 620 int ret; 621 622 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 623 if (ret) 624 return ret; 625 626 memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE); 627 io_ctl_unmap_page(io_ctl); 628 629 return 0; 630 } 631 632 /* 633 * Since we attach pinned extents after the fact we can have contiguous sections 634 * of free space that are split up in entries. This poses a problem with the 635 * tree logging stuff since it could have allocated across what appears to be 2 636 * entries since we would have merged the entries when adding the pinned extents 637 * back to the free space cache. So run through the space cache that we just 638 * loaded and merge contiguous entries. This will make the log replay stuff not 639 * blow up and it will make for nicer allocator behavior. 
640 */ 641 static void merge_space_tree(struct btrfs_free_space_ctl *ctl) 642 { 643 struct btrfs_free_space *e, *prev = NULL; 644 struct rb_node *n; 645 646 again: 647 spin_lock(&ctl->tree_lock); 648 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 649 e = rb_entry(n, struct btrfs_free_space, offset_index); 650 if (!prev) 651 goto next; 652 if (e->bitmap || prev->bitmap) 653 goto next; 654 if (prev->offset + prev->bytes == e->offset) { 655 unlink_free_space(ctl, prev); 656 unlink_free_space(ctl, e); 657 prev->bytes += e->bytes; 658 kmem_cache_free(btrfs_free_space_cachep, e); 659 link_free_space(ctl, prev); 660 prev = NULL; 661 spin_unlock(&ctl->tree_lock); 662 goto again; 663 } 664 next: 665 prev = e; 666 } 667 spin_unlock(&ctl->tree_lock); 668 } 669 670 static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 671 struct btrfs_free_space_ctl *ctl, 672 struct btrfs_path *path, u64 offset) 673 { 674 struct btrfs_free_space_header *header; 675 struct extent_buffer *leaf; 676 struct btrfs_io_ctl io_ctl; 677 struct btrfs_key key; 678 struct btrfs_free_space *e, *n; 679 LIST_HEAD(bitmaps); 680 u64 num_entries; 681 u64 num_bitmaps; 682 u64 generation; 683 u8 type; 684 int ret = 0; 685 686 /* Nothing in the space cache, goodbye */ 687 if (!i_size_read(inode)) 688 return 0; 689 690 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 691 key.offset = offset; 692 key.type = 0; 693 694 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 695 if (ret < 0) 696 return 0; 697 else if (ret > 0) { 698 btrfs_release_path(path); 699 return 0; 700 } 701 702 ret = -1; 703 704 leaf = path->nodes[0]; 705 header = btrfs_item_ptr(leaf, path->slots[0], 706 struct btrfs_free_space_header); 707 num_entries = btrfs_free_space_entries(leaf, header); 708 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 709 generation = btrfs_free_space_generation(leaf, header); 710 btrfs_release_path(path); 711 712 if (!BTRFS_I(inode)->generation) { 713 btrfs_info(root->fs_info, 714 "The free space cache file (%llu) is invalid. 
Skipping it\n",
			  offset);
		return 0;
	}

	if (BTRFS_I(inode)->generation != generation) {
		btrfs_err(root->fs_info,
			"free space inode generation (%llu) "
			"did not match free space cache generation (%llu)",
			BTRFS_I(inode)->generation, generation);
		return 0;
	}

	if (!num_entries)
		return 0;

	ret = io_ctl_init(&io_ctl, inode, root, 0);
	if (ret)
		return ret;

	ret = readahead_cache(inode);
	if (ret)
		goto out;

	ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
	if (ret)
		goto out;

	ret = io_ctl_check_crc(&io_ctl, 0);
	if (ret)
		goto free_cache;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	while (num_entries) {
		e = kmem_cache_zalloc(btrfs_free_space_cachep,
				      GFP_NOFS);
		if (!e)
			goto free_cache;

		ret = io_ctl_read_entry(&io_ctl, e, &type);
		if (ret) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (!e->bytes) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				btrfs_err(root->fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
		} else {
			ASSERT(num_bitmaps);
			num_bitmaps--;
			e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
			if (!e->bitmap) {
				kmem_cache_free(
					btrfs_free_space_cachep, e);
				goto free_cache;
			}
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			ctl->total_bitmaps++;
			ctl->op->recalc_thresholds(ctl);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				btrfs_err(root->fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * The bitmaps are stored after all the entries, in the same order
	 * that the bitmap entries were added to the cache, so read them now.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
	}

	io_ctl_drop_pages(&io_ctl);
	merge_space_tree(ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}

int load_free_space_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_root *root = fs_info->tree_root;
	struct inode *inode;
	struct btrfs_path *path;
	int ret = 0;
	bool matched;
	u64 used = btrfs_block_group_used(&block_group->item);

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
842 */ 843 spin_lock(&block_group->lock); 844 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 845 spin_unlock(&block_group->lock); 846 return 0; 847 } 848 spin_unlock(&block_group->lock); 849 850 path = btrfs_alloc_path(); 851 if (!path) 852 return 0; 853 path->search_commit_root = 1; 854 path->skip_locking = 1; 855 856 inode = lookup_free_space_inode(root, block_group, path); 857 if (IS_ERR(inode)) { 858 btrfs_free_path(path); 859 return 0; 860 } 861 862 /* We may have converted the inode and made the cache invalid. */ 863 spin_lock(&block_group->lock); 864 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 865 spin_unlock(&block_group->lock); 866 btrfs_free_path(path); 867 goto out; 868 } 869 spin_unlock(&block_group->lock); 870 871 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 872 path, block_group->key.objectid); 873 btrfs_free_path(path); 874 if (ret <= 0) 875 goto out; 876 877 spin_lock(&ctl->tree_lock); 878 matched = (ctl->free_space == (block_group->key.offset - used - 879 block_group->bytes_super)); 880 spin_unlock(&ctl->tree_lock); 881 882 if (!matched) { 883 __btrfs_remove_free_space_cache(ctl); 884 btrfs_warn(fs_info, "block group %llu has wrong amount of free space", 885 block_group->key.objectid); 886 ret = -1; 887 } 888 out: 889 if (ret < 0) { 890 /* This cache is bogus, make sure it gets cleared */ 891 spin_lock(&block_group->lock); 892 block_group->disk_cache_state = BTRFS_DC_CLEAR; 893 spin_unlock(&block_group->lock); 894 ret = 0; 895 896 btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuild it now", 897 block_group->key.objectid); 898 } 899 900 iput(inode); 901 return ret; 902 } 903 904 static noinline_for_stack 905 int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl, 906 struct btrfs_free_space_ctl *ctl, 907 struct btrfs_block_group_cache *block_group, 908 int *entries, int *bitmaps, 909 struct list_head *bitmap_list) 910 { 911 int ret; 912 struct btrfs_free_cluster *cluster = NULL; 913 struct btrfs_free_cluster *cluster_locked = NULL; 914 struct rb_node *node = rb_first(&ctl->free_space_offset); 915 struct btrfs_trim_range *trim_entry; 916 917 /* Get the cluster for this block_group if it exists */ 918 if (block_group && !list_empty(&block_group->cluster_list)) { 919 cluster = list_entry(block_group->cluster_list.next, 920 struct btrfs_free_cluster, 921 block_group_list); 922 } 923 924 if (!node && cluster) { 925 cluster_locked = cluster; 926 spin_lock(&cluster_locked->lock); 927 node = rb_first(&cluster->root); 928 cluster = NULL; 929 } 930 931 /* Write out the extent entries */ 932 while (node) { 933 struct btrfs_free_space *e; 934 935 e = rb_entry(node, struct btrfs_free_space, offset_index); 936 *entries += 1; 937 938 ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes, 939 e->bitmap); 940 if (ret) 941 goto fail; 942 943 if (e->bitmap) { 944 list_add_tail(&e->list, bitmap_list); 945 *bitmaps += 1; 946 } 947 node = rb_next(node); 948 if (!node && cluster) { 949 node = rb_first(&cluster->root); 950 cluster_locked = cluster; 951 spin_lock(&cluster_locked->lock); 952 cluster = NULL; 953 } 954 } 955 if (cluster_locked) { 956 spin_unlock(&cluster_locked->lock); 957 cluster_locked = NULL; 958 } 959 960 /* 961 * Make sure we don't miss any range that was removed from our rbtree 962 * because trimming is running. Otherwise after a umount+mount (or crash 963 * after committing the transaction) we would leak free space and get 964 * an inconsistent free space cache report from fsck. 
965 */ 966 list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { 967 ret = io_ctl_add_entry(io_ctl, trim_entry->start, 968 trim_entry->bytes, NULL); 969 if (ret) 970 goto fail; 971 *entries += 1; 972 } 973 974 return 0; 975 fail: 976 if (cluster_locked) 977 spin_unlock(&cluster_locked->lock); 978 return -ENOSPC; 979 } 980 981 static noinline_for_stack int 982 update_cache_item(struct btrfs_trans_handle *trans, 983 struct btrfs_root *root, 984 struct inode *inode, 985 struct btrfs_path *path, u64 offset, 986 int entries, int bitmaps) 987 { 988 struct btrfs_key key; 989 struct btrfs_free_space_header *header; 990 struct extent_buffer *leaf; 991 int ret; 992 993 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 994 key.offset = offset; 995 key.type = 0; 996 997 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 998 if (ret < 0) { 999 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1000 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 1001 GFP_NOFS); 1002 goto fail; 1003 } 1004 leaf = path->nodes[0]; 1005 if (ret > 0) { 1006 struct btrfs_key found_key; 1007 ASSERT(path->slots[0]); 1008 path->slots[0]--; 1009 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1010 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1011 found_key.offset != offset) { 1012 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1013 inode->i_size - 1, 1014 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 1015 NULL, GFP_NOFS); 1016 btrfs_release_path(path); 1017 goto fail; 1018 } 1019 } 1020 1021 BTRFS_I(inode)->generation = trans->transid; 1022 header = btrfs_item_ptr(leaf, path->slots[0], 1023 struct btrfs_free_space_header); 1024 btrfs_set_free_space_entries(leaf, header, entries); 1025 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1026 btrfs_set_free_space_generation(leaf, header, trans->transid); 1027 btrfs_mark_buffer_dirty(leaf); 1028 btrfs_release_path(path); 1029 1030 return 0; 1031 1032 fail: 1033 return -1; 1034 } 1035 1036 static noinline_for_stack int 1037 write_pinned_extent_entries(struct btrfs_root *root, 1038 struct btrfs_block_group_cache *block_group, 1039 struct btrfs_io_ctl *io_ctl, 1040 int *entries) 1041 { 1042 u64 start, extent_start, extent_end, len; 1043 struct extent_io_tree *unpin = NULL; 1044 int ret; 1045 1046 if (!block_group) 1047 return 0; 1048 1049 /* 1050 * We want to add any pinned extents to our free space cache 1051 * so we don't leak the space 1052 * 1053 * We shouldn't have switched the pinned extents yet so this is the 1054 * right one 1055 */ 1056 unpin = root->fs_info->pinned_extents; 1057 1058 start = block_group->key.objectid; 1059 1060 while (start < block_group->key.objectid + block_group->key.offset) { 1061 ret = find_first_extent_bit(unpin, start, 1062 &extent_start, &extent_end, 1063 EXTENT_DIRTY, NULL); 1064 if (ret) 1065 return 0; 1066 1067 /* This pinned extent is out of our range */ 1068 if (extent_start >= block_group->key.objectid + 1069 block_group->key.offset) 1070 return 0; 1071 1072 extent_start = max(extent_start, start); 1073 extent_end = min(block_group->key.objectid + 1074 block_group->key.offset, extent_end + 1); 1075 len = extent_end - extent_start; 1076 1077 *entries += 1; 1078 ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); 1079 if (ret) 1080 return -ENOSPC; 1081 1082 start = extent_end; 1083 } 1084 1085 return 0; 1086 } 1087 1088 static noinline_for_stack int 1089 write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) 1090 { 1091 struct list_head *pos, *n; 1092 int ret; 1093 1094 /* Write out the 
bitmaps */ 1095 list_for_each_safe(pos, n, bitmap_list) { 1096 struct btrfs_free_space *entry = 1097 list_entry(pos, struct btrfs_free_space, list); 1098 1099 ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); 1100 if (ret) 1101 return -ENOSPC; 1102 list_del_init(&entry->list); 1103 } 1104 1105 return 0; 1106 } 1107 1108 static int flush_dirty_cache(struct inode *inode) 1109 { 1110 int ret; 1111 1112 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); 1113 if (ret) 1114 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 1115 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 1116 GFP_NOFS); 1117 1118 return ret; 1119 } 1120 1121 static void noinline_for_stack 1122 cleanup_bitmap_list(struct list_head *bitmap_list) 1123 { 1124 struct list_head *pos, *n; 1125 1126 list_for_each_safe(pos, n, bitmap_list) { 1127 struct btrfs_free_space *entry = 1128 list_entry(pos, struct btrfs_free_space, list); 1129 list_del_init(&entry->list); 1130 } 1131 } 1132 1133 static void noinline_for_stack 1134 cleanup_write_cache_enospc(struct inode *inode, 1135 struct btrfs_io_ctl *io_ctl, 1136 struct extent_state **cached_state, 1137 struct list_head *bitmap_list) 1138 { 1139 io_ctl_drop_pages(io_ctl); 1140 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1141 i_size_read(inode) - 1, cached_state, 1142 GFP_NOFS); 1143 } 1144 1145 int btrfs_wait_cache_io(struct btrfs_root *root, 1146 struct btrfs_trans_handle *trans, 1147 struct btrfs_block_group_cache *block_group, 1148 struct btrfs_io_ctl *io_ctl, 1149 struct btrfs_path *path, u64 offset) 1150 { 1151 int ret; 1152 struct inode *inode = io_ctl->inode; 1153 1154 if (!inode) 1155 return 0; 1156 1157 if (block_group) 1158 root = root->fs_info->tree_root; 1159 1160 /* Flush the dirty pages in the cache file. */ 1161 ret = flush_dirty_cache(inode); 1162 if (ret) 1163 goto out; 1164 1165 /* Update the cache item to tell everyone this cache file is valid. */ 1166 ret = update_cache_item(trans, root, inode, path, offset, 1167 io_ctl->entries, io_ctl->bitmaps); 1168 out: 1169 io_ctl_free(io_ctl); 1170 if (ret) { 1171 invalidate_inode_pages2(inode->i_mapping); 1172 BTRFS_I(inode)->generation = 0; 1173 if (block_group) { 1174 #ifdef DEBUG 1175 btrfs_err(root->fs_info, 1176 "failed to write free space cache for block group %llu", 1177 block_group->key.objectid); 1178 #endif 1179 } 1180 } 1181 btrfs_update_inode(trans, root, inode); 1182 1183 if (block_group) { 1184 /* the dirty list is protected by the dirty_bgs_lock */ 1185 spin_lock(&trans->transaction->dirty_bgs_lock); 1186 1187 /* the disk_cache_state is protected by the block group lock */ 1188 spin_lock(&block_group->lock); 1189 1190 /* 1191 * only mark this as written if we didn't get put back on 1192 * the dirty list while waiting for IO. 
Otherwise our
	 * cache state won't be right, and we won't get written again.
	 */
	if (!ret && list_empty(&block_group->dirty_list))
		block_group->disk_cache_state = BTRFS_DC_WRITTEN;
	else if (ret)
		block_group->disk_cache_state = BTRFS_DC_ERROR;

		spin_unlock(&block_group->lock);
		spin_unlock(&trans->transaction->dirty_bgs_lock);
		io_ctl->inode = NULL;
		iput(inode);
	}

	return ret;

}

/**
 * __btrfs_write_out_cache - write out cached info to an inode
 * @root - the root the inode belongs to
 * @ctl - the free space cache we are going to write out
 * @block_group - the block_group for this cache if it belongs to a block_group
 * @trans - the trans handle
 * @path - the path to use
 * @offset - the offset for the key we'll insert
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount.  This will return 0 if it was successful in writing the cache out,
 * or an errno if it was not.
 */
static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
				   struct btrfs_block_group_cache *block_group,
				   struct btrfs_io_ctl *io_ctl,
				   struct btrfs_trans_handle *trans,
				   struct btrfs_path *path, u64 offset)
{
	struct extent_state *cached_state = NULL;
	LIST_HEAD(bitmap_list);
	int entries = 0;
	int bitmaps = 0;
	int ret;
	int must_iput = 0;

	if (!i_size_read(inode))
		return -EIO;

	WARN_ON(io_ctl->pages);
	ret = io_ctl_init(io_ctl, inode, root, 1);
	if (ret)
		return ret;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
		down_write(&block_group->data_rwsem);
		spin_lock(&block_group->lock);
		if (block_group->delalloc_bytes) {
			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
			spin_unlock(&block_group->lock);
			up_write(&block_group->data_rwsem);
			BTRFS_I(inode)->generation = 0;
			ret = 0;
			must_iput = 1;
			goto out;
		}
		spin_unlock(&block_group->lock);
	}

	/* Lock all pages first so we can lock the extent safely. */
	ret = io_ctl_prepare_pages(io_ctl, inode, 0);
	if (ret)
		goto out;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 0, &cached_state);

	io_ctl_set_generation(io_ctl, trans->transid);

	mutex_lock(&ctl->cache_writeout_mutex);
	/* Write out the extent entries in the free space cache */
	spin_lock(&ctl->tree_lock);
	ret = write_cache_extent_entries(io_ctl, ctl,
					 block_group, &entries, &bitmaps,
					 &bitmap_list);
	if (ret)
		goto out_nospc_locked;

	/*
	 * Some spaces that are freed in the current transaction are pinned;
	 * they will be added into the free space cache after the transaction
	 * is committed, so we shouldn't lose them.
	 *
	 * If this changes while we are working we'll get added back to
	 * the dirty list and redo it.  No locking needed.
	 */
	ret = write_pinned_extent_entries(root, block_group, io_ctl, &entries);
	if (ret)
		goto out_nospc_locked;

	/*
	 * At last, we write out all the bitmaps and keep cache_writeout_mutex
	 * locked while doing it because a concurrent trim can be manipulating
	 * or freeing the bitmap.
	 */
	ret = write_bitmap_entries(io_ctl, &bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);
	if (ret)
		goto out_nospc;

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(io_ctl);

	/* Everything is written out, now we dirty the pages in the file. */
	ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages,
				0, i_size_read(inode), &cached_state);
	if (ret)
		goto out_nospc;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);
	/*
	 * Release the pages and unlock the extent; we will flush
	 * them out later.
	 */
	io_ctl_drop_pages(io_ctl);

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	/*
	 * At this point the pages are under IO and we're happy.  The caller
	 * is responsible for waiting on them and updating the cache and the
	 * inode.
	 */
	io_ctl->entries = entries;
	io_ctl->bitmaps = bitmaps;

	ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
	if (ret)
		goto out;

	return 0;

out:
	io_ctl->inode = NULL;
	io_ctl_free(io_ctl);
	if (ret) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	if (must_iput)
		iput(inode);
	return ret;

out_nospc_locked:
	cleanup_bitmap_list(&bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);

out_nospc:
	cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list);

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);

	goto out;
}

int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	root = root->fs_info->tree_root;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, block_group,
				      &block_group->io_ctl, trans,
				      path, block_group->key.objectid);
	if (ret) {
#ifdef DEBUG
		btrfs_err(root->fs_info,
			"failed to write free space cache for block group %llu",
			block_group->key.objectid);
#endif
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);

		block_group->io_ctl.inode = NULL;
		iput(inode);
	}

	/*
	 * If ret == 0 the caller is expected to call btrfs_wait_cache_io
	 * to wait for IO and put the inode.
	 */

	return ret;
}

static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
					  u64 offset)
{
	ASSERT(offset >= bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div_u64(offset, unit));
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return
(unsigned long)(div_u64(bytes, unit)); 1420 } 1421 1422 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1423 u64 offset) 1424 { 1425 u64 bitmap_start; 1426 u32 bytes_per_bitmap; 1427 1428 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1429 bitmap_start = offset - ctl->start; 1430 bitmap_start = div_u64(bitmap_start, bytes_per_bitmap); 1431 bitmap_start *= bytes_per_bitmap; 1432 bitmap_start += ctl->start; 1433 1434 return bitmap_start; 1435 } 1436 1437 static int tree_insert_offset(struct rb_root *root, u64 offset, 1438 struct rb_node *node, int bitmap) 1439 { 1440 struct rb_node **p = &root->rb_node; 1441 struct rb_node *parent = NULL; 1442 struct btrfs_free_space *info; 1443 1444 while (*p) { 1445 parent = *p; 1446 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1447 1448 if (offset < info->offset) { 1449 p = &(*p)->rb_left; 1450 } else if (offset > info->offset) { 1451 p = &(*p)->rb_right; 1452 } else { 1453 /* 1454 * we could have a bitmap entry and an extent entry 1455 * share the same offset. If this is the case, we want 1456 * the extent entry to always be found first if we do a 1457 * linear search through the tree, since we want to have 1458 * the quickest allocation time, and allocating from an 1459 * extent is faster than allocating from a bitmap. So 1460 * if we're inserting a bitmap and we find an entry at 1461 * this offset, we want to go right, or after this entry 1462 * logically. If we are inserting an extent and we've 1463 * found a bitmap, we want to go left, or before 1464 * logically. 1465 */ 1466 if (bitmap) { 1467 if (info->bitmap) { 1468 WARN_ON_ONCE(1); 1469 return -EEXIST; 1470 } 1471 p = &(*p)->rb_right; 1472 } else { 1473 if (!info->bitmap) { 1474 WARN_ON_ONCE(1); 1475 return -EEXIST; 1476 } 1477 p = &(*p)->rb_left; 1478 } 1479 } 1480 } 1481 1482 rb_link_node(node, parent, p); 1483 rb_insert_color(node, root); 1484 1485 return 0; 1486 } 1487 1488 /* 1489 * searches the tree for the given offset. 1490 * 1491 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1492 * want a section that has at least bytes size and comes at or after the given 1493 * offset. 1494 */ 1495 static struct btrfs_free_space * 1496 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1497 u64 offset, int bitmap_only, int fuzzy) 1498 { 1499 struct rb_node *n = ctl->free_space_offset.rb_node; 1500 struct btrfs_free_space *entry, *prev = NULL; 1501 1502 /* find entry that is closest to the 'offset' */ 1503 while (1) { 1504 if (!n) { 1505 entry = NULL; 1506 break; 1507 } 1508 1509 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1510 prev = entry; 1511 1512 if (offset < entry->offset) 1513 n = n->rb_left; 1514 else if (offset > entry->offset) 1515 n = n->rb_right; 1516 else 1517 break; 1518 } 1519 1520 if (bitmap_only) { 1521 if (!entry) 1522 return NULL; 1523 if (entry->bitmap) 1524 return entry; 1525 1526 /* 1527 * bitmap entry and extent entry may share same offset, 1528 * in that case, bitmap entry comes after extent entry. 
1529 */ 1530 n = rb_next(n); 1531 if (!n) 1532 return NULL; 1533 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1534 if (entry->offset != offset) 1535 return NULL; 1536 1537 WARN_ON(!entry->bitmap); 1538 return entry; 1539 } else if (entry) { 1540 if (entry->bitmap) { 1541 /* 1542 * if previous extent entry covers the offset, 1543 * we should return it instead of the bitmap entry 1544 */ 1545 n = rb_prev(&entry->offset_index); 1546 if (n) { 1547 prev = rb_entry(n, struct btrfs_free_space, 1548 offset_index); 1549 if (!prev->bitmap && 1550 prev->offset + prev->bytes > offset) 1551 entry = prev; 1552 } 1553 } 1554 return entry; 1555 } 1556 1557 if (!prev) 1558 return NULL; 1559 1560 /* find last entry before the 'offset' */ 1561 entry = prev; 1562 if (entry->offset > offset) { 1563 n = rb_prev(&entry->offset_index); 1564 if (n) { 1565 entry = rb_entry(n, struct btrfs_free_space, 1566 offset_index); 1567 ASSERT(entry->offset <= offset); 1568 } else { 1569 if (fuzzy) 1570 return entry; 1571 else 1572 return NULL; 1573 } 1574 } 1575 1576 if (entry->bitmap) { 1577 n = rb_prev(&entry->offset_index); 1578 if (n) { 1579 prev = rb_entry(n, struct btrfs_free_space, 1580 offset_index); 1581 if (!prev->bitmap && 1582 prev->offset + prev->bytes > offset) 1583 return prev; 1584 } 1585 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1586 return entry; 1587 } else if (entry->offset + entry->bytes > offset) 1588 return entry; 1589 1590 if (!fuzzy) 1591 return NULL; 1592 1593 while (1) { 1594 if (entry->bitmap) { 1595 if (entry->offset + BITS_PER_BITMAP * 1596 ctl->unit > offset) 1597 break; 1598 } else { 1599 if (entry->offset + entry->bytes > offset) 1600 break; 1601 } 1602 1603 n = rb_next(&entry->offset_index); 1604 if (!n) 1605 return NULL; 1606 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1607 } 1608 return entry; 1609 } 1610 1611 static inline void 1612 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1613 struct btrfs_free_space *info) 1614 { 1615 rb_erase(&info->offset_index, &ctl->free_space_offset); 1616 ctl->free_extents--; 1617 } 1618 1619 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1620 struct btrfs_free_space *info) 1621 { 1622 __unlink_free_space(ctl, info); 1623 ctl->free_space -= info->bytes; 1624 } 1625 1626 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1627 struct btrfs_free_space *info) 1628 { 1629 int ret = 0; 1630 1631 ASSERT(info->bytes || info->bitmap); 1632 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1633 &info->offset_index, (info->bitmap != NULL)); 1634 if (ret) 1635 return ret; 1636 1637 ctl->free_space += info->bytes; 1638 ctl->free_extents++; 1639 return ret; 1640 } 1641 1642 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1643 { 1644 struct btrfs_block_group_cache *block_group = ctl->private; 1645 u64 max_bytes; 1646 u64 bitmap_bytes; 1647 u64 extent_bytes; 1648 u64 size = block_group->key.offset; 1649 u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1650 u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg); 1651 1652 max_bitmaps = max_t(u32, max_bitmaps, 1); 1653 1654 ASSERT(ctl->total_bitmaps <= max_bitmaps); 1655 1656 /* 1657 * The goal is to keep the total amount of memory used per 1gb of space 1658 * at or below 32k, so we need to adjust how much memory we allow to be 1659 * used by extent based free space tracking 1660 */ 1661 if (size < 1024 * 1024 * 1024) 1662 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1663 else 1664 max_bytes = 
MAX_CACHE_BYTES_PER_GIG *
			div_u64(size, 1024 * 1024 * 1024);

	/*
	 * We want to account for one more bitmap than we currently have so
	 * that we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG
	 * as we add more bitmaps.
	 */
	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		ctl->extents_thresh = 0;
		return;
	}

	/*
	 * We want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);

	ctl->extents_thresh =
		div_u64(extent_bytes, sizeof(struct btrfs_free_space));
}

static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *info,
				       u64 offset, u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	ASSERT(start + count <= BITS_PER_BITMAP);

	bitmap_clear(info->bitmap, start, count);

	info->bytes -= bytes;
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	ASSERT(start + count <= BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;
}

/*
 * If we cannot find a suitable extent, we will use bytes to record
 * the size of the max extent.
1732 */ 1733 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1734 struct btrfs_free_space *bitmap_info, u64 *offset, 1735 u64 *bytes) 1736 { 1737 unsigned long found_bits = 0; 1738 unsigned long max_bits = 0; 1739 unsigned long bits, i; 1740 unsigned long next_zero; 1741 unsigned long extent_bits; 1742 1743 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1744 max_t(u64, *offset, bitmap_info->offset)); 1745 bits = bytes_to_bits(*bytes, ctl->unit); 1746 1747 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1748 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1749 BITS_PER_BITMAP, i); 1750 extent_bits = next_zero - i; 1751 if (extent_bits >= bits) { 1752 found_bits = extent_bits; 1753 break; 1754 } else if (extent_bits > max_bits) { 1755 max_bits = extent_bits; 1756 } 1757 i = next_zero; 1758 } 1759 1760 if (found_bits) { 1761 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1762 *bytes = (u64)(found_bits) * ctl->unit; 1763 return 0; 1764 } 1765 1766 *bytes = (u64)(max_bits) * ctl->unit; 1767 return -1; 1768 } 1769 1770 /* Cache the size of the max extent in bytes */ 1771 static struct btrfs_free_space * 1772 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1773 unsigned long align, u64 *max_extent_size) 1774 { 1775 struct btrfs_free_space *entry; 1776 struct rb_node *node; 1777 u64 tmp; 1778 u64 align_off; 1779 int ret; 1780 1781 if (!ctl->free_space_offset.rb_node) 1782 goto out; 1783 1784 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1785 if (!entry) 1786 goto out; 1787 1788 for (node = &entry->offset_index; node; node = rb_next(node)) { 1789 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1790 if (entry->bytes < *bytes) { 1791 if (entry->bytes > *max_extent_size) 1792 *max_extent_size = entry->bytes; 1793 continue; 1794 } 1795 1796 /* make sure the space returned is big enough 1797 * to match our requested alignment 1798 */ 1799 if (*bytes >= align) { 1800 tmp = entry->offset - ctl->start + align - 1; 1801 tmp = div64_u64(tmp, align); 1802 tmp = tmp * align + ctl->start; 1803 align_off = tmp - entry->offset; 1804 } else { 1805 align_off = 0; 1806 tmp = entry->offset; 1807 } 1808 1809 if (entry->bytes < *bytes + align_off) { 1810 if (entry->bytes > *max_extent_size) 1811 *max_extent_size = entry->bytes; 1812 continue; 1813 } 1814 1815 if (entry->bitmap) { 1816 u64 size = *bytes; 1817 1818 ret = search_bitmap(ctl, entry, &tmp, &size); 1819 if (!ret) { 1820 *offset = tmp; 1821 *bytes = size; 1822 return entry; 1823 } else if (size > *max_extent_size) { 1824 *max_extent_size = size; 1825 } 1826 continue; 1827 } 1828 1829 *offset = tmp; 1830 *bytes = entry->bytes - align_off; 1831 return entry; 1832 } 1833 out: 1834 return NULL; 1835 } 1836 1837 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1838 struct btrfs_free_space *info, u64 offset) 1839 { 1840 info->offset = offset_to_bitmap(ctl, offset); 1841 info->bytes = 0; 1842 INIT_LIST_HEAD(&info->list); 1843 link_free_space(ctl, info); 1844 ctl->total_bitmaps++; 1845 1846 ctl->op->recalc_thresholds(ctl); 1847 } 1848 1849 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1850 struct btrfs_free_space *bitmap_info) 1851 { 1852 unlink_free_space(ctl, bitmap_info); 1853 kfree(bitmap_info->bitmap); 1854 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1855 ctl->total_bitmaps--; 1856 ctl->op->recalc_thresholds(ctl); 1857 } 1858 1859 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1860 struct 
btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap.  We could only cover
	 * some of the extent in this bitmap thanks to how we add space, so
	 * we need to search for as much of it as we can and clear that
	 * amount, and then go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * No entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * If the next entry isn't a bitmap we need to return to let
		 * the extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok, the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	return bytes_to_set;

}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap.
	 */
	if (ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want
		 * to use up all of our free slots in the cache with them, we
		 * want to reserve them for larger extents.  However, if we
		 * have plenty of cache left then go ahead and add them; no
		 * sense in adding the overhead of a bitmap if we don't have to.
1968 */ 1969 if (info->bytes <= block_group->sectorsize * 4) { 1970 if (ctl->free_extents * 2 <= ctl->extents_thresh) 1971 return false; 1972 } else { 1973 return false; 1974 } 1975 } 1976 1977 /* 1978 * The original block groups from mkfs can be really small, like 8 1979 * megabytes, so don't bother with a bitmap for those entries. However 1980 * some block groups can be smaller than what a bitmap would cover but 1981 * are still large enough that they could overflow the 32k memory limit, 1982 * so allow those block groups to still be allowed to have a bitmap 1983 * entry. 1984 */ 1985 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset) 1986 return false; 1987 1988 return true; 1989 } 1990 1991 static struct btrfs_free_space_op free_space_op = { 1992 .recalc_thresholds = recalculate_thresholds, 1993 .use_bitmap = use_bitmap, 1994 }; 1995 1996 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 1997 struct btrfs_free_space *info) 1998 { 1999 struct btrfs_free_space *bitmap_info; 2000 struct btrfs_block_group_cache *block_group = NULL; 2001 int added = 0; 2002 u64 bytes, offset, bytes_added; 2003 int ret; 2004 2005 bytes = info->bytes; 2006 offset = info->offset; 2007 2008 if (!ctl->op->use_bitmap(ctl, info)) 2009 return 0; 2010 2011 if (ctl->op == &free_space_op) 2012 block_group = ctl->private; 2013 again: 2014 /* 2015 * Since we link bitmaps right into the cluster we need to see if we 2016 * have a cluster here, and if so and it has our bitmap we need to add 2017 * the free space to that bitmap. 2018 */ 2019 if (block_group && !list_empty(&block_group->cluster_list)) { 2020 struct btrfs_free_cluster *cluster; 2021 struct rb_node *node; 2022 struct btrfs_free_space *entry; 2023 2024 cluster = list_entry(block_group->cluster_list.next, 2025 struct btrfs_free_cluster, 2026 block_group_list); 2027 spin_lock(&cluster->lock); 2028 node = rb_first(&cluster->root); 2029 if (!node) { 2030 spin_unlock(&cluster->lock); 2031 goto no_cluster_bitmap; 2032 } 2033 2034 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2035 if (!entry->bitmap) { 2036 spin_unlock(&cluster->lock); 2037 goto no_cluster_bitmap; 2038 } 2039 2040 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2041 bytes_added = add_bytes_to_bitmap(ctl, entry, 2042 offset, bytes); 2043 bytes -= bytes_added; 2044 offset += bytes_added; 2045 } 2046 spin_unlock(&cluster->lock); 2047 if (!bytes) { 2048 ret = 1; 2049 goto out; 2050 } 2051 } 2052 2053 no_cluster_bitmap: 2054 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2055 1, 0); 2056 if (!bitmap_info) { 2057 ASSERT(added == 0); 2058 goto new_bitmap; 2059 } 2060 2061 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 2062 bytes -= bytes_added; 2063 offset += bytes_added; 2064 added = 0; 2065 2066 if (!bytes) { 2067 ret = 1; 2068 goto out; 2069 } else 2070 goto again; 2071 2072 new_bitmap: 2073 if (info && info->bitmap) { 2074 add_new_bitmap(ctl, info, offset); 2075 added = 1; 2076 info = NULL; 2077 goto again; 2078 } else { 2079 spin_unlock(&ctl->tree_lock); 2080 2081 /* no pre-allocated info, allocate a new one */ 2082 if (!info) { 2083 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2084 GFP_NOFS); 2085 if (!info) { 2086 spin_lock(&ctl->tree_lock); 2087 ret = -ENOMEM; 2088 goto out; 2089 } 2090 } 2091 2092 /* allocate the bitmap */ 2093 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 2094 spin_lock(&ctl->tree_lock); 2095 if (!info->bitmap) { 2096 ret = -ENOMEM; 2097 goto out; 2098 } 2099 goto again; 2100 
} 2101 2102 out: 2103 if (info) { 2104 if (info->bitmap) 2105 kfree(info->bitmap); 2106 kmem_cache_free(btrfs_free_space_cachep, info); 2107 } 2108 2109 return ret; 2110 } 2111 2112 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 2113 struct btrfs_free_space *info, bool update_stat) 2114 { 2115 struct btrfs_free_space *left_info; 2116 struct btrfs_free_space *right_info; 2117 bool merged = false; 2118 u64 offset = info->offset; 2119 u64 bytes = info->bytes; 2120 2121 /* 2122 * first we want to see if there is free space adjacent to the range we 2123 * are adding, if there is remove that struct and add a new one to 2124 * cover the entire range 2125 */ 2126 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 2127 if (right_info && rb_prev(&right_info->offset_index)) 2128 left_info = rb_entry(rb_prev(&right_info->offset_index), 2129 struct btrfs_free_space, offset_index); 2130 else 2131 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 2132 2133 if (right_info && !right_info->bitmap) { 2134 if (update_stat) 2135 unlink_free_space(ctl, right_info); 2136 else 2137 __unlink_free_space(ctl, right_info); 2138 info->bytes += right_info->bytes; 2139 kmem_cache_free(btrfs_free_space_cachep, right_info); 2140 merged = true; 2141 } 2142 2143 if (left_info && !left_info->bitmap && 2144 left_info->offset + left_info->bytes == offset) { 2145 if (update_stat) 2146 unlink_free_space(ctl, left_info); 2147 else 2148 __unlink_free_space(ctl, left_info); 2149 info->offset = left_info->offset; 2150 info->bytes += left_info->bytes; 2151 kmem_cache_free(btrfs_free_space_cachep, left_info); 2152 merged = true; 2153 } 2154 2155 return merged; 2156 } 2157 2158 static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, 2159 struct btrfs_free_space *info, 2160 bool update_stat) 2161 { 2162 struct btrfs_free_space *bitmap; 2163 unsigned long i; 2164 unsigned long j; 2165 const u64 end = info->offset + info->bytes; 2166 const u64 bitmap_offset = offset_to_bitmap(ctl, end); 2167 u64 bytes; 2168 2169 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2170 if (!bitmap) 2171 return false; 2172 2173 i = offset_to_bit(bitmap->offset, ctl->unit, end); 2174 j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); 2175 if (j == i) 2176 return false; 2177 bytes = (j - i) * ctl->unit; 2178 info->bytes += bytes; 2179 2180 if (update_stat) 2181 bitmap_clear_bits(ctl, bitmap, end, bytes); 2182 else 2183 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2184 2185 if (!bitmap->bytes) 2186 free_bitmap(ctl, bitmap); 2187 2188 return true; 2189 } 2190 2191 static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2192 struct btrfs_free_space *info, 2193 bool update_stat) 2194 { 2195 struct btrfs_free_space *bitmap; 2196 u64 bitmap_offset; 2197 unsigned long i; 2198 unsigned long j; 2199 unsigned long prev_j; 2200 u64 bytes; 2201 2202 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2203 /* If we're on a boundary, try the previous logical bitmap. 
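	 * (A worked example of the boundary case, assuming 4 KiB pages and a
	 * 4 KiB sectorsize: each bitmap then covers BITS_PER_BITMAP *
	 * ctl->unit = 32768 * 4096 bytes = 128 MiB.  If info->offset sits
	 * exactly on the start of its bitmap window, offset_to_bitmap()
	 * returns info->offset itself, so any free bits in front of it can
	 * only live in the window covering info->offset - 1.)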
*/ 2204 if (bitmap_offset == info->offset) { 2205 if (info->offset == 0) 2206 return false; 2207 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2208 } 2209 2210 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2211 if (!bitmap) 2212 return false; 2213 2214 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2215 j = 0; 2216 prev_j = (unsigned long)-1; 2217 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2218 if (j > i) 2219 break; 2220 prev_j = j; 2221 } 2222 if (prev_j == i) 2223 return false; 2224 2225 if (prev_j == (unsigned long)-1) 2226 bytes = (i + 1) * ctl->unit; 2227 else 2228 bytes = (i - prev_j) * ctl->unit; 2229 2230 info->offset -= bytes; 2231 info->bytes += bytes; 2232 2233 if (update_stat) 2234 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2235 else 2236 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2237 2238 if (!bitmap->bytes) 2239 free_bitmap(ctl, bitmap); 2240 2241 return true; 2242 } 2243 2244 /* 2245 * We prefer always to allocate from extent entries, both for clustered and 2246 * non-clustered allocation requests. So when attempting to add a new extent 2247 * entry, try to see if there's adjacent free space in bitmap entries, and if 2248 * there is, migrate that space from the bitmaps to the extent. 2249 * Like this we get better chances of satisfying space allocation requests 2250 * because we attempt to satisfy them based on a single cache entry, and never 2251 * on 2 or more entries - even if the entries represent a contiguous free space 2252 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2253 * ends). 2254 */ 2255 static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2256 struct btrfs_free_space *info, 2257 bool update_stat) 2258 { 2259 /* 2260 * Only work with disconnected entries, as we can change their offset, 2261 * and must be extent entries. 2262 */ 2263 ASSERT(!info->bitmap); 2264 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2265 2266 if (ctl->total_bitmaps > 0) { 2267 bool stole_end; 2268 bool stole_front = false; 2269 2270 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2271 if (ctl->total_bitmaps > 0) 2272 stole_front = steal_from_bitmap_to_front(ctl, info, 2273 update_stat); 2274 2275 if (stole_end || stole_front) 2276 try_merge_free_space(ctl, info, update_stat); 2277 } 2278 } 2279 2280 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, 2281 u64 offset, u64 bytes) 2282 { 2283 struct btrfs_free_space *info; 2284 int ret = 0; 2285 2286 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2287 if (!info) 2288 return -ENOMEM; 2289 2290 info->offset = offset; 2291 info->bytes = bytes; 2292 RB_CLEAR_NODE(&info->offset_index); 2293 2294 spin_lock(&ctl->tree_lock); 2295 2296 if (try_merge_free_space(ctl, info, true)) 2297 goto link; 2298 2299 /* 2300 * There was no extent directly to the left or right of this new 2301 * extent then we know we're going to have to allocate a new extent, so 2302 * before we do that see if we need to drop this into a bitmap 2303 */ 2304 ret = insert_into_bitmap(ctl, info); 2305 if (ret < 0) { 2306 goto out; 2307 } else if (ret) { 2308 ret = 0; 2309 goto out; 2310 } 2311 link: 2312 /* 2313 * Only steal free space from adjacent bitmaps if we're sure we're not 2314 * going to add the new free space to existing bitmap entries - because 2315 * that would mean unnecessary work that would be reverted. Therefore 2316 * attempt to steal space from bitmaps if we're adding an extent entry. 
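	 *
	 * For example (illustrative only): adding the extent [0, 64K) next to
	 * a bitmap that has [64K, 128K) set should end up as a single
	 * [0, 128K) extent entry instead of one extent entry plus one bitmap
	 * entry, so a later 128K allocation can be served from a single
	 * cache entry.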
	 */
	steal_from_bitmap(ctl, info, true);

	ret = link_free_space(ctl, info);
	if (ret)
		kmem_cache_free(btrfs_free_space_cachep, info);
out:
	spin_unlock(&ctl->tree_lock);

	if (ret) {
		printk(KERN_CRIT "BTRFS: unable to add free space: %d\n", ret);
		ASSERT(ret != -EEXIST);
	}

	return ret;
}

int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	int ret;
	bool re_search = false;

	spin_lock(&ctl->tree_lock);

again:
	ret = 0;
	if (!bytes)
		goto out_lock;

	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info) {
			/*
			 * If we found a partial bit of our free space in a
			 * bitmap but then couldn't find the other part this may
			 * be a problem, so WARN about it.
			 */
			WARN_ON(re_search);
			goto out_lock;
		}
	}

	re_search = false;
	if (!info->bitmap) {
		unlink_free_space(ctl, info);
		if (offset == info->offset) {
			u64 to_free = min(bytes, info->bytes);

			info->bytes -= to_free;
			info->offset += to_free;
			if (info->bytes) {
				ret = link_free_space(ctl, info);
				WARN_ON(ret);
			} else {
				kmem_cache_free(btrfs_free_space_cachep, info);
			}

			offset += to_free;
			bytes -= to_free;
			goto again;
		} else {
			u64 old_end = info->bytes + info->offset;

			info->bytes = offset - info->offset;
			ret = link_free_space(ctl, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;

			/* Not enough bytes in this entry to satisfy us */
			if (old_end < offset + bytes) {
				bytes -= old_end - offset;
				offset = old_end;
				goto again;
			} else if (old_end == offset + bytes) {
				/* all done */
				goto out_lock;
			}
			spin_unlock(&ctl->tree_lock);

			ret = btrfs_add_free_space(block_group, offset + bytes,
						   old_end - (offset + bytes));
			WARN_ON(ret);
			goto out;
		}
	}

	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
	if (ret == -EAGAIN) {
		re_search = true;
		goto again;
	}
out_lock:
	spin_unlock(&ctl->tree_lock);
out:
	return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes && !block_group->ro)
			count++;
		btrfs_crit(block_group->fs_info,
			   "entry offset %llu, bytes %llu, bitmap %s",
			   info->offset, info->bytes,
			   (info->bitmap) ? "yes" : "no");
	}
	btrfs_info(block_group->fs_info, "block group has cluster?: %s",
		   list_empty(&block_group->cluster_list) ? "no" : "yes");
	btrfs_info(block_group->fs_info,
		   "%d blocks of free space at or bigger than %llu bytes",
		   count, bytes);
}

void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;

	spin_lock_init(&ctl->tree_lock);
	ctl->unit = block_group->sectorsize;
	ctl->start = block_group->key.objectid;
	ctl->private = block_group;
	ctl->op = &free_space_op;
	INIT_LIST_HEAD(&ctl->trimming_ranges);
	mutex_init(&ctl->cache_writeout_mutex);

	/*
	 * we only want to have 32k of ram per block group for keeping
	 * track of free space, and if we pass 1/2 of that we want to
	 * start converting things over to using bitmaps
	 */
	ctl->extents_thresh = ((1024 * 32) / 2) /
			      sizeof(struct btrfs_free_space);
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache. If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already. In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);

	node = rb_first(&cluster->root);
	while (node) {
		bool bitmap;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		RB_CLEAR_NODE(&entry->offset_index);

		bitmap = (entry->bitmap != NULL);
		if (!bitmap) {
			try_merge_free_space(ctl, entry, false);
			steal_from_bitmap(ctl, entry, false);
		}
		tree_insert_offset(&ctl->free_space_offset,
				   entry->offset, &entry->offset_index, bitmap);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

static void __btrfs_remove_free_space_cache_locked(
				struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *node;

	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!info->bitmap) {
			unlink_free_space(ctl, info);
			kmem_cache_free(btrfs_free_space_cachep, info);
		} else {
			free_bitmap(ctl, info);
		}

		cond_resched_lock(&ctl->tree_lock);
	}
}

void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
	spin_lock(&ctl->tree_lock);
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&ctl->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct
				     btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);

		cond_resched_lock(&ctl->tree_lock);
	}
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}

u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size,
			       u64 *max_extent_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;
	u64 align_gap = 0;
	u64 align_gap_len = 0;

	spin_lock(&ctl->tree_lock);
	entry = find_free_space(ctl, &offset, &bytes_search,
				block_group->full_stripe_len, max_extent_size);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(ctl, entry, offset, bytes);
		if (!entry->bytes)
			free_bitmap(ctl, entry);
	} else {
		unlink_free_space(ctl, entry);
		align_gap_len = offset - entry->offset;
		align_gap = entry->offset;

		entry->offset = offset + bytes;
		WARN_ON(entry->bytes < bytes + align_gap_len);

		entry->bytes -= bytes + align_gap_len;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	}
out:
	spin_unlock(&ctl->tree_lock);

	if (align_gap_len)
		__btrfs_add_free_space(ctl, align_gap, align_gap_len);
	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache. If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
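 *
 * Illustrative usage (a sketch of the two calling conventions, not a quote
 * of any particular caller):
 *
 *	btrfs_return_cluster_to_free_space(NULL, cluster);
 *		drains the cluster back into whichever block group it
 *		currently points at (taking a temporary reference on it)
 *
 *	btrfs_return_cluster_to_free_space(block_group, cluster);
 *		drains the cluster only if it still belongs to the given
 *		block group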
2615 */ 2616 int btrfs_return_cluster_to_free_space( 2617 struct btrfs_block_group_cache *block_group, 2618 struct btrfs_free_cluster *cluster) 2619 { 2620 struct btrfs_free_space_ctl *ctl; 2621 int ret; 2622 2623 /* first, get a safe pointer to the block group */ 2624 spin_lock(&cluster->lock); 2625 if (!block_group) { 2626 block_group = cluster->block_group; 2627 if (!block_group) { 2628 spin_unlock(&cluster->lock); 2629 return 0; 2630 } 2631 } else if (cluster->block_group != block_group) { 2632 /* someone else has already freed it don't redo their work */ 2633 spin_unlock(&cluster->lock); 2634 return 0; 2635 } 2636 atomic_inc(&block_group->count); 2637 spin_unlock(&cluster->lock); 2638 2639 ctl = block_group->free_space_ctl; 2640 2641 /* now return any extents the cluster had on it */ 2642 spin_lock(&ctl->tree_lock); 2643 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2644 spin_unlock(&ctl->tree_lock); 2645 2646 /* finally drop our ref */ 2647 btrfs_put_block_group(block_group); 2648 return ret; 2649 } 2650 2651 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2652 struct btrfs_free_cluster *cluster, 2653 struct btrfs_free_space *entry, 2654 u64 bytes, u64 min_start, 2655 u64 *max_extent_size) 2656 { 2657 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2658 int err; 2659 u64 search_start = cluster->window_start; 2660 u64 search_bytes = bytes; 2661 u64 ret = 0; 2662 2663 search_start = min_start; 2664 search_bytes = bytes; 2665 2666 err = search_bitmap(ctl, entry, &search_start, &search_bytes); 2667 if (err) { 2668 if (search_bytes > *max_extent_size) 2669 *max_extent_size = search_bytes; 2670 return 0; 2671 } 2672 2673 ret = search_start; 2674 __bitmap_clear_bits(ctl, entry, ret, bytes); 2675 2676 return ret; 2677 } 2678 2679 /* 2680 * given a cluster, try to allocate 'bytes' from it, returns 0 2681 * if it couldn't find anything suitably large, or a logical disk offset 2682 * if things worked out 2683 */ 2684 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2685 struct btrfs_free_cluster *cluster, u64 bytes, 2686 u64 min_start, u64 *max_extent_size) 2687 { 2688 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2689 struct btrfs_free_space *entry = NULL; 2690 struct rb_node *node; 2691 u64 ret = 0; 2692 2693 spin_lock(&cluster->lock); 2694 if (bytes > cluster->max_size) 2695 goto out; 2696 2697 if (cluster->block_group != block_group) 2698 goto out; 2699 2700 node = rb_first(&cluster->root); 2701 if (!node) 2702 goto out; 2703 2704 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2705 while (1) { 2706 if (entry->bytes < bytes && entry->bytes > *max_extent_size) 2707 *max_extent_size = entry->bytes; 2708 2709 if (entry->bytes < bytes || 2710 (!entry->bitmap && entry->offset < min_start)) { 2711 node = rb_next(&entry->offset_index); 2712 if (!node) 2713 break; 2714 entry = rb_entry(node, struct btrfs_free_space, 2715 offset_index); 2716 continue; 2717 } 2718 2719 if (entry->bitmap) { 2720 ret = btrfs_alloc_from_bitmap(block_group, 2721 cluster, entry, bytes, 2722 cluster->window_start, 2723 max_extent_size); 2724 if (ret == 0) { 2725 node = rb_next(&entry->offset_index); 2726 if (!node) 2727 break; 2728 entry = rb_entry(node, struct btrfs_free_space, 2729 offset_index); 2730 continue; 2731 } 2732 cluster->window_start += bytes; 2733 } else { 2734 ret = entry->offset; 2735 2736 entry->offset += bytes; 2737 entry->bytes -= bytes; 2738 } 2739 2740 if (entry->bytes == 0) 
2741 rb_erase(&entry->offset_index, &cluster->root); 2742 break; 2743 } 2744 out: 2745 spin_unlock(&cluster->lock); 2746 2747 if (!ret) 2748 return 0; 2749 2750 spin_lock(&ctl->tree_lock); 2751 2752 ctl->free_space -= bytes; 2753 if (entry->bytes == 0) { 2754 ctl->free_extents--; 2755 if (entry->bitmap) { 2756 kfree(entry->bitmap); 2757 ctl->total_bitmaps--; 2758 ctl->op->recalc_thresholds(ctl); 2759 } 2760 kmem_cache_free(btrfs_free_space_cachep, entry); 2761 } 2762 2763 spin_unlock(&ctl->tree_lock); 2764 2765 return ret; 2766 } 2767 2768 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2769 struct btrfs_free_space *entry, 2770 struct btrfs_free_cluster *cluster, 2771 u64 offset, u64 bytes, 2772 u64 cont1_bytes, u64 min_bytes) 2773 { 2774 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2775 unsigned long next_zero; 2776 unsigned long i; 2777 unsigned long want_bits; 2778 unsigned long min_bits; 2779 unsigned long found_bits; 2780 unsigned long start = 0; 2781 unsigned long total_found = 0; 2782 int ret; 2783 2784 i = offset_to_bit(entry->offset, ctl->unit, 2785 max_t(u64, offset, entry->offset)); 2786 want_bits = bytes_to_bits(bytes, ctl->unit); 2787 min_bits = bytes_to_bits(min_bytes, ctl->unit); 2788 2789 again: 2790 found_bits = 0; 2791 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 2792 next_zero = find_next_zero_bit(entry->bitmap, 2793 BITS_PER_BITMAP, i); 2794 if (next_zero - i >= min_bits) { 2795 found_bits = next_zero - i; 2796 break; 2797 } 2798 i = next_zero; 2799 } 2800 2801 if (!found_bits) 2802 return -ENOSPC; 2803 2804 if (!total_found) { 2805 start = i; 2806 cluster->max_size = 0; 2807 } 2808 2809 total_found += found_bits; 2810 2811 if (cluster->max_size < found_bits * ctl->unit) 2812 cluster->max_size = found_bits * ctl->unit; 2813 2814 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2815 i = next_zero + 1; 2816 goto again; 2817 } 2818 2819 cluster->window_start = start * ctl->unit + entry->offset; 2820 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2821 ret = tree_insert_offset(&cluster->root, entry->offset, 2822 &entry->offset_index, 1); 2823 ASSERT(!ret); /* -EEXIST; Logic error */ 2824 2825 trace_btrfs_setup_cluster(block_group, cluster, 2826 total_found * ctl->unit, 1); 2827 return 0; 2828 } 2829 2830 /* 2831 * This searches the block group for just extents to fill the cluster with. 2832 * Try to find a cluster with at least bytes total bytes, at least one 2833 * extent of cont1_bytes, and other clusters of at least min_bytes. 2834 */ 2835 static noinline int 2836 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2837 struct btrfs_free_cluster *cluster, 2838 struct list_head *bitmaps, u64 offset, u64 bytes, 2839 u64 cont1_bytes, u64 min_bytes) 2840 { 2841 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2842 struct btrfs_free_space *first = NULL; 2843 struct btrfs_free_space *entry = NULL; 2844 struct btrfs_free_space *last; 2845 struct rb_node *node; 2846 u64 window_free; 2847 u64 max_extent; 2848 u64 total_size = 0; 2849 2850 entry = tree_search_offset(ctl, offset, 0, 1); 2851 if (!entry) 2852 return -ENOSPC; 2853 2854 /* 2855 * We don't want bitmaps, so just move along until we find a normal 2856 * extent entry. 
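	 * Bitmap entries we walk past are stashed on the caller's 'bitmaps'
	 * list, so that setup_cluster_bitmap() can try them afterwards
	 * without searching the tree again.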
2857 */ 2858 while (entry->bitmap || entry->bytes < min_bytes) { 2859 if (entry->bitmap && list_empty(&entry->list)) 2860 list_add_tail(&entry->list, bitmaps); 2861 node = rb_next(&entry->offset_index); 2862 if (!node) 2863 return -ENOSPC; 2864 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2865 } 2866 2867 window_free = entry->bytes; 2868 max_extent = entry->bytes; 2869 first = entry; 2870 last = entry; 2871 2872 for (node = rb_next(&entry->offset_index); node; 2873 node = rb_next(&entry->offset_index)) { 2874 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2875 2876 if (entry->bitmap) { 2877 if (list_empty(&entry->list)) 2878 list_add_tail(&entry->list, bitmaps); 2879 continue; 2880 } 2881 2882 if (entry->bytes < min_bytes) 2883 continue; 2884 2885 last = entry; 2886 window_free += entry->bytes; 2887 if (entry->bytes > max_extent) 2888 max_extent = entry->bytes; 2889 } 2890 2891 if (window_free < bytes || max_extent < cont1_bytes) 2892 return -ENOSPC; 2893 2894 cluster->window_start = first->offset; 2895 2896 node = &first->offset_index; 2897 2898 /* 2899 * now we've found our entries, pull them out of the free space 2900 * cache and put them into the cluster rbtree 2901 */ 2902 do { 2903 int ret; 2904 2905 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2906 node = rb_next(&entry->offset_index); 2907 if (entry->bitmap || entry->bytes < min_bytes) 2908 continue; 2909 2910 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2911 ret = tree_insert_offset(&cluster->root, entry->offset, 2912 &entry->offset_index, 0); 2913 total_size += entry->bytes; 2914 ASSERT(!ret); /* -EEXIST; Logic error */ 2915 } while (node && entry != last); 2916 2917 cluster->max_size = max_extent; 2918 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2919 return 0; 2920 } 2921 2922 /* 2923 * This specifically looks for bitmaps that may work in the cluster, we assume 2924 * that we have already failed to find extents that will work. 2925 */ 2926 static noinline int 2927 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2928 struct btrfs_free_cluster *cluster, 2929 struct list_head *bitmaps, u64 offset, u64 bytes, 2930 u64 cont1_bytes, u64 min_bytes) 2931 { 2932 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2933 struct btrfs_free_space *entry; 2934 int ret = -ENOSPC; 2935 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2936 2937 if (ctl->total_bitmaps == 0) 2938 return -ENOSPC; 2939 2940 /* 2941 * The bitmap that covers offset won't be in the list unless offset 2942 * is just its start offset. 2943 */ 2944 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 2945 if (entry->offset != bitmap_offset) { 2946 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 2947 if (entry && list_empty(&entry->list)) 2948 list_add(&entry->list, bitmaps); 2949 } 2950 2951 list_for_each_entry(entry, bitmaps, list) { 2952 if (entry->bytes < bytes) 2953 continue; 2954 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 2955 bytes, cont1_bytes, min_bytes); 2956 if (!ret) 2957 return 0; 2958 } 2959 2960 /* 2961 * The bitmaps list has all the bitmaps that record free space 2962 * starting after offset, so no more search is required. 2963 */ 2964 return -ENOSPC; 2965 } 2966 2967 /* 2968 * here we try to find a cluster of blocks in a block group. The goal 2969 * is to find at least bytes+empty_size. 2970 * We might not find them all in one contiguous area. 
2971 * 2972 * returns zero and sets up cluster if things worked out, otherwise 2973 * it returns -enospc 2974 */ 2975 int btrfs_find_space_cluster(struct btrfs_root *root, 2976 struct btrfs_block_group_cache *block_group, 2977 struct btrfs_free_cluster *cluster, 2978 u64 offset, u64 bytes, u64 empty_size) 2979 { 2980 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2981 struct btrfs_free_space *entry, *tmp; 2982 LIST_HEAD(bitmaps); 2983 u64 min_bytes; 2984 u64 cont1_bytes; 2985 int ret; 2986 2987 /* 2988 * Choose the minimum extent size we'll require for this 2989 * cluster. For SSD_SPREAD, don't allow any fragmentation. 2990 * For metadata, allow allocates with smaller extents. For 2991 * data, keep it dense. 2992 */ 2993 if (btrfs_test_opt(root, SSD_SPREAD)) { 2994 cont1_bytes = min_bytes = bytes + empty_size; 2995 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 2996 cont1_bytes = bytes; 2997 min_bytes = block_group->sectorsize; 2998 } else { 2999 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3000 min_bytes = block_group->sectorsize; 3001 } 3002 3003 spin_lock(&ctl->tree_lock); 3004 3005 /* 3006 * If we know we don't have enough space to make a cluster don't even 3007 * bother doing all the work to try and find one. 3008 */ 3009 if (ctl->free_space < bytes) { 3010 spin_unlock(&ctl->tree_lock); 3011 return -ENOSPC; 3012 } 3013 3014 spin_lock(&cluster->lock); 3015 3016 /* someone already found a cluster, hooray */ 3017 if (cluster->block_group) { 3018 ret = 0; 3019 goto out; 3020 } 3021 3022 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3023 min_bytes); 3024 3025 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3026 bytes + empty_size, 3027 cont1_bytes, min_bytes); 3028 if (ret) 3029 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3030 offset, bytes + empty_size, 3031 cont1_bytes, min_bytes); 3032 3033 /* Clear our temporary list */ 3034 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3035 list_del_init(&entry->list); 3036 3037 if (!ret) { 3038 atomic_inc(&block_group->count); 3039 list_add_tail(&cluster->block_group_list, 3040 &block_group->cluster_list); 3041 cluster->block_group = block_group; 3042 } else { 3043 trace_btrfs_failed_cluster_setup(block_group); 3044 } 3045 out: 3046 spin_unlock(&cluster->lock); 3047 spin_unlock(&ctl->tree_lock); 3048 3049 return ret; 3050 } 3051 3052 /* 3053 * simple code to zero out a cluster 3054 */ 3055 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3056 { 3057 spin_lock_init(&cluster->lock); 3058 spin_lock_init(&cluster->refill_lock); 3059 cluster->root = RB_ROOT; 3060 cluster->max_size = 0; 3061 INIT_LIST_HEAD(&cluster->block_group_list); 3062 cluster->block_group = NULL; 3063 } 3064 3065 static int do_trimming(struct btrfs_block_group_cache *block_group, 3066 u64 *total_trimmed, u64 start, u64 bytes, 3067 u64 reserved_start, u64 reserved_bytes, 3068 struct btrfs_trim_range *trim_entry) 3069 { 3070 struct btrfs_space_info *space_info = block_group->space_info; 3071 struct btrfs_fs_info *fs_info = block_group->fs_info; 3072 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3073 int ret; 3074 int update = 0; 3075 u64 trimmed = 0; 3076 3077 spin_lock(&space_info->lock); 3078 spin_lock(&block_group->lock); 3079 if (!block_group->ro) { 3080 block_group->reserved += reserved_bytes; 3081 space_info->bytes_reserved += reserved_bytes; 3082 update = 1; 3083 } 3084 spin_unlock(&block_group->lock); 3085 spin_unlock(&space_info->lock); 3086 
3087 ret = btrfs_discard_extent(fs_info->extent_root, 3088 start, bytes, &trimmed); 3089 if (!ret) 3090 *total_trimmed += trimmed; 3091 3092 mutex_lock(&ctl->cache_writeout_mutex); 3093 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 3094 list_del(&trim_entry->list); 3095 mutex_unlock(&ctl->cache_writeout_mutex); 3096 3097 if (update) { 3098 spin_lock(&space_info->lock); 3099 spin_lock(&block_group->lock); 3100 if (block_group->ro) 3101 space_info->bytes_readonly += reserved_bytes; 3102 block_group->reserved -= reserved_bytes; 3103 space_info->bytes_reserved -= reserved_bytes; 3104 spin_unlock(&space_info->lock); 3105 spin_unlock(&block_group->lock); 3106 } 3107 3108 return ret; 3109 } 3110 3111 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 3112 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3113 { 3114 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3115 struct btrfs_free_space *entry; 3116 struct rb_node *node; 3117 int ret = 0; 3118 u64 extent_start; 3119 u64 extent_bytes; 3120 u64 bytes; 3121 3122 while (start < end) { 3123 struct btrfs_trim_range trim_entry; 3124 3125 mutex_lock(&ctl->cache_writeout_mutex); 3126 spin_lock(&ctl->tree_lock); 3127 3128 if (ctl->free_space < minlen) { 3129 spin_unlock(&ctl->tree_lock); 3130 mutex_unlock(&ctl->cache_writeout_mutex); 3131 break; 3132 } 3133 3134 entry = tree_search_offset(ctl, start, 0, 1); 3135 if (!entry) { 3136 spin_unlock(&ctl->tree_lock); 3137 mutex_unlock(&ctl->cache_writeout_mutex); 3138 break; 3139 } 3140 3141 /* skip bitmaps */ 3142 while (entry->bitmap) { 3143 node = rb_next(&entry->offset_index); 3144 if (!node) { 3145 spin_unlock(&ctl->tree_lock); 3146 mutex_unlock(&ctl->cache_writeout_mutex); 3147 goto out; 3148 } 3149 entry = rb_entry(node, struct btrfs_free_space, 3150 offset_index); 3151 } 3152 3153 if (entry->offset >= end) { 3154 spin_unlock(&ctl->tree_lock); 3155 mutex_unlock(&ctl->cache_writeout_mutex); 3156 break; 3157 } 3158 3159 extent_start = entry->offset; 3160 extent_bytes = entry->bytes; 3161 start = max(start, extent_start); 3162 bytes = min(extent_start + extent_bytes, end) - start; 3163 if (bytes < minlen) { 3164 spin_unlock(&ctl->tree_lock); 3165 mutex_unlock(&ctl->cache_writeout_mutex); 3166 goto next; 3167 } 3168 3169 unlink_free_space(ctl, entry); 3170 kmem_cache_free(btrfs_free_space_cachep, entry); 3171 3172 spin_unlock(&ctl->tree_lock); 3173 trim_entry.start = extent_start; 3174 trim_entry.bytes = extent_bytes; 3175 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3176 mutex_unlock(&ctl->cache_writeout_mutex); 3177 3178 ret = do_trimming(block_group, total_trimmed, start, bytes, 3179 extent_start, extent_bytes, &trim_entry); 3180 if (ret) 3181 break; 3182 next: 3183 start += bytes; 3184 3185 if (fatal_signal_pending(current)) { 3186 ret = -ERESTARTSYS; 3187 break; 3188 } 3189 3190 cond_resched(); 3191 } 3192 out: 3193 return ret; 3194 } 3195 3196 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 3197 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 3198 { 3199 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3200 struct btrfs_free_space *entry; 3201 int ret = 0; 3202 int ret2; 3203 u64 bytes; 3204 u64 offset = offset_to_bitmap(ctl, start); 3205 3206 while (offset < end) { 3207 bool next_bitmap = false; 3208 struct btrfs_trim_range trim_entry; 3209 3210 mutex_lock(&ctl->cache_writeout_mutex); 3211 spin_lock(&ctl->tree_lock); 3212 3213 if (ctl->free_space < minlen) { 3214 
spin_unlock(&ctl->tree_lock); 3215 mutex_unlock(&ctl->cache_writeout_mutex); 3216 break; 3217 } 3218 3219 entry = tree_search_offset(ctl, offset, 1, 0); 3220 if (!entry) { 3221 spin_unlock(&ctl->tree_lock); 3222 mutex_unlock(&ctl->cache_writeout_mutex); 3223 next_bitmap = true; 3224 goto next; 3225 } 3226 3227 bytes = minlen; 3228 ret2 = search_bitmap(ctl, entry, &start, &bytes); 3229 if (ret2 || start >= end) { 3230 spin_unlock(&ctl->tree_lock); 3231 mutex_unlock(&ctl->cache_writeout_mutex); 3232 next_bitmap = true; 3233 goto next; 3234 } 3235 3236 bytes = min(bytes, end - start); 3237 if (bytes < minlen) { 3238 spin_unlock(&ctl->tree_lock); 3239 mutex_unlock(&ctl->cache_writeout_mutex); 3240 goto next; 3241 } 3242 3243 bitmap_clear_bits(ctl, entry, start, bytes); 3244 if (entry->bytes == 0) 3245 free_bitmap(ctl, entry); 3246 3247 spin_unlock(&ctl->tree_lock); 3248 trim_entry.start = start; 3249 trim_entry.bytes = bytes; 3250 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3251 mutex_unlock(&ctl->cache_writeout_mutex); 3252 3253 ret = do_trimming(block_group, total_trimmed, start, bytes, 3254 start, bytes, &trim_entry); 3255 if (ret) 3256 break; 3257 next: 3258 if (next_bitmap) { 3259 offset += BITS_PER_BITMAP * ctl->unit; 3260 } else { 3261 start += bytes; 3262 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 3263 offset += BITS_PER_BITMAP * ctl->unit; 3264 } 3265 3266 if (fatal_signal_pending(current)) { 3267 ret = -ERESTARTSYS; 3268 break; 3269 } 3270 3271 cond_resched(); 3272 } 3273 3274 return ret; 3275 } 3276 3277 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 3278 u64 *trimmed, u64 start, u64 end, u64 minlen) 3279 { 3280 int ret; 3281 3282 *trimmed = 0; 3283 3284 spin_lock(&block_group->lock); 3285 if (block_group->removed) { 3286 spin_unlock(&block_group->lock); 3287 return 0; 3288 } 3289 atomic_inc(&block_group->trimming); 3290 spin_unlock(&block_group->lock); 3291 3292 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 3293 if (ret) 3294 goto out; 3295 3296 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 3297 out: 3298 spin_lock(&block_group->lock); 3299 if (atomic_dec_and_test(&block_group->trimming) && 3300 block_group->removed) { 3301 struct extent_map_tree *em_tree; 3302 struct extent_map *em; 3303 3304 spin_unlock(&block_group->lock); 3305 3306 lock_chunks(block_group->fs_info->chunk_root); 3307 em_tree = &block_group->fs_info->mapping_tree.map_tree; 3308 write_lock(&em_tree->lock); 3309 em = lookup_extent_mapping(em_tree, block_group->key.objectid, 3310 1); 3311 BUG_ON(!em); /* logic error, can't happen */ 3312 /* 3313 * remove_extent_mapping() will delete us from the pinned_chunks 3314 * list, which is protected by the chunk mutex. 3315 */ 3316 remove_extent_mapping(em_tree, em); 3317 write_unlock(&em_tree->lock); 3318 unlock_chunks(block_group->fs_info->chunk_root); 3319 3320 /* once for us and once for the tree */ 3321 free_extent_map(em); 3322 free_extent_map(em); 3323 3324 /* 3325 * We've left one free space entry and other tasks trimming 3326 * this block group have left 1 entry each one. Free them. 3327 */ 3328 __btrfs_remove_free_space_cache(block_group->free_space_ctl); 3329 } else { 3330 spin_unlock(&block_group->lock); 3331 } 3332 3333 return ret; 3334 } 3335 3336 /* 3337 * Find the left-most item in the cache tree, and then return the 3338 * smallest inode number in the item. 
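 *
 * For example, if the left-most item is the extent entry covering inode
 * numbers [256, 260) (offset 256, bytes 4), 256 is returned and the entry
 * shrinks to [257, 260).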
3339 * 3340 * Note: the returned inode number may not be the smallest one in 3341 * the tree, if the left-most item is a bitmap. 3342 */ 3343 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3344 { 3345 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3346 struct btrfs_free_space *entry = NULL; 3347 u64 ino = 0; 3348 3349 spin_lock(&ctl->tree_lock); 3350 3351 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3352 goto out; 3353 3354 entry = rb_entry(rb_first(&ctl->free_space_offset), 3355 struct btrfs_free_space, offset_index); 3356 3357 if (!entry->bitmap) { 3358 ino = entry->offset; 3359 3360 unlink_free_space(ctl, entry); 3361 entry->offset++; 3362 entry->bytes--; 3363 if (!entry->bytes) 3364 kmem_cache_free(btrfs_free_space_cachep, entry); 3365 else 3366 link_free_space(ctl, entry); 3367 } else { 3368 u64 offset = 0; 3369 u64 count = 1; 3370 int ret; 3371 3372 ret = search_bitmap(ctl, entry, &offset, &count); 3373 /* Logic error; Should be empty if it can't find anything */ 3374 ASSERT(!ret); 3375 3376 ino = offset; 3377 bitmap_clear_bits(ctl, entry, offset, 1); 3378 if (entry->bytes == 0) 3379 free_bitmap(ctl, entry); 3380 } 3381 out: 3382 spin_unlock(&ctl->tree_lock); 3383 3384 return ino; 3385 } 3386 3387 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3388 struct btrfs_path *path) 3389 { 3390 struct inode *inode = NULL; 3391 3392 spin_lock(&root->ino_cache_lock); 3393 if (root->ino_cache_inode) 3394 inode = igrab(root->ino_cache_inode); 3395 spin_unlock(&root->ino_cache_lock); 3396 if (inode) 3397 return inode; 3398 3399 inode = __lookup_free_space_inode(root, path, 0); 3400 if (IS_ERR(inode)) 3401 return inode; 3402 3403 spin_lock(&root->ino_cache_lock); 3404 if (!btrfs_fs_closing(root->fs_info)) 3405 root->ino_cache_inode = igrab(inode); 3406 spin_unlock(&root->ino_cache_lock); 3407 3408 return inode; 3409 } 3410 3411 int create_free_ino_inode(struct btrfs_root *root, 3412 struct btrfs_trans_handle *trans, 3413 struct btrfs_path *path) 3414 { 3415 return __create_free_space_inode(root, trans, path, 3416 BTRFS_FREE_INO_OBJECTID, 0); 3417 } 3418 3419 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3420 { 3421 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3422 struct btrfs_path *path; 3423 struct inode *inode; 3424 int ret = 0; 3425 u64 root_gen = btrfs_root_generation(&root->root_item); 3426 3427 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 3428 return 0; 3429 3430 /* 3431 * If we're unmounting then just return, since this does a search on the 3432 * normal root and not the commit root and we could deadlock. 
3433 */ 3434 if (btrfs_fs_closing(fs_info)) 3435 return 0; 3436 3437 path = btrfs_alloc_path(); 3438 if (!path) 3439 return 0; 3440 3441 inode = lookup_free_ino_inode(root, path); 3442 if (IS_ERR(inode)) 3443 goto out; 3444 3445 if (root_gen != BTRFS_I(inode)->generation) 3446 goto out_put; 3447 3448 ret = __load_free_space_cache(root, inode, ctl, path, 0); 3449 3450 if (ret < 0) 3451 btrfs_err(fs_info, 3452 "failed to load free ino cache for root %llu", 3453 root->root_key.objectid); 3454 out_put: 3455 iput(inode); 3456 out: 3457 btrfs_free_path(path); 3458 return ret; 3459 } 3460 3461 int btrfs_write_out_ino_cache(struct btrfs_root *root, 3462 struct btrfs_trans_handle *trans, 3463 struct btrfs_path *path, 3464 struct inode *inode) 3465 { 3466 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3467 int ret; 3468 struct btrfs_io_ctl io_ctl; 3469 bool release_metadata = true; 3470 3471 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 3472 return 0; 3473 3474 memset(&io_ctl, 0, sizeof(io_ctl)); 3475 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, 3476 trans, path, 0); 3477 if (!ret) { 3478 /* 3479 * At this point writepages() didn't error out, so our metadata 3480 * reservation is released when the writeback finishes, at 3481 * inode.c:btrfs_finish_ordered_io(), regardless of it finishing 3482 * with or without an error. 3483 */ 3484 release_metadata = false; 3485 ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0); 3486 } 3487 3488 if (ret) { 3489 if (release_metadata) 3490 btrfs_delalloc_release_metadata(inode, inode->i_size); 3491 #ifdef DEBUG 3492 btrfs_err(root->fs_info, 3493 "failed to write free ino cache for root %llu", 3494 root->root_key.objectid); 3495 #endif 3496 } 3497 3498 return ret; 3499 } 3500 3501 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 3502 /* 3503 * Use this if you need to make a bitmap or extent entry specifically, it 3504 * doesn't do any of the merging that add_free_space does, this acts a lot like 3505 * how the free space cache loading stuff works, so you can get really weird 3506 * configurations. 
3507 */ 3508 int test_add_free_space_entry(struct btrfs_block_group_cache *cache, 3509 u64 offset, u64 bytes, bool bitmap) 3510 { 3511 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3512 struct btrfs_free_space *info = NULL, *bitmap_info; 3513 void *map = NULL; 3514 u64 bytes_added; 3515 int ret; 3516 3517 again: 3518 if (!info) { 3519 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 3520 if (!info) 3521 return -ENOMEM; 3522 } 3523 3524 if (!bitmap) { 3525 spin_lock(&ctl->tree_lock); 3526 info->offset = offset; 3527 info->bytes = bytes; 3528 ret = link_free_space(ctl, info); 3529 spin_unlock(&ctl->tree_lock); 3530 if (ret) 3531 kmem_cache_free(btrfs_free_space_cachep, info); 3532 return ret; 3533 } 3534 3535 if (!map) { 3536 map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 3537 if (!map) { 3538 kmem_cache_free(btrfs_free_space_cachep, info); 3539 return -ENOMEM; 3540 } 3541 } 3542 3543 spin_lock(&ctl->tree_lock); 3544 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3545 1, 0); 3546 if (!bitmap_info) { 3547 info->bitmap = map; 3548 map = NULL; 3549 add_new_bitmap(ctl, info, offset); 3550 bitmap_info = info; 3551 info = NULL; 3552 } 3553 3554 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 3555 bytes -= bytes_added; 3556 offset += bytes_added; 3557 spin_unlock(&ctl->tree_lock); 3558 3559 if (bytes) 3560 goto again; 3561 3562 if (info) 3563 kmem_cache_free(btrfs_free_space_cachep, info); 3564 if (map) 3565 kfree(map); 3566 return 0; 3567 } 3568 3569 /* 3570 * Checks to see if the given range is in the free space cache. This is really 3571 * just used to check the absence of space, so if there is free space in the 3572 * range at all we will return 1. 3573 */ 3574 int test_check_exists(struct btrfs_block_group_cache *cache, 3575 u64 offset, u64 bytes) 3576 { 3577 struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; 3578 struct btrfs_free_space *info; 3579 int ret = 0; 3580 3581 spin_lock(&ctl->tree_lock); 3582 info = tree_search_offset(ctl, offset, 0, 0); 3583 if (!info) { 3584 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 3585 1, 0); 3586 if (!info) 3587 goto out; 3588 } 3589 3590 have_info: 3591 if (info->bitmap) { 3592 u64 bit_off, bit_bytes; 3593 struct rb_node *n; 3594 struct btrfs_free_space *tmp; 3595 3596 bit_off = offset; 3597 bit_bytes = ctl->unit; 3598 ret = search_bitmap(ctl, info, &bit_off, &bit_bytes); 3599 if (!ret) { 3600 if (bit_off == offset) { 3601 ret = 1; 3602 goto out; 3603 } else if (bit_off > offset && 3604 offset + bytes > bit_off) { 3605 ret = 1; 3606 goto out; 3607 } 3608 } 3609 3610 n = rb_prev(&info->offset_index); 3611 while (n) { 3612 tmp = rb_entry(n, struct btrfs_free_space, 3613 offset_index); 3614 if (tmp->offset + tmp->bytes < offset) 3615 break; 3616 if (offset + bytes < tmp->offset) { 3617 n = rb_prev(&info->offset_index); 3618 continue; 3619 } 3620 info = tmp; 3621 goto have_info; 3622 } 3623 3624 n = rb_next(&info->offset_index); 3625 while (n) { 3626 tmp = rb_entry(n, struct btrfs_free_space, 3627 offset_index); 3628 if (offset + bytes < tmp->offset) 3629 break; 3630 if (tmp->offset + tmp->bytes < offset) { 3631 n = rb_next(&info->offset_index); 3632 continue; 3633 } 3634 info = tmp; 3635 goto have_info; 3636 } 3637 3638 ret = 0; 3639 goto out; 3640 } 3641 3642 if (info->offset == offset) { 3643 ret = 1; 3644 goto out; 3645 } 3646 3647 if (offset > info->offset && offset < info->offset + info->bytes) 3648 ret = 1; 3649 out: 3650 spin_unlock(&ctl->tree_lock); 3651 return ret; 
3652 } 3653 #endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */ 3654