1 /* 2 * Copyright (C) 2008 Red Hat. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/pagemap.h> 20 #include <linux/sched.h> 21 #include <linux/slab.h> 22 #include <linux/math64.h> 23 #include <linux/ratelimit.h> 24 #include "ctree.h" 25 #include "free-space-cache.h" 26 #include "transaction.h" 27 #include "disk-io.h" 28 #include "extent_io.h" 29 #include "inode-map.h" 30 31 #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 33 34 static int link_free_space(struct btrfs_free_space_ctl *ctl, 35 struct btrfs_free_space *info); 36 37 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 38 struct btrfs_path *path, 39 u64 offset) 40 { 41 struct btrfs_key key; 42 struct btrfs_key location; 43 struct btrfs_disk_key disk_key; 44 struct btrfs_free_space_header *header; 45 struct extent_buffer *leaf; 46 struct inode *inode = NULL; 47 int ret; 48 49 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 50 key.offset = offset; 51 key.type = 0; 52 53 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 54 if (ret < 0) 55 return ERR_PTR(ret); 56 if (ret > 0) { 57 btrfs_release_path(path); 58 return ERR_PTR(-ENOENT); 59 } 60 61 leaf = path->nodes[0]; 62 header = btrfs_item_ptr(leaf, path->slots[0], 63 struct btrfs_free_space_header); 64 btrfs_free_space_key(leaf, header, &disk_key); 65 btrfs_disk_key_to_cpu(&location, &disk_key); 66 btrfs_release_path(path); 67 68 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); 69 if (!inode) 70 return ERR_PTR(-ENOENT); 71 if (IS_ERR(inode)) 72 return inode; 73 if (is_bad_inode(inode)) { 74 iput(inode); 75 return ERR_PTR(-ENOENT); 76 } 77 78 inode->i_mapping->flags &= ~__GFP_FS; 79 80 return inode; 81 } 82 83 struct inode *lookup_free_space_inode(struct btrfs_root *root, 84 struct btrfs_block_group_cache 85 *block_group, struct btrfs_path *path) 86 { 87 struct inode *inode = NULL; 88 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 89 90 spin_lock(&block_group->lock); 91 if (block_group->inode) 92 inode = igrab(block_group->inode); 93 spin_unlock(&block_group->lock); 94 if (inode) 95 return inode; 96 97 inode = __lookup_free_space_inode(root, path, 98 block_group->key.objectid); 99 if (IS_ERR(inode)) 100 return inode; 101 102 spin_lock(&block_group->lock); 103 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 104 printk(KERN_INFO "Old style space inode found, converting.\n"); 105 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 106 BTRFS_INODE_NODATACOW; 107 block_group->disk_cache_state = BTRFS_DC_CLEAR; 108 } 109 110 if (!block_group->iref) { 111 block_group->inode = igrab(inode); 112 block_group->iref = 1; 113 } 114 spin_unlock(&block_group->lock); 115 116 return inode; 117 } 118 119 int __create_free_space_inode(struct btrfs_root *root, 120 struct btrfs_trans_handle *trans, 121 struct btrfs_path *path, u64 ino, u64 offset) 122 { 123 
struct btrfs_key key; 124 struct btrfs_disk_key disk_key; 125 struct btrfs_free_space_header *header; 126 struct btrfs_inode_item *inode_item; 127 struct extent_buffer *leaf; 128 u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC; 129 int ret; 130 131 ret = btrfs_insert_empty_inode(trans, root, path, ino); 132 if (ret) 133 return ret; 134 135 /* We inline crc's for the free disk space cache */ 136 if (ino != BTRFS_FREE_INO_OBJECTID) 137 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 138 139 leaf = path->nodes[0]; 140 inode_item = btrfs_item_ptr(leaf, path->slots[0], 141 struct btrfs_inode_item); 142 btrfs_item_key(leaf, &disk_key, path->slots[0]); 143 memset_extent_buffer(leaf, 0, (unsigned long)inode_item, 144 sizeof(*inode_item)); 145 btrfs_set_inode_generation(leaf, inode_item, trans->transid); 146 btrfs_set_inode_size(leaf, inode_item, 0); 147 btrfs_set_inode_nbytes(leaf, inode_item, 0); 148 btrfs_set_inode_uid(leaf, inode_item, 0); 149 btrfs_set_inode_gid(leaf, inode_item, 0); 150 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); 151 btrfs_set_inode_flags(leaf, inode_item, flags); 152 btrfs_set_inode_nlink(leaf, inode_item, 1); 153 btrfs_set_inode_transid(leaf, inode_item, trans->transid); 154 btrfs_set_inode_block_group(leaf, inode_item, offset); 155 btrfs_mark_buffer_dirty(leaf); 156 btrfs_release_path(path); 157 158 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 159 key.offset = offset; 160 key.type = 0; 161 162 ret = btrfs_insert_empty_item(trans, root, path, &key, 163 sizeof(struct btrfs_free_space_header)); 164 if (ret < 0) { 165 btrfs_release_path(path); 166 return ret; 167 } 168 leaf = path->nodes[0]; 169 header = btrfs_item_ptr(leaf, path->slots[0], 170 struct btrfs_free_space_header); 171 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header)); 172 btrfs_set_free_space_key(leaf, header, &disk_key); 173 btrfs_mark_buffer_dirty(leaf); 174 btrfs_release_path(path); 175 176 return 0; 177 } 178 179 int create_free_space_inode(struct btrfs_root *root, 180 struct btrfs_trans_handle *trans, 181 struct btrfs_block_group_cache *block_group, 182 struct btrfs_path *path) 183 { 184 int ret; 185 u64 ino; 186 187 ret = btrfs_find_free_objectid(root, &ino); 188 if (ret < 0) 189 return ret; 190 191 return __create_free_space_inode(root, trans, path, ino, 192 block_group->key.objectid); 193 } 194 195 int btrfs_truncate_free_space_cache(struct btrfs_root *root, 196 struct btrfs_trans_handle *trans, 197 struct btrfs_path *path, 198 struct inode *inode) 199 { 200 struct btrfs_block_rsv *rsv; 201 u64 needed_bytes; 202 loff_t oldsize; 203 int ret = 0; 204 205 rsv = trans->block_rsv; 206 trans->block_rsv = &root->fs_info->global_block_rsv; 207 208 /* 1 for slack space, 1 for updating the inode */ 209 needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) + 210 btrfs_calc_trans_metadata_size(root, 1); 211 212 spin_lock(&trans->block_rsv->lock); 213 if (trans->block_rsv->reserved < needed_bytes) { 214 spin_unlock(&trans->block_rsv->lock); 215 trans->block_rsv = rsv; 216 return -ENOSPC; 217 } 218 spin_unlock(&trans->block_rsv->lock); 219 220 oldsize = i_size_read(inode); 221 btrfs_i_size_write(inode, 0); 222 truncate_pagecache(inode, oldsize, 0); 223 224 /* 225 * We don't need an orphan item because truncating the free space cache 226 * will never be split across transactions. 
227 */ 228 ret = btrfs_truncate_inode_items(trans, root, inode, 229 0, BTRFS_EXTENT_DATA_KEY); 230 231 if (ret) { 232 trans->block_rsv = rsv; 233 btrfs_abort_transaction(trans, root, ret); 234 return ret; 235 } 236 237 ret = btrfs_update_inode(trans, root, inode); 238 if (ret) 239 btrfs_abort_transaction(trans, root, ret); 240 trans->block_rsv = rsv; 241 242 return ret; 243 } 244 245 static int readahead_cache(struct inode *inode) 246 { 247 struct file_ra_state *ra; 248 unsigned long last_index; 249 250 ra = kzalloc(sizeof(*ra), GFP_NOFS); 251 if (!ra) 252 return -ENOMEM; 253 254 file_ra_state_init(ra, inode->i_mapping); 255 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 256 257 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index); 258 259 kfree(ra); 260 261 return 0; 262 } 263 264 struct io_ctl { 265 void *cur, *orig; 266 struct page *page; 267 struct page **pages; 268 struct btrfs_root *root; 269 unsigned long size; 270 int index; 271 int num_pages; 272 unsigned check_crcs:1; 273 }; 274 275 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, 276 struct btrfs_root *root) 277 { 278 memset(io_ctl, 0, sizeof(struct io_ctl)); 279 io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 280 PAGE_CACHE_SHIFT; 281 io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages, 282 GFP_NOFS); 283 if (!io_ctl->pages) 284 return -ENOMEM; 285 io_ctl->root = root; 286 if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) 287 io_ctl->check_crcs = 1; 288 return 0; 289 } 290 291 static void io_ctl_free(struct io_ctl *io_ctl) 292 { 293 kfree(io_ctl->pages); 294 } 295 296 static void io_ctl_unmap_page(struct io_ctl *io_ctl) 297 { 298 if (io_ctl->cur) { 299 kunmap(io_ctl->page); 300 io_ctl->cur = NULL; 301 io_ctl->orig = NULL; 302 } 303 } 304 305 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) 306 { 307 WARN_ON(io_ctl->cur); 308 BUG_ON(io_ctl->index >= io_ctl->num_pages); 309 io_ctl->page = io_ctl->pages[io_ctl->index++]; 310 io_ctl->cur = kmap(io_ctl->page); 311 io_ctl->orig = io_ctl->cur; 312 io_ctl->size = PAGE_CACHE_SIZE; 313 if (clear) 314 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE); 315 } 316 317 static void io_ctl_drop_pages(struct io_ctl *io_ctl) 318 { 319 int i; 320 321 io_ctl_unmap_page(io_ctl); 322 323 for (i = 0; i < io_ctl->num_pages; i++) { 324 if (io_ctl->pages[i]) { 325 ClearPageChecked(io_ctl->pages[i]); 326 unlock_page(io_ctl->pages[i]); 327 page_cache_release(io_ctl->pages[i]); 328 } 329 } 330 } 331 332 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, 333 int uptodate) 334 { 335 struct page *page; 336 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 337 int i; 338 339 for (i = 0; i < io_ctl->num_pages; i++) { 340 page = find_or_create_page(inode->i_mapping, i, mask); 341 if (!page) { 342 io_ctl_drop_pages(io_ctl); 343 return -ENOMEM; 344 } 345 io_ctl->pages[i] = page; 346 if (uptodate && !PageUptodate(page)) { 347 btrfs_readpage(NULL, page); 348 lock_page(page); 349 if (!PageUptodate(page)) { 350 printk(KERN_ERR "btrfs: error reading free " 351 "space cache\n"); 352 io_ctl_drop_pages(io_ctl); 353 return -EIO; 354 } 355 } 356 } 357 358 for (i = 0; i < io_ctl->num_pages; i++) { 359 clear_page_dirty_for_io(io_ctl->pages[i]); 360 set_page_extent_mapped(io_ctl->pages[i]); 361 } 362 363 return 0; 364 } 365 366 static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) 367 { 368 u64 *val; 369 370 io_ctl_map_page(io_ctl, 1); 371 372 /* 373 * Skip the csum areas. 
If we don't check crcs then we just have a 374 * 64bit chunk at the front of the first page. 375 */ 376 if (io_ctl->check_crcs) { 377 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages); 378 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages); 379 } else { 380 io_ctl->cur += sizeof(u64); 381 io_ctl->size -= sizeof(u64) * 2; 382 } 383 384 val = io_ctl->cur; 385 *val = cpu_to_le64(generation); 386 io_ctl->cur += sizeof(u64); 387 } 388 389 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) 390 { 391 u64 *gen; 392 393 /* 394 * Skip the crc area. If we don't check crcs then we just have a 64bit 395 * chunk at the front of the first page. 396 */ 397 if (io_ctl->check_crcs) { 398 io_ctl->cur += sizeof(u32) * io_ctl->num_pages; 399 io_ctl->size -= sizeof(u64) + 400 (sizeof(u32) * io_ctl->num_pages); 401 } else { 402 io_ctl->cur += sizeof(u64); 403 io_ctl->size -= sizeof(u64) * 2; 404 } 405 406 gen = io_ctl->cur; 407 if (le64_to_cpu(*gen) != generation) { 408 printk_ratelimited(KERN_ERR "btrfs: space cache generation " 409 "(%Lu) does not match inode (%Lu)\n", *gen, 410 generation); 411 io_ctl_unmap_page(io_ctl); 412 return -EIO; 413 } 414 io_ctl->cur += sizeof(u64); 415 return 0; 416 } 417 418 static void io_ctl_set_crc(struct io_ctl *io_ctl, int index) 419 { 420 u32 *tmp; 421 u32 crc = ~(u32)0; 422 unsigned offset = 0; 423 424 if (!io_ctl->check_crcs) { 425 io_ctl_unmap_page(io_ctl); 426 return; 427 } 428 429 if (index == 0) 430 offset = sizeof(u32) * io_ctl->num_pages; 431 432 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, 433 PAGE_CACHE_SIZE - offset); 434 btrfs_csum_final(crc, (char *)&crc); 435 io_ctl_unmap_page(io_ctl); 436 tmp = kmap(io_ctl->pages[0]); 437 tmp += index; 438 *tmp = crc; 439 kunmap(io_ctl->pages[0]); 440 } 441 442 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index) 443 { 444 u32 *tmp, val; 445 u32 crc = ~(u32)0; 446 unsigned offset = 0; 447 448 if (!io_ctl->check_crcs) { 449 io_ctl_map_page(io_ctl, 0); 450 return 0; 451 } 452 453 if (index == 0) 454 offset = sizeof(u32) * io_ctl->num_pages; 455 456 tmp = kmap(io_ctl->pages[0]); 457 tmp += index; 458 val = *tmp; 459 kunmap(io_ctl->pages[0]); 460 461 io_ctl_map_page(io_ctl, 0); 462 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, 463 PAGE_CACHE_SIZE - offset); 464 btrfs_csum_final(crc, (char *)&crc); 465 if (val != crc) { 466 printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free " 467 "space cache\n"); 468 io_ctl_unmap_page(io_ctl); 469 return -EIO; 470 } 471 472 return 0; 473 } 474 475 static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes, 476 void *bitmap) 477 { 478 struct btrfs_free_space_entry *entry; 479 480 if (!io_ctl->cur) 481 return -ENOSPC; 482 483 entry = io_ctl->cur; 484 entry->offset = cpu_to_le64(offset); 485 entry->bytes = cpu_to_le64(bytes); 486 entry->type = (bitmap) ? 
BTRFS_FREE_SPACE_BITMAP : 487 BTRFS_FREE_SPACE_EXTENT; 488 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 489 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 490 491 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 492 return 0; 493 494 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 495 496 /* No more pages to map */ 497 if (io_ctl->index >= io_ctl->num_pages) 498 return 0; 499 500 /* map the next page */ 501 io_ctl_map_page(io_ctl, 1); 502 return 0; 503 } 504 505 static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap) 506 { 507 if (!io_ctl->cur) 508 return -ENOSPC; 509 510 /* 511 * If we aren't at the start of the current page, unmap this one and 512 * map the next one if there is any left. 513 */ 514 if (io_ctl->cur != io_ctl->orig) { 515 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 516 if (io_ctl->index >= io_ctl->num_pages) 517 return -ENOSPC; 518 io_ctl_map_page(io_ctl, 0); 519 } 520 521 memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE); 522 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 523 if (io_ctl->index < io_ctl->num_pages) 524 io_ctl_map_page(io_ctl, 0); 525 return 0; 526 } 527 528 static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl) 529 { 530 /* 531 * If we're not on the boundary we know we've modified the page and we 532 * need to crc the page. 533 */ 534 if (io_ctl->cur != io_ctl->orig) 535 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 536 else 537 io_ctl_unmap_page(io_ctl); 538 539 while (io_ctl->index < io_ctl->num_pages) { 540 io_ctl_map_page(io_ctl, 1); 541 io_ctl_set_crc(io_ctl, io_ctl->index - 1); 542 } 543 } 544 545 static int io_ctl_read_entry(struct io_ctl *io_ctl, 546 struct btrfs_free_space *entry, u8 *type) 547 { 548 struct btrfs_free_space_entry *e; 549 int ret; 550 551 if (!io_ctl->cur) { 552 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 553 if (ret) 554 return ret; 555 } 556 557 e = io_ctl->cur; 558 entry->offset = le64_to_cpu(e->offset); 559 entry->bytes = le64_to_cpu(e->bytes); 560 *type = e->type; 561 io_ctl->cur += sizeof(struct btrfs_free_space_entry); 562 io_ctl->size -= sizeof(struct btrfs_free_space_entry); 563 564 if (io_ctl->size >= sizeof(struct btrfs_free_space_entry)) 565 return 0; 566 567 io_ctl_unmap_page(io_ctl); 568 569 return 0; 570 } 571 572 static int io_ctl_read_bitmap(struct io_ctl *io_ctl, 573 struct btrfs_free_space *entry) 574 { 575 int ret; 576 577 ret = io_ctl_check_crc(io_ctl, io_ctl->index); 578 if (ret) 579 return ret; 580 581 memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE); 582 io_ctl_unmap_page(io_ctl); 583 584 return 0; 585 } 586 587 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, 588 struct btrfs_free_space_ctl *ctl, 589 struct btrfs_path *path, u64 offset) 590 { 591 struct btrfs_free_space_header *header; 592 struct extent_buffer *leaf; 593 struct io_ctl io_ctl; 594 struct btrfs_key key; 595 struct btrfs_free_space *e, *n; 596 struct list_head bitmaps; 597 u64 num_entries; 598 u64 num_bitmaps; 599 u64 generation; 600 u8 type; 601 int ret = 0; 602 603 INIT_LIST_HEAD(&bitmaps); 604 605 /* Nothing in the space cache, goodbye */ 606 if (!i_size_read(inode)) 607 return 0; 608 609 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 610 key.offset = offset; 611 key.type = 0; 612 613 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 614 if (ret < 0) 615 return 0; 616 else if (ret > 0) { 617 btrfs_release_path(path); 618 return 0; 619 } 620 621 ret = -1; 622 623 leaf = path->nodes[0]; 624 header = btrfs_item_ptr(leaf, path->slots[0], 625 struct btrfs_free_space_header); 626 
num_entries = btrfs_free_space_entries(leaf, header); 627 num_bitmaps = btrfs_free_space_bitmaps(leaf, header); 628 generation = btrfs_free_space_generation(leaf, header); 629 btrfs_release_path(path); 630 631 if (BTRFS_I(inode)->generation != generation) { 632 printk(KERN_ERR "btrfs: free space inode generation (%llu) did" 633 " not match free space cache generation (%llu)\n", 634 (unsigned long long)BTRFS_I(inode)->generation, 635 (unsigned long long)generation); 636 return 0; 637 } 638 639 if (!num_entries) 640 return 0; 641 642 ret = io_ctl_init(&io_ctl, inode, root); 643 if (ret) 644 return ret; 645 646 ret = readahead_cache(inode); 647 if (ret) 648 goto out; 649 650 ret = io_ctl_prepare_pages(&io_ctl, inode, 1); 651 if (ret) 652 goto out; 653 654 ret = io_ctl_check_crc(&io_ctl, 0); 655 if (ret) 656 goto free_cache; 657 658 ret = io_ctl_check_generation(&io_ctl, generation); 659 if (ret) 660 goto free_cache; 661 662 while (num_entries) { 663 e = kmem_cache_zalloc(btrfs_free_space_cachep, 664 GFP_NOFS); 665 if (!e) 666 goto free_cache; 667 668 ret = io_ctl_read_entry(&io_ctl, e, &type); 669 if (ret) { 670 kmem_cache_free(btrfs_free_space_cachep, e); 671 goto free_cache; 672 } 673 674 if (!e->bytes) { 675 kmem_cache_free(btrfs_free_space_cachep, e); 676 goto free_cache; 677 } 678 679 if (type == BTRFS_FREE_SPACE_EXTENT) { 680 spin_lock(&ctl->tree_lock); 681 ret = link_free_space(ctl, e); 682 spin_unlock(&ctl->tree_lock); 683 if (ret) { 684 printk(KERN_ERR "Duplicate entries in " 685 "free space cache, dumping\n"); 686 kmem_cache_free(btrfs_free_space_cachep, e); 687 goto free_cache; 688 } 689 } else { 690 BUG_ON(!num_bitmaps); 691 num_bitmaps--; 692 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 693 if (!e->bitmap) { 694 kmem_cache_free( 695 btrfs_free_space_cachep, e); 696 goto free_cache; 697 } 698 spin_lock(&ctl->tree_lock); 699 ret = link_free_space(ctl, e); 700 ctl->total_bitmaps++; 701 ctl->op->recalc_thresholds(ctl); 702 spin_unlock(&ctl->tree_lock); 703 if (ret) { 704 printk(KERN_ERR "Duplicate entries in " 705 "free space cache, dumping\n"); 706 kmem_cache_free(btrfs_free_space_cachep, e); 707 goto free_cache; 708 } 709 list_add_tail(&e->list, &bitmaps); 710 } 711 712 num_entries--; 713 } 714 715 io_ctl_unmap_page(&io_ctl); 716 717 /* 718 * We add the bitmaps at the end of the entries in order that 719 * the bitmap entries are added to the cache. 720 */ 721 list_for_each_entry_safe(e, n, &bitmaps, list) { 722 list_del_init(&e->list); 723 ret = io_ctl_read_bitmap(&io_ctl, e); 724 if (ret) 725 goto free_cache; 726 } 727 728 io_ctl_drop_pages(&io_ctl); 729 ret = 1; 730 out: 731 io_ctl_free(&io_ctl); 732 return ret; 733 free_cache: 734 io_ctl_drop_pages(&io_ctl); 735 __btrfs_remove_free_space_cache(ctl); 736 goto out; 737 } 738 739 int load_free_space_cache(struct btrfs_fs_info *fs_info, 740 struct btrfs_block_group_cache *block_group) 741 { 742 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 743 struct btrfs_root *root = fs_info->tree_root; 744 struct inode *inode; 745 struct btrfs_path *path; 746 int ret = 0; 747 bool matched; 748 u64 used = btrfs_block_group_used(&block_group->item); 749 750 /* 751 * If we're unmounting then just return, since this does a search on the 752 * normal root and not the commit root and we could deadlock. 753 */ 754 if (btrfs_fs_closing(fs_info)) 755 return 0; 756 757 /* 758 * If this block group has been marked to be cleared for one reason or 759 * another then we can't trust the on disk cache, so just return. 
760 */ 761 spin_lock(&block_group->lock); 762 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 763 spin_unlock(&block_group->lock); 764 return 0; 765 } 766 spin_unlock(&block_group->lock); 767 768 path = btrfs_alloc_path(); 769 if (!path) 770 return 0; 771 772 inode = lookup_free_space_inode(root, block_group, path); 773 if (IS_ERR(inode)) { 774 btrfs_free_path(path); 775 return 0; 776 } 777 778 /* We may have converted the inode and made the cache invalid. */ 779 spin_lock(&block_group->lock); 780 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { 781 spin_unlock(&block_group->lock); 782 btrfs_free_path(path); 783 goto out; 784 } 785 spin_unlock(&block_group->lock); 786 787 ret = __load_free_space_cache(fs_info->tree_root, inode, ctl, 788 path, block_group->key.objectid); 789 btrfs_free_path(path); 790 if (ret <= 0) 791 goto out; 792 793 spin_lock(&ctl->tree_lock); 794 matched = (ctl->free_space == (block_group->key.offset - used - 795 block_group->bytes_super)); 796 spin_unlock(&ctl->tree_lock); 797 798 if (!matched) { 799 __btrfs_remove_free_space_cache(ctl); 800 printk(KERN_ERR "block group %llu has an wrong amount of free " 801 "space\n", block_group->key.objectid); 802 ret = -1; 803 } 804 out: 805 if (ret < 0) { 806 /* This cache is bogus, make sure it gets cleared */ 807 spin_lock(&block_group->lock); 808 block_group->disk_cache_state = BTRFS_DC_CLEAR; 809 spin_unlock(&block_group->lock); 810 ret = 0; 811 812 printk(KERN_ERR "btrfs: failed to load free space cache " 813 "for block group %llu\n", block_group->key.objectid); 814 } 815 816 iput(inode); 817 return ret; 818 } 819 820 /** 821 * __btrfs_write_out_cache - write out cached info to an inode 822 * @root - the root the inode belongs to 823 * @ctl - the free space cache we are going to write out 824 * @block_group - the block_group for this cache if it belongs to a block_group 825 * @trans - the trans handle 826 * @path - the path to use 827 * @offset - the offset for the key we'll insert 828 * 829 * This function writes out a free space cache struct to disk for quick recovery 830 * on mount. This will return 0 if it was successfull in writing the cache out, 831 * and -1 if it was not. 832 */ 833 int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, 834 struct btrfs_free_space_ctl *ctl, 835 struct btrfs_block_group_cache *block_group, 836 struct btrfs_trans_handle *trans, 837 struct btrfs_path *path, u64 offset) 838 { 839 struct btrfs_free_space_header *header; 840 struct extent_buffer *leaf; 841 struct rb_node *node; 842 struct list_head *pos, *n; 843 struct extent_state *cached_state = NULL; 844 struct btrfs_free_cluster *cluster = NULL; 845 struct extent_io_tree *unpin = NULL; 846 struct io_ctl io_ctl; 847 struct list_head bitmap_list; 848 struct btrfs_key key; 849 u64 start, extent_start, extent_end, len; 850 int entries = 0; 851 int bitmaps = 0; 852 int ret; 853 int err = -1; 854 855 INIT_LIST_HEAD(&bitmap_list); 856 857 if (!i_size_read(inode)) 858 return -1; 859 860 ret = io_ctl_init(&io_ctl, inode, root); 861 if (ret) 862 return -1; 863 864 /* Get the cluster for this block_group if it exists */ 865 if (block_group && !list_empty(&block_group->cluster_list)) 866 cluster = list_entry(block_group->cluster_list.next, 867 struct btrfs_free_cluster, 868 block_group_list); 869 870 /* Lock all pages first so we can lock the extent safely. 
*/ 871 io_ctl_prepare_pages(&io_ctl, inode, 0); 872 873 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 874 0, &cached_state); 875 876 node = rb_first(&ctl->free_space_offset); 877 if (!node && cluster) { 878 node = rb_first(&cluster->root); 879 cluster = NULL; 880 } 881 882 /* Make sure we can fit our crcs into the first page */ 883 if (io_ctl.check_crcs && 884 (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) { 885 WARN_ON(1); 886 goto out_nospc; 887 } 888 889 io_ctl_set_generation(&io_ctl, trans->transid); 890 891 /* Write out the extent entries */ 892 while (node) { 893 struct btrfs_free_space *e; 894 895 e = rb_entry(node, struct btrfs_free_space, offset_index); 896 entries++; 897 898 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes, 899 e->bitmap); 900 if (ret) 901 goto out_nospc; 902 903 if (e->bitmap) { 904 list_add_tail(&e->list, &bitmap_list); 905 bitmaps++; 906 } 907 node = rb_next(node); 908 if (!node && cluster) { 909 node = rb_first(&cluster->root); 910 cluster = NULL; 911 } 912 } 913 914 /* 915 * We want to add any pinned extents to our free space cache 916 * so we don't leak the space 917 */ 918 919 /* 920 * We shouldn't have switched the pinned extents yet so this is the 921 * right one 922 */ 923 unpin = root->fs_info->pinned_extents; 924 925 if (block_group) 926 start = block_group->key.objectid; 927 928 while (block_group && (start < block_group->key.objectid + 929 block_group->key.offset)) { 930 ret = find_first_extent_bit(unpin, start, 931 &extent_start, &extent_end, 932 EXTENT_DIRTY); 933 if (ret) { 934 ret = 0; 935 break; 936 } 937 938 /* This pinned extent is out of our range */ 939 if (extent_start >= block_group->key.objectid + 940 block_group->key.offset) 941 break; 942 943 extent_start = max(extent_start, start); 944 extent_end = min(block_group->key.objectid + 945 block_group->key.offset, extent_end + 1); 946 len = extent_end - extent_start; 947 948 entries++; 949 ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL); 950 if (ret) 951 goto out_nospc; 952 953 start = extent_end; 954 } 955 956 /* Write out the bitmaps */ 957 list_for_each_safe(pos, n, &bitmap_list) { 958 struct btrfs_free_space *entry = 959 list_entry(pos, struct btrfs_free_space, list); 960 961 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap); 962 if (ret) 963 goto out_nospc; 964 list_del_init(&entry->list); 965 } 966 967 /* Zero out the rest of the pages just to make sure */ 968 io_ctl_zero_remaining_pages(&io_ctl); 969 970 ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, 971 0, i_size_read(inode), &cached_state); 972 io_ctl_drop_pages(&io_ctl); 973 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 974 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 975 976 if (ret) 977 goto out; 978 979 980 ret = filemap_write_and_wait(inode->i_mapping); 981 if (ret) 982 goto out; 983 984 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 985 key.offset = offset; 986 key.type = 0; 987 988 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 989 if (ret < 0) { 990 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, 991 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, 992 GFP_NOFS); 993 goto out; 994 } 995 leaf = path->nodes[0]; 996 if (ret > 0) { 997 struct btrfs_key found_key; 998 BUG_ON(!path->slots[0]); 999 path->slots[0]--; 1000 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1001 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID || 1002 found_key.offset != offset) { 1003 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, 1004 inode->i_size - 1, 
1005 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, 1006 NULL, GFP_NOFS); 1007 btrfs_release_path(path); 1008 goto out; 1009 } 1010 } 1011 1012 BTRFS_I(inode)->generation = trans->transid; 1013 header = btrfs_item_ptr(leaf, path->slots[0], 1014 struct btrfs_free_space_header); 1015 btrfs_set_free_space_entries(leaf, header, entries); 1016 btrfs_set_free_space_bitmaps(leaf, header, bitmaps); 1017 btrfs_set_free_space_generation(leaf, header, trans->transid); 1018 btrfs_mark_buffer_dirty(leaf); 1019 btrfs_release_path(path); 1020 1021 err = 0; 1022 out: 1023 io_ctl_free(&io_ctl); 1024 if (err) { 1025 invalidate_inode_pages2(inode->i_mapping); 1026 BTRFS_I(inode)->generation = 0; 1027 } 1028 btrfs_update_inode(trans, root, inode); 1029 return err; 1030 1031 out_nospc: 1032 list_for_each_safe(pos, n, &bitmap_list) { 1033 struct btrfs_free_space *entry = 1034 list_entry(pos, struct btrfs_free_space, list); 1035 list_del_init(&entry->list); 1036 } 1037 io_ctl_drop_pages(&io_ctl); 1038 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 1039 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 1040 goto out; 1041 } 1042 1043 int btrfs_write_out_cache(struct btrfs_root *root, 1044 struct btrfs_trans_handle *trans, 1045 struct btrfs_block_group_cache *block_group, 1046 struct btrfs_path *path) 1047 { 1048 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1049 struct inode *inode; 1050 int ret = 0; 1051 1052 root = root->fs_info->tree_root; 1053 1054 spin_lock(&block_group->lock); 1055 if (block_group->disk_cache_state < BTRFS_DC_SETUP) { 1056 spin_unlock(&block_group->lock); 1057 return 0; 1058 } 1059 spin_unlock(&block_group->lock); 1060 1061 inode = lookup_free_space_inode(root, block_group, path); 1062 if (IS_ERR(inode)) 1063 return 0; 1064 1065 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans, 1066 path, block_group->key.objectid); 1067 if (ret) { 1068 spin_lock(&block_group->lock); 1069 block_group->disk_cache_state = BTRFS_DC_ERROR; 1070 spin_unlock(&block_group->lock); 1071 ret = 0; 1072 #ifdef DEBUG 1073 printk(KERN_ERR "btrfs: failed to write free space cache " 1074 "for block group %llu\n", block_group->key.objectid); 1075 #endif 1076 } 1077 1078 iput(inode); 1079 return ret; 1080 } 1081 1082 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit, 1083 u64 offset) 1084 { 1085 BUG_ON(offset < bitmap_start); 1086 offset -= bitmap_start; 1087 return (unsigned long)(div_u64(offset, unit)); 1088 } 1089 1090 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit) 1091 { 1092 return (unsigned long)(div_u64(bytes, unit)); 1093 } 1094 1095 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, 1096 u64 offset) 1097 { 1098 u64 bitmap_start; 1099 u64 bytes_per_bitmap; 1100 1101 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; 1102 bitmap_start = offset - ctl->start; 1103 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 1104 bitmap_start *= bytes_per_bitmap; 1105 bitmap_start += ctl->start; 1106 1107 return bitmap_start; 1108 } 1109 1110 static int tree_insert_offset(struct rb_root *root, u64 offset, 1111 struct rb_node *node, int bitmap) 1112 { 1113 struct rb_node **p = &root->rb_node; 1114 struct rb_node *parent = NULL; 1115 struct btrfs_free_space *info; 1116 1117 while (*p) { 1118 parent = *p; 1119 info = rb_entry(parent, struct btrfs_free_space, offset_index); 1120 1121 if (offset < info->offset) { 1122 p = &(*p)->rb_left; 1123 } else if (offset > info->offset) { 1124 p = &(*p)->rb_right; 1125 } else { 1126 /* 1127 * we could have a 
bitmap entry and an extent entry 1128 * share the same offset. If this is the case, we want 1129 * the extent entry to always be found first if we do a 1130 * linear search through the tree, since we want to have 1131 * the quickest allocation time, and allocating from an 1132 * extent is faster than allocating from a bitmap. So 1133 * if we're inserting a bitmap and we find an entry at 1134 * this offset, we want to go right, or after this entry 1135 * logically. If we are inserting an extent and we've 1136 * found a bitmap, we want to go left, or before 1137 * logically. 1138 */ 1139 if (bitmap) { 1140 if (info->bitmap) { 1141 WARN_ON_ONCE(1); 1142 return -EEXIST; 1143 } 1144 p = &(*p)->rb_right; 1145 } else { 1146 if (!info->bitmap) { 1147 WARN_ON_ONCE(1); 1148 return -EEXIST; 1149 } 1150 p = &(*p)->rb_left; 1151 } 1152 } 1153 } 1154 1155 rb_link_node(node, parent, p); 1156 rb_insert_color(node, root); 1157 1158 return 0; 1159 } 1160 1161 /* 1162 * searches the tree for the given offset. 1163 * 1164 * fuzzy - If this is set, then we are trying to make an allocation, and we just 1165 * want a section that has at least bytes size and comes at or after the given 1166 * offset. 1167 */ 1168 static struct btrfs_free_space * 1169 tree_search_offset(struct btrfs_free_space_ctl *ctl, 1170 u64 offset, int bitmap_only, int fuzzy) 1171 { 1172 struct rb_node *n = ctl->free_space_offset.rb_node; 1173 struct btrfs_free_space *entry, *prev = NULL; 1174 1175 /* find entry that is closest to the 'offset' */ 1176 while (1) { 1177 if (!n) { 1178 entry = NULL; 1179 break; 1180 } 1181 1182 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1183 prev = entry; 1184 1185 if (offset < entry->offset) 1186 n = n->rb_left; 1187 else if (offset > entry->offset) 1188 n = n->rb_right; 1189 else 1190 break; 1191 } 1192 1193 if (bitmap_only) { 1194 if (!entry) 1195 return NULL; 1196 if (entry->bitmap) 1197 return entry; 1198 1199 /* 1200 * bitmap entry and extent entry may share same offset, 1201 * in that case, bitmap entry comes after extent entry. 
1202 */ 1203 n = rb_next(n); 1204 if (!n) 1205 return NULL; 1206 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1207 if (entry->offset != offset) 1208 return NULL; 1209 1210 WARN_ON(!entry->bitmap); 1211 return entry; 1212 } else if (entry) { 1213 if (entry->bitmap) { 1214 /* 1215 * if previous extent entry covers the offset, 1216 * we should return it instead of the bitmap entry 1217 */ 1218 n = &entry->offset_index; 1219 while (1) { 1220 n = rb_prev(n); 1221 if (!n) 1222 break; 1223 prev = rb_entry(n, struct btrfs_free_space, 1224 offset_index); 1225 if (!prev->bitmap) { 1226 if (prev->offset + prev->bytes > offset) 1227 entry = prev; 1228 break; 1229 } 1230 } 1231 } 1232 return entry; 1233 } 1234 1235 if (!prev) 1236 return NULL; 1237 1238 /* find last entry before the 'offset' */ 1239 entry = prev; 1240 if (entry->offset > offset) { 1241 n = rb_prev(&entry->offset_index); 1242 if (n) { 1243 entry = rb_entry(n, struct btrfs_free_space, 1244 offset_index); 1245 BUG_ON(entry->offset > offset); 1246 } else { 1247 if (fuzzy) 1248 return entry; 1249 else 1250 return NULL; 1251 } 1252 } 1253 1254 if (entry->bitmap) { 1255 n = &entry->offset_index; 1256 while (1) { 1257 n = rb_prev(n); 1258 if (!n) 1259 break; 1260 prev = rb_entry(n, struct btrfs_free_space, 1261 offset_index); 1262 if (!prev->bitmap) { 1263 if (prev->offset + prev->bytes > offset) 1264 return prev; 1265 break; 1266 } 1267 } 1268 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) 1269 return entry; 1270 } else if (entry->offset + entry->bytes > offset) 1271 return entry; 1272 1273 if (!fuzzy) 1274 return NULL; 1275 1276 while (1) { 1277 if (entry->bitmap) { 1278 if (entry->offset + BITS_PER_BITMAP * 1279 ctl->unit > offset) 1280 break; 1281 } else { 1282 if (entry->offset + entry->bytes > offset) 1283 break; 1284 } 1285 1286 n = rb_next(&entry->offset_index); 1287 if (!n) 1288 return NULL; 1289 entry = rb_entry(n, struct btrfs_free_space, offset_index); 1290 } 1291 return entry; 1292 } 1293 1294 static inline void 1295 __unlink_free_space(struct btrfs_free_space_ctl *ctl, 1296 struct btrfs_free_space *info) 1297 { 1298 rb_erase(&info->offset_index, &ctl->free_space_offset); 1299 ctl->free_extents--; 1300 } 1301 1302 static void unlink_free_space(struct btrfs_free_space_ctl *ctl, 1303 struct btrfs_free_space *info) 1304 { 1305 __unlink_free_space(ctl, info); 1306 ctl->free_space -= info->bytes; 1307 } 1308 1309 static int link_free_space(struct btrfs_free_space_ctl *ctl, 1310 struct btrfs_free_space *info) 1311 { 1312 int ret = 0; 1313 1314 BUG_ON(!info->bitmap && !info->bytes); 1315 ret = tree_insert_offset(&ctl->free_space_offset, info->offset, 1316 &info->offset_index, (info->bitmap != NULL)); 1317 if (ret) 1318 return ret; 1319 1320 ctl->free_space += info->bytes; 1321 ctl->free_extents++; 1322 return ret; 1323 } 1324 1325 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) 1326 { 1327 struct btrfs_block_group_cache *block_group = ctl->private; 1328 u64 max_bytes; 1329 u64 bitmap_bytes; 1330 u64 extent_bytes; 1331 u64 size = block_group->key.offset; 1332 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; 1333 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1334 1335 BUG_ON(ctl->total_bitmaps > max_bitmaps); 1336 1337 /* 1338 * The goal is to keep the total amount of memory used per 1gb of space 1339 * at or below 32k, so we need to adjust how much memory we allow to be 1340 * used by extent based free space tracking 1341 */ 1342 if (size < 1024 * 1024 * 
1024) 1343 max_bytes = MAX_CACHE_BYTES_PER_GIG; 1344 else 1345 max_bytes = MAX_CACHE_BYTES_PER_GIG * 1346 div64_u64(size, 1024 * 1024 * 1024); 1347 1348 /* 1349 * we want to account for 1 more bitmap than what we have so we can make 1350 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as 1351 * we add more bitmaps. 1352 */ 1353 bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE; 1354 1355 if (bitmap_bytes >= max_bytes) { 1356 ctl->extents_thresh = 0; 1357 return; 1358 } 1359 1360 /* 1361 * we want the extent entry threshold to always be at most 1/2 the maxw 1362 * bytes we can have, or whatever is less than that. 1363 */ 1364 extent_bytes = max_bytes - bitmap_bytes; 1365 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); 1366 1367 ctl->extents_thresh = 1368 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); 1369 } 1370 1371 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1372 struct btrfs_free_space *info, 1373 u64 offset, u64 bytes) 1374 { 1375 unsigned long start, count; 1376 1377 start = offset_to_bit(info->offset, ctl->unit, offset); 1378 count = bytes_to_bits(bytes, ctl->unit); 1379 BUG_ON(start + count > BITS_PER_BITMAP); 1380 1381 bitmap_clear(info->bitmap, start, count); 1382 1383 info->bytes -= bytes; 1384 } 1385 1386 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, 1387 struct btrfs_free_space *info, u64 offset, 1388 u64 bytes) 1389 { 1390 __bitmap_clear_bits(ctl, info, offset, bytes); 1391 ctl->free_space -= bytes; 1392 } 1393 1394 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, 1395 struct btrfs_free_space *info, u64 offset, 1396 u64 bytes) 1397 { 1398 unsigned long start, count; 1399 1400 start = offset_to_bit(info->offset, ctl->unit, offset); 1401 count = bytes_to_bits(bytes, ctl->unit); 1402 BUG_ON(start + count > BITS_PER_BITMAP); 1403 1404 bitmap_set(info->bitmap, start, count); 1405 1406 info->bytes += bytes; 1407 ctl->free_space += bytes; 1408 } 1409 1410 static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1411 struct btrfs_free_space *bitmap_info, u64 *offset, 1412 u64 *bytes) 1413 { 1414 unsigned long found_bits = 0; 1415 unsigned long bits, i; 1416 unsigned long next_zero; 1417 1418 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1419 max_t(u64, *offset, bitmap_info->offset)); 1420 bits = bytes_to_bits(*bytes, ctl->unit); 1421 1422 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); 1423 i < BITS_PER_BITMAP; 1424 i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) { 1425 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1426 BITS_PER_BITMAP, i); 1427 if ((next_zero - i) >= bits) { 1428 found_bits = next_zero - i; 1429 break; 1430 } 1431 i = next_zero; 1432 } 1433 1434 if (found_bits) { 1435 *offset = (u64)(i * ctl->unit) + bitmap_info->offset; 1436 *bytes = (u64)(found_bits) * ctl->unit; 1437 return 0; 1438 } 1439 1440 return -1; 1441 } 1442 1443 static struct btrfs_free_space * 1444 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes) 1445 { 1446 struct btrfs_free_space *entry; 1447 struct rb_node *node; 1448 int ret; 1449 1450 if (!ctl->free_space_offset.rb_node) 1451 return NULL; 1452 1453 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1454 if (!entry) 1455 return NULL; 1456 1457 for (node = &entry->offset_index; node; node = rb_next(node)) { 1458 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1459 if (entry->bytes < *bytes) 1460 continue; 1461 1462 if 
(entry->bitmap) { 1463 ret = search_bitmap(ctl, entry, offset, bytes); 1464 if (!ret) 1465 return entry; 1466 continue; 1467 } 1468 1469 *offset = entry->offset; 1470 *bytes = entry->bytes; 1471 return entry; 1472 } 1473 1474 return NULL; 1475 } 1476 1477 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, 1478 struct btrfs_free_space *info, u64 offset) 1479 { 1480 info->offset = offset_to_bitmap(ctl, offset); 1481 info->bytes = 0; 1482 INIT_LIST_HEAD(&info->list); 1483 link_free_space(ctl, info); 1484 ctl->total_bitmaps++; 1485 1486 ctl->op->recalc_thresholds(ctl); 1487 } 1488 1489 static void free_bitmap(struct btrfs_free_space_ctl *ctl, 1490 struct btrfs_free_space *bitmap_info) 1491 { 1492 unlink_free_space(ctl, bitmap_info); 1493 kfree(bitmap_info->bitmap); 1494 kmem_cache_free(btrfs_free_space_cachep, bitmap_info); 1495 ctl->total_bitmaps--; 1496 ctl->op->recalc_thresholds(ctl); 1497 } 1498 1499 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, 1500 struct btrfs_free_space *bitmap_info, 1501 u64 *offset, u64 *bytes) 1502 { 1503 u64 end; 1504 u64 search_start, search_bytes; 1505 int ret; 1506 1507 again: 1508 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; 1509 1510 /* 1511 * XXX - this can go away after a few releases. 1512 * 1513 * since the only user of btrfs_remove_free_space is the tree logging 1514 * stuff, and the only way to test that is under crash conditions, we 1515 * want to have this debug stuff here just in case somethings not 1516 * working. Search the bitmap for the space we are trying to use to 1517 * make sure its actually there. If its not there then we need to stop 1518 * because something has gone wrong. 1519 */ 1520 search_start = *offset; 1521 search_bytes = *bytes; 1522 search_bytes = min(search_bytes, end - search_start + 1); 1523 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); 1524 BUG_ON(ret < 0 || search_start != *offset); 1525 1526 if (*offset > bitmap_info->offset && *offset + *bytes > end) { 1527 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1); 1528 *bytes -= end - *offset + 1; 1529 *offset = end + 1; 1530 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { 1531 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes); 1532 *bytes = 0; 1533 } 1534 1535 if (*bytes) { 1536 struct rb_node *next = rb_next(&bitmap_info->offset_index); 1537 if (!bitmap_info->bytes) 1538 free_bitmap(ctl, bitmap_info); 1539 1540 /* 1541 * no entry after this bitmap, but we still have bytes to 1542 * remove, so something has gone wrong. 1543 */ 1544 if (!next) 1545 return -EINVAL; 1546 1547 bitmap_info = rb_entry(next, struct btrfs_free_space, 1548 offset_index); 1549 1550 /* 1551 * if the next entry isn't a bitmap we need to return to let the 1552 * extent stuff do its work. 1553 */ 1554 if (!bitmap_info->bitmap) 1555 return -EAGAIN; 1556 1557 /* 1558 * Ok the next item is a bitmap, but it may not actually hold 1559 * the information for the rest of this free space stuff, so 1560 * look for it, and if we don't find it return so we can try 1561 * everything over again. 
1562 */ 1563 search_start = *offset; 1564 search_bytes = *bytes; 1565 ret = search_bitmap(ctl, bitmap_info, &search_start, 1566 &search_bytes); 1567 if (ret < 0 || search_start != *offset) 1568 return -EAGAIN; 1569 1570 goto again; 1571 } else if (!bitmap_info->bytes) 1572 free_bitmap(ctl, bitmap_info); 1573 1574 return 0; 1575 } 1576 1577 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl, 1578 struct btrfs_free_space *info, u64 offset, 1579 u64 bytes) 1580 { 1581 u64 bytes_to_set = 0; 1582 u64 end; 1583 1584 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); 1585 1586 bytes_to_set = min(end - offset, bytes); 1587 1588 bitmap_set_bits(ctl, info, offset, bytes_to_set); 1589 1590 return bytes_to_set; 1591 1592 } 1593 1594 static bool use_bitmap(struct btrfs_free_space_ctl *ctl, 1595 struct btrfs_free_space *info) 1596 { 1597 struct btrfs_block_group_cache *block_group = ctl->private; 1598 1599 /* 1600 * If we are below the extents threshold then we can add this as an 1601 * extent, and don't have to deal with the bitmap 1602 */ 1603 if (ctl->free_extents < ctl->extents_thresh) { 1604 /* 1605 * If this block group has some small extents we don't want to 1606 * use up all of our free slots in the cache with them, we want 1607 * to reserve them to larger extents, however if we have plent 1608 * of cache left then go ahead an dadd them, no sense in adding 1609 * the overhead of a bitmap if we don't have to. 1610 */ 1611 if (info->bytes <= block_group->sectorsize * 4) { 1612 if (ctl->free_extents * 2 <= ctl->extents_thresh) 1613 return false; 1614 } else { 1615 return false; 1616 } 1617 } 1618 1619 /* 1620 * some block groups are so tiny they can't be enveloped by a bitmap, so 1621 * don't even bother to create a bitmap for this 1622 */ 1623 if (BITS_PER_BITMAP * block_group->sectorsize > 1624 block_group->key.offset) 1625 return false; 1626 1627 return true; 1628 } 1629 1630 static struct btrfs_free_space_op free_space_op = { 1631 .recalc_thresholds = recalculate_thresholds, 1632 .use_bitmap = use_bitmap, 1633 }; 1634 1635 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 1636 struct btrfs_free_space *info) 1637 { 1638 struct btrfs_free_space *bitmap_info; 1639 struct btrfs_block_group_cache *block_group = NULL; 1640 int added = 0; 1641 u64 bytes, offset, bytes_added; 1642 int ret; 1643 1644 bytes = info->bytes; 1645 offset = info->offset; 1646 1647 if (!ctl->op->use_bitmap(ctl, info)) 1648 return 0; 1649 1650 if (ctl->op == &free_space_op) 1651 block_group = ctl->private; 1652 again: 1653 /* 1654 * Since we link bitmaps right into the cluster we need to see if we 1655 * have a cluster here, and if so and it has our bitmap we need to add 1656 * the free space to that bitmap. 
1657 */ 1658 if (block_group && !list_empty(&block_group->cluster_list)) { 1659 struct btrfs_free_cluster *cluster; 1660 struct rb_node *node; 1661 struct btrfs_free_space *entry; 1662 1663 cluster = list_entry(block_group->cluster_list.next, 1664 struct btrfs_free_cluster, 1665 block_group_list); 1666 spin_lock(&cluster->lock); 1667 node = rb_first(&cluster->root); 1668 if (!node) { 1669 spin_unlock(&cluster->lock); 1670 goto no_cluster_bitmap; 1671 } 1672 1673 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1674 if (!entry->bitmap) { 1675 spin_unlock(&cluster->lock); 1676 goto no_cluster_bitmap; 1677 } 1678 1679 if (entry->offset == offset_to_bitmap(ctl, offset)) { 1680 bytes_added = add_bytes_to_bitmap(ctl, entry, 1681 offset, bytes); 1682 bytes -= bytes_added; 1683 offset += bytes_added; 1684 } 1685 spin_unlock(&cluster->lock); 1686 if (!bytes) { 1687 ret = 1; 1688 goto out; 1689 } 1690 } 1691 1692 no_cluster_bitmap: 1693 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1694 1, 0); 1695 if (!bitmap_info) { 1696 BUG_ON(added); 1697 goto new_bitmap; 1698 } 1699 1700 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); 1701 bytes -= bytes_added; 1702 offset += bytes_added; 1703 added = 0; 1704 1705 if (!bytes) { 1706 ret = 1; 1707 goto out; 1708 } else 1709 goto again; 1710 1711 new_bitmap: 1712 if (info && info->bitmap) { 1713 add_new_bitmap(ctl, info, offset); 1714 added = 1; 1715 info = NULL; 1716 goto again; 1717 } else { 1718 spin_unlock(&ctl->tree_lock); 1719 1720 /* no pre-allocated info, allocate a new one */ 1721 if (!info) { 1722 info = kmem_cache_zalloc(btrfs_free_space_cachep, 1723 GFP_NOFS); 1724 if (!info) { 1725 spin_lock(&ctl->tree_lock); 1726 ret = -ENOMEM; 1727 goto out; 1728 } 1729 } 1730 1731 /* allocate the bitmap */ 1732 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 1733 spin_lock(&ctl->tree_lock); 1734 if (!info->bitmap) { 1735 ret = -ENOMEM; 1736 goto out; 1737 } 1738 goto again; 1739 } 1740 1741 out: 1742 if (info) { 1743 if (info->bitmap) 1744 kfree(info->bitmap); 1745 kmem_cache_free(btrfs_free_space_cachep, info); 1746 } 1747 1748 return ret; 1749 } 1750 1751 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, 1752 struct btrfs_free_space *info, bool update_stat) 1753 { 1754 struct btrfs_free_space *left_info; 1755 struct btrfs_free_space *right_info; 1756 bool merged = false; 1757 u64 offset = info->offset; 1758 u64 bytes = info->bytes; 1759 1760 /* 1761 * first we want to see if there is free space adjacent to the range we 1762 * are adding, if there is remove that struct and add a new one to 1763 * cover the entire range 1764 */ 1765 right_info = tree_search_offset(ctl, offset + bytes, 0, 0); 1766 if (right_info && rb_prev(&right_info->offset_index)) 1767 left_info = rb_entry(rb_prev(&right_info->offset_index), 1768 struct btrfs_free_space, offset_index); 1769 else 1770 left_info = tree_search_offset(ctl, offset - 1, 0, 0); 1771 1772 if (right_info && !right_info->bitmap) { 1773 if (update_stat) 1774 unlink_free_space(ctl, right_info); 1775 else 1776 __unlink_free_space(ctl, right_info); 1777 info->bytes += right_info->bytes; 1778 kmem_cache_free(btrfs_free_space_cachep, right_info); 1779 merged = true; 1780 } 1781 1782 if (left_info && !left_info->bitmap && 1783 left_info->offset + left_info->bytes == offset) { 1784 if (update_stat) 1785 unlink_free_space(ctl, left_info); 1786 else 1787 __unlink_free_space(ctl, left_info); 1788 info->offset = left_info->offset; 1789 info->bytes += 
left_info->bytes; 1790 kmem_cache_free(btrfs_free_space_cachep, left_info); 1791 merged = true; 1792 } 1793 1794 return merged; 1795 } 1796 1797 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, 1798 u64 offset, u64 bytes) 1799 { 1800 struct btrfs_free_space *info; 1801 int ret = 0; 1802 1803 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 1804 if (!info) 1805 return -ENOMEM; 1806 1807 info->offset = offset; 1808 info->bytes = bytes; 1809 1810 spin_lock(&ctl->tree_lock); 1811 1812 if (try_merge_free_space(ctl, info, true)) 1813 goto link; 1814 1815 /* 1816 * There was no extent directly to the left or right of this new 1817 * extent then we know we're going to have to allocate a new extent, so 1818 * before we do that see if we need to drop this into a bitmap 1819 */ 1820 ret = insert_into_bitmap(ctl, info); 1821 if (ret < 0) { 1822 goto out; 1823 } else if (ret) { 1824 ret = 0; 1825 goto out; 1826 } 1827 link: 1828 ret = link_free_space(ctl, info); 1829 if (ret) 1830 kmem_cache_free(btrfs_free_space_cachep, info); 1831 out: 1832 spin_unlock(&ctl->tree_lock); 1833 1834 if (ret) { 1835 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); 1836 BUG_ON(ret == -EEXIST); 1837 } 1838 1839 return ret; 1840 } 1841 1842 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 1843 u64 offset, u64 bytes) 1844 { 1845 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1846 struct btrfs_free_space *info; 1847 struct btrfs_free_space *next_info = NULL; 1848 int ret = 0; 1849 1850 spin_lock(&ctl->tree_lock); 1851 1852 again: 1853 info = tree_search_offset(ctl, offset, 0, 0); 1854 if (!info) { 1855 /* 1856 * oops didn't find an extent that matched the space we wanted 1857 * to remove, look for a bitmap instead 1858 */ 1859 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1860 1, 0); 1861 if (!info) { 1862 /* the tree logging code might be calling us before we 1863 * have fully loaded the free space rbtree for this 1864 * block group. So it is possible the entry won't 1865 * be in the rbtree yet at all. The caching code 1866 * will make sure not to put it in the rbtree if 1867 * the logging code has pinned it. 
1868 */ 1869 goto out_lock; 1870 } 1871 } 1872 1873 if (info->bytes < bytes && rb_next(&info->offset_index)) { 1874 u64 end; 1875 next_info = rb_entry(rb_next(&info->offset_index), 1876 struct btrfs_free_space, 1877 offset_index); 1878 1879 if (next_info->bitmap) 1880 end = next_info->offset + 1881 BITS_PER_BITMAP * ctl->unit - 1; 1882 else 1883 end = next_info->offset + next_info->bytes; 1884 1885 if (next_info->bytes < bytes || 1886 next_info->offset > offset || offset > end) { 1887 printk(KERN_CRIT "Found free space at %llu, size %llu," 1888 " trying to use %llu\n", 1889 (unsigned long long)info->offset, 1890 (unsigned long long)info->bytes, 1891 (unsigned long long)bytes); 1892 WARN_ON(1); 1893 ret = -EINVAL; 1894 goto out_lock; 1895 } 1896 1897 info = next_info; 1898 } 1899 1900 if (info->bytes == bytes) { 1901 unlink_free_space(ctl, info); 1902 if (info->bitmap) { 1903 kfree(info->bitmap); 1904 ctl->total_bitmaps--; 1905 } 1906 kmem_cache_free(btrfs_free_space_cachep, info); 1907 ret = 0; 1908 goto out_lock; 1909 } 1910 1911 if (!info->bitmap && info->offset == offset) { 1912 unlink_free_space(ctl, info); 1913 info->offset += bytes; 1914 info->bytes -= bytes; 1915 ret = link_free_space(ctl, info); 1916 WARN_ON(ret); 1917 goto out_lock; 1918 } 1919 1920 if (!info->bitmap && info->offset <= offset && 1921 info->offset + info->bytes >= offset + bytes) { 1922 u64 old_start = info->offset; 1923 /* 1924 * we're freeing space in the middle of the info, 1925 * this can happen during tree log replay 1926 * 1927 * first unlink the old info and then 1928 * insert it again after the hole we're creating 1929 */ 1930 unlink_free_space(ctl, info); 1931 if (offset + bytes < info->offset + info->bytes) { 1932 u64 old_end = info->offset + info->bytes; 1933 1934 info->offset = offset + bytes; 1935 info->bytes = old_end - info->offset; 1936 ret = link_free_space(ctl, info); 1937 WARN_ON(ret); 1938 if (ret) 1939 goto out_lock; 1940 } else { 1941 /* the hole we're creating ends at the end 1942 * of the info struct, just free the info 1943 */ 1944 kmem_cache_free(btrfs_free_space_cachep, info); 1945 } 1946 spin_unlock(&ctl->tree_lock); 1947 1948 /* step two, insert a new info struct to cover 1949 * anything before the hole 1950 */ 1951 ret = btrfs_add_free_space(block_group, old_start, 1952 offset - old_start); 1953 WARN_ON(ret); /* -ENOMEM */ 1954 goto out; 1955 } 1956 1957 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 1958 if (ret == -EAGAIN) 1959 goto again; 1960 BUG_ON(ret); /* logic error */ 1961 out_lock: 1962 spin_unlock(&ctl->tree_lock); 1963 out: 1964 return ret; 1965 } 1966 1967 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 1968 u64 bytes) 1969 { 1970 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1971 struct btrfs_free_space *info; 1972 struct rb_node *n; 1973 int count = 0; 1974 1975 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 1976 info = rb_entry(n, struct btrfs_free_space, offset_index); 1977 if (info->bytes >= bytes) 1978 count++; 1979 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", 1980 (unsigned long long)info->offset, 1981 (unsigned long long)info->bytes, 1982 (info->bitmap) ? "yes" : "no"); 1983 } 1984 printk(KERN_INFO "block group has cluster?: %s\n", 1985 list_empty(&block_group->cluster_list) ? 
"no" : "yes"); 1986 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" 1987 "\n", count); 1988 } 1989 1990 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) 1991 { 1992 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1993 1994 spin_lock_init(&ctl->tree_lock); 1995 ctl->unit = block_group->sectorsize; 1996 ctl->start = block_group->key.objectid; 1997 ctl->private = block_group; 1998 ctl->op = &free_space_op; 1999 2000 /* 2001 * we only want to have 32k of ram per block group for keeping 2002 * track of free space, and if we pass 1/2 of that we want to 2003 * start converting things over to using bitmaps 2004 */ 2005 ctl->extents_thresh = ((1024 * 32) / 2) / 2006 sizeof(struct btrfs_free_space); 2007 } 2008 2009 /* 2010 * for a given cluster, put all of its extents back into the free 2011 * space cache. If the block group passed doesn't match the block group 2012 * pointed to by the cluster, someone else raced in and freed the 2013 * cluster already. In that case, we just return without changing anything 2014 */ 2015 static int 2016 __btrfs_return_cluster_to_free_space( 2017 struct btrfs_block_group_cache *block_group, 2018 struct btrfs_free_cluster *cluster) 2019 { 2020 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2021 struct btrfs_free_space *entry; 2022 struct rb_node *node; 2023 2024 spin_lock(&cluster->lock); 2025 if (cluster->block_group != block_group) 2026 goto out; 2027 2028 cluster->block_group = NULL; 2029 cluster->window_start = 0; 2030 list_del_init(&cluster->block_group_list); 2031 2032 node = rb_first(&cluster->root); 2033 while (node) { 2034 bool bitmap; 2035 2036 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2037 node = rb_next(&entry->offset_index); 2038 rb_erase(&entry->offset_index, &cluster->root); 2039 2040 bitmap = (entry->bitmap != NULL); 2041 if (!bitmap) 2042 try_merge_free_space(ctl, entry, false); 2043 tree_insert_offset(&ctl->free_space_offset, 2044 entry->offset, &entry->offset_index, bitmap); 2045 } 2046 cluster->root = RB_ROOT; 2047 2048 out: 2049 spin_unlock(&cluster->lock); 2050 btrfs_put_block_group(block_group); 2051 return 0; 2052 } 2053 2054 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl) 2055 { 2056 struct btrfs_free_space *info; 2057 struct rb_node *node; 2058 2059 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2060 info = rb_entry(node, struct btrfs_free_space, offset_index); 2061 if (!info->bitmap) { 2062 unlink_free_space(ctl, info); 2063 kmem_cache_free(btrfs_free_space_cachep, info); 2064 } else { 2065 free_bitmap(ctl, info); 2066 } 2067 if (need_resched()) { 2068 spin_unlock(&ctl->tree_lock); 2069 cond_resched(); 2070 spin_lock(&ctl->tree_lock); 2071 } 2072 } 2073 } 2074 2075 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2076 { 2077 spin_lock(&ctl->tree_lock); 2078 __btrfs_remove_free_space_cache_locked(ctl); 2079 spin_unlock(&ctl->tree_lock); 2080 } 2081 2082 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 2083 { 2084 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2085 struct btrfs_free_cluster *cluster; 2086 struct list_head *head; 2087 2088 spin_lock(&ctl->tree_lock); 2089 while ((head = block_group->cluster_list.next) != 2090 &block_group->cluster_list) { 2091 cluster = list_entry(head, struct btrfs_free_cluster, 2092 block_group_list); 2093 2094 WARN_ON(cluster->block_group != block_group); 2095 
__btrfs_return_cluster_to_free_space(block_group, cluster); 2096 if (need_resched()) { 2097 spin_unlock(&ctl->tree_lock); 2098 cond_resched(); 2099 spin_lock(&ctl->tree_lock); 2100 } 2101 } 2102 __btrfs_remove_free_space_cache_locked(ctl); 2103 spin_unlock(&ctl->tree_lock); 2104 2105 } 2106 2107 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2108 u64 offset, u64 bytes, u64 empty_size) 2109 { 2110 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2111 struct btrfs_free_space *entry = NULL; 2112 u64 bytes_search = bytes + empty_size; 2113 u64 ret = 0; 2114 2115 spin_lock(&ctl->tree_lock); 2116 entry = find_free_space(ctl, &offset, &bytes_search); 2117 if (!entry) 2118 goto out; 2119 2120 ret = offset; 2121 if (entry->bitmap) { 2122 bitmap_clear_bits(ctl, entry, offset, bytes); 2123 if (!entry->bytes) 2124 free_bitmap(ctl, entry); 2125 } else { 2126 unlink_free_space(ctl, entry); 2127 entry->offset += bytes; 2128 entry->bytes -= bytes; 2129 if (!entry->bytes) 2130 kmem_cache_free(btrfs_free_space_cachep, entry); 2131 else 2132 link_free_space(ctl, entry); 2133 } 2134 2135 out: 2136 spin_unlock(&ctl->tree_lock); 2137 2138 return ret; 2139 } 2140 2141 /* 2142 * given a cluster, put all of its extents back into the free space 2143 * cache. If a block group is passed, this function will only free 2144 * a cluster that belongs to the passed block group. 2145 * 2146 * Otherwise, it'll get a reference on the block group pointed to by the 2147 * cluster and remove the cluster from it. 2148 */ 2149 int btrfs_return_cluster_to_free_space( 2150 struct btrfs_block_group_cache *block_group, 2151 struct btrfs_free_cluster *cluster) 2152 { 2153 struct btrfs_free_space_ctl *ctl; 2154 int ret; 2155 2156 /* first, get a safe pointer to the block group */ 2157 spin_lock(&cluster->lock); 2158 if (!block_group) { 2159 block_group = cluster->block_group; 2160 if (!block_group) { 2161 spin_unlock(&cluster->lock); 2162 return 0; 2163 } 2164 } else if (cluster->block_group != block_group) { 2165 /* someone else has already freed it don't redo their work */ 2166 spin_unlock(&cluster->lock); 2167 return 0; 2168 } 2169 atomic_inc(&block_group->count); 2170 spin_unlock(&cluster->lock); 2171 2172 ctl = block_group->free_space_ctl; 2173 2174 /* now return any extents the cluster had on it */ 2175 spin_lock(&ctl->tree_lock); 2176 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 2177 spin_unlock(&ctl->tree_lock); 2178 2179 /* finally drop our ref */ 2180 btrfs_put_block_group(block_group); 2181 return ret; 2182 } 2183 2184 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2185 struct btrfs_free_cluster *cluster, 2186 struct btrfs_free_space *entry, 2187 u64 bytes, u64 min_start) 2188 { 2189 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2190 int err; 2191 u64 search_start = cluster->window_start; 2192 u64 search_bytes = bytes; 2193 u64 ret = 0; 2194 2195 search_start = min_start; 2196 search_bytes = bytes; 2197 2198 err = search_bitmap(ctl, entry, &search_start, &search_bytes); 2199 if (err) 2200 return 0; 2201 2202 ret = search_start; 2203 __bitmap_clear_bits(ctl, entry, ret, bytes); 2204 2205 return ret; 2206 } 2207 2208 /* 2209 * given a cluster, try to allocate 'bytes' from it, returns 0 2210 * if it couldn't find anything suitably large, or a logical disk offset 2211 * if things worked out 2212 */ 2213 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2214 struct btrfs_free_cluster *cluster, 
u64 bytes, 2215 u64 min_start) 2216 { 2217 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2218 struct btrfs_free_space *entry = NULL; 2219 struct rb_node *node; 2220 u64 ret = 0; 2221 2222 spin_lock(&cluster->lock); 2223 if (bytes > cluster->max_size) 2224 goto out; 2225 2226 if (cluster->block_group != block_group) 2227 goto out; 2228 2229 node = rb_first(&cluster->root); 2230 if (!node) 2231 goto out; 2232 2233 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2234 while(1) { 2235 if (entry->bytes < bytes || 2236 (!entry->bitmap && entry->offset < min_start)) { 2237 node = rb_next(&entry->offset_index); 2238 if (!node) 2239 break; 2240 entry = rb_entry(node, struct btrfs_free_space, 2241 offset_index); 2242 continue; 2243 } 2244 2245 if (entry->bitmap) { 2246 ret = btrfs_alloc_from_bitmap(block_group, 2247 cluster, entry, bytes, 2248 cluster->window_start); 2249 if (ret == 0) { 2250 node = rb_next(&entry->offset_index); 2251 if (!node) 2252 break; 2253 entry = rb_entry(node, struct btrfs_free_space, 2254 offset_index); 2255 continue; 2256 } 2257 cluster->window_start += bytes; 2258 } else { 2259 ret = entry->offset; 2260 2261 entry->offset += bytes; 2262 entry->bytes -= bytes; 2263 } 2264 2265 if (entry->bytes == 0) 2266 rb_erase(&entry->offset_index, &cluster->root); 2267 break; 2268 } 2269 out: 2270 spin_unlock(&cluster->lock); 2271 2272 if (!ret) 2273 return 0; 2274 2275 spin_lock(&ctl->tree_lock); 2276 2277 ctl->free_space -= bytes; 2278 if (entry->bytes == 0) { 2279 ctl->free_extents--; 2280 if (entry->bitmap) { 2281 kfree(entry->bitmap); 2282 ctl->total_bitmaps--; 2283 ctl->op->recalc_thresholds(ctl); 2284 } 2285 kmem_cache_free(btrfs_free_space_cachep, entry); 2286 } 2287 2288 spin_unlock(&ctl->tree_lock); 2289 2290 return ret; 2291 } 2292 2293 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 2294 struct btrfs_free_space *entry, 2295 struct btrfs_free_cluster *cluster, 2296 u64 offset, u64 bytes, 2297 u64 cont1_bytes, u64 min_bytes) 2298 { 2299 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2300 unsigned long next_zero; 2301 unsigned long i; 2302 unsigned long want_bits; 2303 unsigned long min_bits; 2304 unsigned long found_bits; 2305 unsigned long start = 0; 2306 unsigned long total_found = 0; 2307 int ret; 2308 2309 i = offset_to_bit(entry->offset, block_group->sectorsize, 2310 max_t(u64, offset, entry->offset)); 2311 want_bits = bytes_to_bits(bytes, block_group->sectorsize); 2312 min_bits = bytes_to_bits(min_bytes, block_group->sectorsize); 2313 2314 again: 2315 found_bits = 0; 2316 for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i); 2317 i < BITS_PER_BITMAP; 2318 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { 2319 next_zero = find_next_zero_bit(entry->bitmap, 2320 BITS_PER_BITMAP, i); 2321 if (next_zero - i >= min_bits) { 2322 found_bits = next_zero - i; 2323 break; 2324 } 2325 i = next_zero; 2326 } 2327 2328 if (!found_bits) 2329 return -ENOSPC; 2330 2331 if (!total_found) { 2332 start = i; 2333 cluster->max_size = 0; 2334 } 2335 2336 total_found += found_bits; 2337 2338 if (cluster->max_size < found_bits * block_group->sectorsize) 2339 cluster->max_size = found_bits * block_group->sectorsize; 2340 2341 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 2342 i = next_zero + 1; 2343 goto again; 2344 } 2345 2346 cluster->window_start = start * block_group->sectorsize + 2347 entry->offset; 2348 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2349 ret = 
tree_insert_offset(&cluster->root, entry->offset, 2350 &entry->offset_index, 1); 2351 BUG_ON(ret); /* -EEXIST; Logic error */ 2352 2353 trace_btrfs_setup_cluster(block_group, cluster, 2354 total_found * block_group->sectorsize, 1); 2355 return 0; 2356 } 2357 2358 /* 2359 * This searches the block group for just extents to fill the cluster with. 2360 * Try to find a cluster with at least bytes total bytes, at least one 2361 * extent of cont1_bytes, and other clusters of at least min_bytes. 2362 */ 2363 static noinline int 2364 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, 2365 struct btrfs_free_cluster *cluster, 2366 struct list_head *bitmaps, u64 offset, u64 bytes, 2367 u64 cont1_bytes, u64 min_bytes) 2368 { 2369 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2370 struct btrfs_free_space *first = NULL; 2371 struct btrfs_free_space *entry = NULL; 2372 struct btrfs_free_space *last; 2373 struct rb_node *node; 2374 u64 window_start; 2375 u64 window_free; 2376 u64 max_extent; 2377 u64 total_size = 0; 2378 2379 entry = tree_search_offset(ctl, offset, 0, 1); 2380 if (!entry) 2381 return -ENOSPC; 2382 2383 /* 2384 * We don't want bitmaps, so just move along until we find a normal 2385 * extent entry. 2386 */ 2387 while (entry->bitmap || entry->bytes < min_bytes) { 2388 if (entry->bitmap && list_empty(&entry->list)) 2389 list_add_tail(&entry->list, bitmaps); 2390 node = rb_next(&entry->offset_index); 2391 if (!node) 2392 return -ENOSPC; 2393 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2394 } 2395 2396 window_start = entry->offset; 2397 window_free = entry->bytes; 2398 max_extent = entry->bytes; 2399 first = entry; 2400 last = entry; 2401 2402 for (node = rb_next(&entry->offset_index); node; 2403 node = rb_next(&entry->offset_index)) { 2404 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2405 2406 if (entry->bitmap) { 2407 if (list_empty(&entry->list)) 2408 list_add_tail(&entry->list, bitmaps); 2409 continue; 2410 } 2411 2412 if (entry->bytes < min_bytes) 2413 continue; 2414 2415 last = entry; 2416 window_free += entry->bytes; 2417 if (entry->bytes > max_extent) 2418 max_extent = entry->bytes; 2419 } 2420 2421 if (window_free < bytes || max_extent < cont1_bytes) 2422 return -ENOSPC; 2423 2424 cluster->window_start = first->offset; 2425 2426 node = &first->offset_index; 2427 2428 /* 2429 * now we've found our entries, pull them out of the free space 2430 * cache and put them into the cluster rbtree 2431 */ 2432 do { 2433 int ret; 2434 2435 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2436 node = rb_next(&entry->offset_index); 2437 if (entry->bitmap || entry->bytes < min_bytes) 2438 continue; 2439 2440 rb_erase(&entry->offset_index, &ctl->free_space_offset); 2441 ret = tree_insert_offset(&cluster->root, entry->offset, 2442 &entry->offset_index, 0); 2443 total_size += entry->bytes; 2444 BUG_ON(ret); /* -EEXIST; Logic error */ 2445 } while (node && entry != last); 2446 2447 cluster->max_size = max_extent; 2448 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 2449 return 0; 2450 } 2451 2452 /* 2453 * This specifically looks for bitmaps that may work in the cluster, we assume 2454 * that we have already failed to find extents that will work. 
2455 */ 2456 static noinline int 2457 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, 2458 struct btrfs_free_cluster *cluster, 2459 struct list_head *bitmaps, u64 offset, u64 bytes, 2460 u64 cont1_bytes, u64 min_bytes) 2461 { 2462 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2463 struct btrfs_free_space *entry; 2464 int ret = -ENOSPC; 2465 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 2466 2467 if (ctl->total_bitmaps == 0) 2468 return -ENOSPC; 2469 2470 /* 2471 * The bitmap that covers offset won't be in the list unless offset 2472 * is just its start offset. 2473 */ 2474 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 2475 if (entry->offset != bitmap_offset) { 2476 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 2477 if (entry && list_empty(&entry->list)) 2478 list_add(&entry->list, bitmaps); 2479 } 2480 2481 list_for_each_entry(entry, bitmaps, list) { 2482 if (entry->bytes < bytes) 2483 continue; 2484 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 2485 bytes, cont1_bytes, min_bytes); 2486 if (!ret) 2487 return 0; 2488 } 2489 2490 /* 2491 * The bitmaps list has all the bitmaps that record free space 2492 * starting after offset, so no more search is required. 2493 */ 2494 return -ENOSPC; 2495 } 2496 2497 /* 2498 * Here we try to find a cluster of blocks in a block group. The goal 2499 * is to find at least bytes + empty_size worth of free space. 2500 * We might not find them all in one contiguous area. 2501 * 2502 * Returns zero and sets up the cluster if things worked out, otherwise 2503 * it returns -ENOSPC. 2504 */ 2505 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 2506 struct btrfs_root *root, 2507 struct btrfs_block_group_cache *block_group, 2508 struct btrfs_free_cluster *cluster, 2509 u64 offset, u64 bytes, u64 empty_size) 2510 { 2511 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2512 struct btrfs_free_space *entry, *tmp; 2513 LIST_HEAD(bitmaps); 2514 u64 min_bytes; 2515 u64 cont1_bytes; 2516 int ret; 2517 2518 /* 2519 * Choose the minimum extent size we'll require for this 2520 * cluster. For SSD_SPREAD, don't allow any fragmentation. 2521 * For metadata, allow allocations with smaller extents. For 2522 * data, keep it dense. 2523 */ 2524 if (btrfs_test_opt(root, SSD_SPREAD)) { 2525 cont1_bytes = min_bytes = bytes + empty_size; 2526 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 2527 cont1_bytes = bytes; 2528 min_bytes = block_group->sectorsize; 2529 } else { 2530 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 2531 min_bytes = block_group->sectorsize; 2532 } 2533 2534 spin_lock(&ctl->tree_lock); 2535 2536 /* 2537 * If we know we don't have enough space to make a cluster, don't even 2538 * bother doing all the work to try and find one.
2539 */ 2540 if (ctl->free_space < bytes) { 2541 spin_unlock(&ctl->tree_lock); 2542 return -ENOSPC; 2543 } 2544 2545 spin_lock(&cluster->lock); 2546 2547 /* someone already found a cluster, hooray */ 2548 if (cluster->block_group) { 2549 ret = 0; 2550 goto out; 2551 } 2552 2553 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 2554 min_bytes); 2555 2556 INIT_LIST_HEAD(&bitmaps); 2557 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 2558 bytes + empty_size, 2559 cont1_bytes, min_bytes); 2560 if (ret) 2561 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 2562 offset, bytes + empty_size, 2563 cont1_bytes, min_bytes); 2564 2565 /* Clear our temporary list */ 2566 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 2567 list_del_init(&entry->list); 2568 2569 if (!ret) { 2570 atomic_inc(&block_group->count); 2571 list_add_tail(&cluster->block_group_list, 2572 &block_group->cluster_list); 2573 cluster->block_group = block_group; 2574 } else { 2575 trace_btrfs_failed_cluster_setup(block_group); 2576 } 2577 out: 2578 spin_unlock(&cluster->lock); 2579 spin_unlock(&ctl->tree_lock); 2580 2581 return ret; 2582 } 2583 2584 /* 2585 * simple code to zero out a cluster 2586 */ 2587 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 2588 { 2589 spin_lock_init(&cluster->lock); 2590 spin_lock_init(&cluster->refill_lock); 2591 cluster->root = RB_ROOT; 2592 cluster->max_size = 0; 2593 INIT_LIST_HEAD(&cluster->block_group_list); 2594 cluster->block_group = NULL; 2595 } 2596 2597 static int do_trimming(struct btrfs_block_group_cache *block_group, 2598 u64 *total_trimmed, u64 start, u64 bytes, 2599 u64 reserved_start, u64 reserved_bytes) 2600 { 2601 struct btrfs_space_info *space_info = block_group->space_info; 2602 struct btrfs_fs_info *fs_info = block_group->fs_info; 2603 int ret; 2604 int update = 0; 2605 u64 trimmed = 0; 2606 2607 spin_lock(&space_info->lock); 2608 spin_lock(&block_group->lock); 2609 if (!block_group->ro) { 2610 block_group->reserved += reserved_bytes; 2611 space_info->bytes_reserved += reserved_bytes; 2612 update = 1; 2613 } 2614 spin_unlock(&block_group->lock); 2615 spin_unlock(&space_info->lock); 2616 2617 ret = btrfs_error_discard_extent(fs_info->extent_root, 2618 start, bytes, &trimmed); 2619 if (!ret) 2620 *total_trimmed += trimmed; 2621 2622 btrfs_add_free_space(block_group, reserved_start, reserved_bytes); 2623 2624 if (update) { 2625 spin_lock(&space_info->lock); 2626 spin_lock(&block_group->lock); 2627 if (block_group->ro) 2628 space_info->bytes_readonly += reserved_bytes; 2629 block_group->reserved -= reserved_bytes; 2630 space_info->bytes_reserved -= reserved_bytes; 2631 spin_unlock(&space_info->lock); 2632 spin_unlock(&block_group->lock); 2633 } 2634 2635 return ret; 2636 } 2637 2638 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, 2639 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 2640 { 2641 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2642 struct btrfs_free_space *entry; 2643 struct rb_node *node; 2644 int ret = 0; 2645 u64 extent_start; 2646 u64 extent_bytes; 2647 u64 bytes; 2648 2649 while (start < end) { 2650 spin_lock(&ctl->tree_lock); 2651 2652 if (ctl->free_space < minlen) { 2653 spin_unlock(&ctl->tree_lock); 2654 break; 2655 } 2656 2657 entry = tree_search_offset(ctl, start, 0, 1); 2658 if (!entry) { 2659 spin_unlock(&ctl->tree_lock); 2660 break; 2661 } 2662 2663 /* skip bitmaps */ 2664 while (entry->bitmap) { 2665 node = rb_next(&entry->offset_index); 2666 if 
(!node) { 2667 spin_unlock(&ctl->tree_lock); 2668 goto out; 2669 } 2670 entry = rb_entry(node, struct btrfs_free_space, 2671 offset_index); 2672 } 2673 2674 if (entry->offset >= end) { 2675 spin_unlock(&ctl->tree_lock); 2676 break; 2677 } 2678 2679 extent_start = entry->offset; 2680 extent_bytes = entry->bytes; 2681 start = max(start, extent_start); 2682 bytes = min(extent_start + extent_bytes, end) - start; 2683 if (bytes < minlen) { 2684 spin_unlock(&ctl->tree_lock); 2685 goto next; 2686 } 2687 2688 unlink_free_space(ctl, entry); 2689 kmem_cache_free(btrfs_free_space_cachep, entry); 2690 2691 spin_unlock(&ctl->tree_lock); 2692 2693 ret = do_trimming(block_group, total_trimmed, start, bytes, 2694 extent_start, extent_bytes); 2695 if (ret) 2696 break; 2697 next: 2698 start += bytes; 2699 2700 if (fatal_signal_pending(current)) { 2701 ret = -ERESTARTSYS; 2702 break; 2703 } 2704 2705 cond_resched(); 2706 } 2707 out: 2708 return ret; 2709 } 2710 2711 static int trim_bitmaps(struct btrfs_block_group_cache *block_group, 2712 u64 *total_trimmed, u64 start, u64 end, u64 minlen) 2713 { 2714 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2715 struct btrfs_free_space *entry; 2716 int ret = 0; 2717 int ret2; 2718 u64 bytes; 2719 u64 offset = offset_to_bitmap(ctl, start); 2720 2721 while (offset < end) { 2722 bool next_bitmap = false; 2723 2724 spin_lock(&ctl->tree_lock); 2725 2726 if (ctl->free_space < minlen) { 2727 spin_unlock(&ctl->tree_lock); 2728 break; 2729 } 2730 2731 entry = tree_search_offset(ctl, offset, 1, 0); 2732 if (!entry) { 2733 spin_unlock(&ctl->tree_lock); 2734 next_bitmap = true; 2735 goto next; 2736 } 2737 2738 bytes = minlen; 2739 ret2 = search_bitmap(ctl, entry, &start, &bytes); 2740 if (ret2 || start >= end) { 2741 spin_unlock(&ctl->tree_lock); 2742 next_bitmap = true; 2743 goto next; 2744 } 2745 2746 bytes = min(bytes, end - start); 2747 if (bytes < minlen) { 2748 spin_unlock(&ctl->tree_lock); 2749 goto next; 2750 } 2751 2752 bitmap_clear_bits(ctl, entry, start, bytes); 2753 if (entry->bytes == 0) 2754 free_bitmap(ctl, entry); 2755 2756 spin_unlock(&ctl->tree_lock); 2757 2758 ret = do_trimming(block_group, total_trimmed, start, bytes, 2759 start, bytes); 2760 if (ret) 2761 break; 2762 next: 2763 if (next_bitmap) { 2764 offset += BITS_PER_BITMAP * ctl->unit; 2765 } else { 2766 start += bytes; 2767 if (start >= offset + BITS_PER_BITMAP * ctl->unit) 2768 offset += BITS_PER_BITMAP * ctl->unit; 2769 } 2770 2771 if (fatal_signal_pending(current)) { 2772 ret = -ERESTARTSYS; 2773 break; 2774 } 2775 2776 cond_resched(); 2777 } 2778 2779 return ret; 2780 } 2781 2782 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, 2783 u64 *trimmed, u64 start, u64 end, u64 minlen) 2784 { 2785 int ret; 2786 2787 *trimmed = 0; 2788 2789 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); 2790 if (ret) 2791 return ret; 2792 2793 ret = trim_bitmaps(block_group, trimmed, start, end, minlen); 2794 2795 return ret; 2796 } 2797 2798 /* 2799 * Find the left-most item in the cache tree, and then return the 2800 * smallest inode number in the item. 2801 * 2802 * Note: the returned inode number may not be the smallest one in 2803 * the tree, if the left-most item is a bitmap. 
2804 */ 2805 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 2806 { 2807 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 2808 struct btrfs_free_space *entry = NULL; 2809 u64 ino = 0; 2810 2811 spin_lock(&ctl->tree_lock); 2812 2813 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 2814 goto out; 2815 2816 entry = rb_entry(rb_first(&ctl->free_space_offset), 2817 struct btrfs_free_space, offset_index); 2818 2819 if (!entry->bitmap) { 2820 ino = entry->offset; 2821 2822 unlink_free_space(ctl, entry); 2823 entry->offset++; 2824 entry->bytes--; 2825 if (!entry->bytes) 2826 kmem_cache_free(btrfs_free_space_cachep, entry); 2827 else 2828 link_free_space(ctl, entry); 2829 } else { 2830 u64 offset = 0; 2831 u64 count = 1; 2832 int ret; 2833 2834 ret = search_bitmap(ctl, entry, &offset, &count); 2835 /* Logic error; Should be empty if it can't find anything */ 2836 BUG_ON(ret); 2837 2838 ino = offset; 2839 bitmap_clear_bits(ctl, entry, offset, 1); 2840 if (entry->bytes == 0) 2841 free_bitmap(ctl, entry); 2842 } 2843 out: 2844 spin_unlock(&ctl->tree_lock); 2845 2846 return ino; 2847 } 2848 2849 struct inode *lookup_free_ino_inode(struct btrfs_root *root, 2850 struct btrfs_path *path) 2851 { 2852 struct inode *inode = NULL; 2853 2854 spin_lock(&root->cache_lock); 2855 if (root->cache_inode) 2856 inode = igrab(root->cache_inode); 2857 spin_unlock(&root->cache_lock); 2858 if (inode) 2859 return inode; 2860 2861 inode = __lookup_free_space_inode(root, path, 0); 2862 if (IS_ERR(inode)) 2863 return inode; 2864 2865 spin_lock(&root->cache_lock); 2866 if (!btrfs_fs_closing(root->fs_info)) 2867 root->cache_inode = igrab(inode); 2868 spin_unlock(&root->cache_lock); 2869 2870 return inode; 2871 } 2872 2873 int create_free_ino_inode(struct btrfs_root *root, 2874 struct btrfs_trans_handle *trans, 2875 struct btrfs_path *path) 2876 { 2877 return __create_free_space_inode(root, trans, path, 2878 BTRFS_FREE_INO_OBJECTID, 0); 2879 } 2880 2881 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 2882 { 2883 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 2884 struct btrfs_path *path; 2885 struct inode *inode; 2886 int ret = 0; 2887 u64 root_gen = btrfs_root_generation(&root->root_item); 2888 2889 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 2890 return 0; 2891 2892 /* 2893 * If we're unmounting then just return, since this does a search on the 2894 * normal root and not the commit root and we could deadlock. 
2895 */ 2896 if (btrfs_fs_closing(fs_info)) 2897 return 0; 2898 2899 path = btrfs_alloc_path(); 2900 if (!path) 2901 return 0; 2902 2903 inode = lookup_free_ino_inode(root, path); 2904 if (IS_ERR(inode)) 2905 goto out; 2906 2907 if (root_gen != BTRFS_I(inode)->generation) 2908 goto out_put; 2909 2910 ret = __load_free_space_cache(root, inode, ctl, path, 0); 2911 2912 if (ret < 0) 2913 printk(KERN_ERR "btrfs: failed to load free ino cache for " 2914 "root %llu\n", root->root_key.objectid); 2915 out_put: 2916 iput(inode); 2917 out: 2918 btrfs_free_path(path); 2919 return ret; 2920 } 2921 2922 int btrfs_write_out_ino_cache(struct btrfs_root *root, 2923 struct btrfs_trans_handle *trans, 2924 struct btrfs_path *path) 2925 { 2926 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 2927 struct inode *inode; 2928 int ret; 2929 2930 if (!btrfs_test_opt(root, INODE_MAP_CACHE)) 2931 return 0; 2932 2933 inode = lookup_free_ino_inode(root, path); 2934 if (IS_ERR(inode)) 2935 return 0; 2936 2937 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0); 2938 if (ret) { 2939 btrfs_delalloc_release_metadata(inode, inode->i_size); 2940 #ifdef DEBUG 2941 printk(KERN_ERR "btrfs: failed to write free ino cache " 2942 "for root %llu\n", root->root_key.objectid); 2943 #endif 2944 } 2945 2946 iput(inode); 2947 return ret; 2948 } 2949
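/*
 * A standalone userspace model of the run search performed by
 * btrfs_bitmap_cluster() earlier in this file: starting from a bit index
 * derived from the requested offset, walk the bitmap looking for a run of
 * set bits (set == free space here) at least min_bits long, the way the
 * kernel code pairs find_next_bit() with find_next_zero_bit().  The
 * helpers below are simplified stand-ins for those kernel primitives, and
 * the 4096-byte unit size is an assumption for illustration; this sketch
 * compiles as an ordinary userspace program.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define SECTORSIZE	4096UL
#define NBITS		256UL			/* bits in our toy bitmap */

static unsigned long find_next_set(const uint8_t *bm, unsigned long size,
				   unsigned long start)
{
	for (; start < size; start++)
		if (bm[start / 8] & (1u << (start % 8)))
			return start;
	return size;
}

static unsigned long find_next_clear(const uint8_t *bm, unsigned long size,
				     unsigned long start)
{
	for (; start < size; start++)
		if (!(bm[start / 8] & (1u << (start % 8))))
			return start;
	return size;
}

/* offset_to_bit(): byte offset within the bitmap's range -> bit index */
static unsigned long offset_to_bit(uint64_t bitmap_start, uint64_t offset)
{
	return (offset - bitmap_start) / SECTORSIZE;
}

/* Return the first bit of a run of >= min_bits set bits, or size if none. */
static unsigned long find_run(const uint8_t *bm, unsigned long size,
			      unsigned long start, unsigned long min_bits,
			      unsigned long *run_len)
{
	unsigned long i, next_zero;

	for (i = find_next_set(bm, size, start); i < size;
	     i = find_next_set(bm, size, next_zero + 1)) {
		next_zero = find_next_clear(bm, size, i);
		if (next_zero - i >= min_bits) {
			*run_len = next_zero - i;
			return i;
		}
	}
	return size;
}

int main(void)
{
	uint8_t bm[NBITS / 8];
	unsigned long start, len = 0;

	memset(bm, 0, sizeof(bm));
	/* mark bits 10..12 and 40..55 as free space */
	for (unsigned long b = 10; b <= 12; b++)
		bm[b / 8] |= 1u << (b % 8);
	for (unsigned long b = 40; b <= 55; b++)
		bm[b / 8] |= 1u << (b % 8);

	/* look for at least 8 contiguous free sectors (32 KiB at 4 KiB units) */
	start = find_run(bm, NBITS, offset_to_bit(0, 0), 8, &len);
	if (start < NBITS)
		printf("run of %lu free sectors at byte offset %lu\n",
		       len, start * SECTORSIZE);
	else
		printf("no run of 8 free sectors found\n");
	return 0;
}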