1 /* 2 * Copyright (C) 2008 Red Hat. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/pagemap.h> 20 #include <linux/sched.h> 21 #include <linux/slab.h> 22 #include <linux/math64.h> 23 #include <linux/ratelimit.h> 24 #include "ctree.h" 25 #include "free-space-cache.h" 26 #include "transaction.h" 27 #include "disk-io.h" 28 #include "extent_io.h" 29 #include "inode-map.h" 30 31 #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 33 34 static int link_free_space(struct btrfs_free_space_ctl *ctl, 35 struct btrfs_free_space *info); 36 37 static struct inode *__lookup_free_space_inode(struct btrfs_root *root, 38 struct btrfs_path *path, 39 u64 offset) 40 { 41 struct btrfs_key key; 42 struct btrfs_key location; 43 struct btrfs_disk_key disk_key; 44 struct btrfs_free_space_header *header; 45 struct extent_buffer *leaf; 46 struct inode *inode = NULL; 47 int ret; 48 49 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 50 key.offset = offset; 51 key.type = 0; 52 53 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 54 if (ret < 0) 55 return ERR_PTR(ret); 56 if (ret > 0) { 57 btrfs_release_path(path); 58 return ERR_PTR(-ENOENT); 59 } 60 61 leaf = path->nodes[0]; 62 header = btrfs_item_ptr(leaf, path->slots[0], 63 struct btrfs_free_space_header); 64 btrfs_free_space_key(leaf, header, &disk_key); 65 btrfs_disk_key_to_cpu(&location, &disk_key); 66 btrfs_release_path(path); 67 68 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL); 69 if (!inode) 70 return ERR_PTR(-ENOENT); 71 if (IS_ERR(inode)) 72 return inode; 73 if (is_bad_inode(inode)) { 74 iput(inode); 75 return ERR_PTR(-ENOENT); 76 } 77 78 inode->i_mapping->flags &= ~__GFP_FS; 79 80 return inode; 81 } 82 83 struct inode *lookup_free_space_inode(struct btrfs_root *root, 84 struct btrfs_block_group_cache 85 *block_group, struct btrfs_path *path) 86 { 87 struct inode *inode = NULL; 88 u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW; 89 90 spin_lock(&block_group->lock); 91 if (block_group->inode) 92 inode = igrab(block_group->inode); 93 spin_unlock(&block_group->lock); 94 if (inode) 95 return inode; 96 97 inode = __lookup_free_space_inode(root, path, 98 block_group->key.objectid); 99 if (IS_ERR(inode)) 100 return inode; 101 102 spin_lock(&block_group->lock); 103 if (!((BTRFS_I(inode)->flags & flags) == flags)) { 104 printk(KERN_INFO "Old style space inode found, converting.\n"); 105 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM | 106 BTRFS_INODE_NODATACOW; 107 block_group->disk_cache_state = BTRFS_DC_CLEAR; 108 } 109 110 if (!block_group->iref) { 111 block_group->inode = igrab(inode); 112 block_group->iref = 1; 113 } 114 spin_unlock(&block_group->lock); 115 116 return inode; 117 } 118 119 int __create_free_space_inode(struct btrfs_root *root, 120 struct btrfs_trans_handle *trans, 121 struct btrfs_path *path, u64 ino, u64 offset) 122 { 123 
int __create_free_space_inode(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path, u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	/* We inline crcs for the free disk space cache */
	if (ino != BTRFS_FREE_INO_OBJECTID)
		flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}

int create_free_space_inode(struct btrfs_root *root,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_find_free_objectid(root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(root, trans, path, ino,
					 block_group->key.objectid);
}

int btrfs_truncate_free_space_cache(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct inode *inode)
{
	struct btrfs_block_rsv *rsv;
	u64 needed_bytes;
	loff_t oldsize;
	int ret = 0;

	rsv = trans->block_rsv;
	trans->block_rsv = &root->fs_info->global_block_rsv;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
		btrfs_calc_trans_metadata_size(root, 1);

	spin_lock(&trans->block_rsv->lock);
	if (trans->block_rsv->reserved < needed_bytes) {
		spin_unlock(&trans->block_rsv->lock);
		trans->block_rsv = rsv;
		return -ENOSPC;
	}
	spin_unlock(&trans->block_rsv->lock);

	oldsize = i_size_read(inode);
	btrfs_i_size_write(inode, 0);
	truncate_pagecache(inode, oldsize, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);

	if (ret) {
		trans->block_rsv = rsv;
		WARN_ON(1);
		return ret;
	}

	ret = btrfs_update_inode(trans, root, inode);
	trans->block_rsv = rsv;

	return ret;
}
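/*
 * The cache file is read back sequentially, so kick off readahead across
 * the whole inode up front; the page reads in io_ctl_prepare_pages() then
 * mostly hit pages that are already uptodate or in flight.
 */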
static int readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return -ENOMEM;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);

	return 0;
}

struct io_ctl {
	void *cur, *orig;
	struct page *page;
	struct page **pages;
	struct btrfs_root *root;
	unsigned long size;
	int index;
	int num_pages;
	unsigned check_crcs:1;
};

static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
		       struct btrfs_root *root)
{
	memset(io_ctl, 0, sizeof(struct io_ctl));
	io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
		PAGE_CACHE_SHIFT;
	io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
				GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;
	io_ctl->root = root;
	if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
		io_ctl->check_crcs = 1;
	return 0;
}

static void io_ctl_free(struct io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
}

static void io_ctl_unmap_page(struct io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		kunmap(io_ctl->page);
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
	WARN_ON(io_ctl->cur);
	BUG_ON(io_ctl->index >= io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = kmap(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_CACHE_SIZE;
	if (clear)
		memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
}

static void io_ctl_drop_pages(struct io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
			ClearPageChecked(io_ctl->pages[i]);
			unlock_page(io_ctl->pages[i]);
			page_cache_release(io_ctl->pages[i]);
		}
	}
}

static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
				int uptodate)
{
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}
		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				printk(KERN_ERR "btrfs: error reading free "
				       "space cache\n");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	for (i = 0; i < io_ctl->num_pages; i++) {
		clear_page_dirty_for_io(io_ctl->pages[i]);
		set_page_extent_mapped(io_ctl->pages[i]);
	}

	return 0;
}
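/*
 * On-disk layout of the cache pages, as implemented by the helpers below:
 * when crcs are inlined (every cache except the free ino cache), page 0
 * begins with an array of one u32 crc per page followed by a u64
 * generation; otherwise page 0 begins with just the u64 generation.
 * Entries and bitmaps follow, and each page's crc covers the whole page
 * minus the crc array on page 0.
 */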
static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
{
	u64 *val;

	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the csum areas.  If we don't check crcs then we just have a
	 * 64bit chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
		io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	val = io_ctl->cur;
	*val = cpu_to_le64(generation);
	io_ctl->cur += sizeof(u64);
}

static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
{
	u64 *gen;

	/*
	 * Skip the crc area.  If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
		io_ctl->size -= sizeof(u64) +
			(sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	gen = io_ctl->cur;
	if (le64_to_cpu(*gen) != generation) {
		printk_ratelimited(KERN_ERR "btrfs: space cache generation "
				   "(%Lu) does not match inode (%Lu)\n", *gen,
				   generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}

static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
{
	u32 *tmp;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_unmap_page(io_ctl);
		return;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
			      PAGE_CACHE_SIZE - offset);
	btrfs_csum_final(crc, (char *)&crc);
	io_ctl_unmap_page(io_ctl);
	tmp = kmap(io_ctl->pages[0]);
	tmp += index;
	*tmp = crc;
	kunmap(io_ctl->pages[0]);
}

static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_map_page(io_ctl, 0);
		return 0;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	tmp = kmap(io_ctl->pages[0]);
	tmp += index;
	val = *tmp;
	kunmap(io_ctl->pages[0]);

	io_ctl_map_page(io_ctl, 0);
	crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
			      PAGE_CACHE_SIZE - offset);
	btrfs_csum_final(crc, (char *)&crc);
	if (val != crc) {
		printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
				   "space cache\n");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}
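/*
 * Entries are packed back to back within a page.  Once less than one
 * struct btrfs_free_space_entry of room is left, the current page's crc
 * is finalized and the next page is mapped; -ENOSPC from these helpers
 * means we ran out of pre-allocated pages.
 */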
static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	entry->offset = cpu_to_le64(offset);
	entry->bytes = cpu_to_le64(bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_set_crc(io_ctl, io_ctl->index - 1);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}

static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}

static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
{
	/*
	 * If we're not on the boundary we know we've modified the page and we
	 * need to crc the page.
	 */
	if (io_ctl->cur != io_ctl->orig)
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	else
		io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	}
}

static int io_ctl_read_entry(struct io_ctl *io_ctl,
			     struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {
		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
		if (ret)
			return ret;
	}

	e = io_ctl->cur;
	entry->offset = le64_to_cpu(e->offset);
	entry->bytes = le64_to_cpu(e->bytes);
	*type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_unmap_page(io_ctl);

	return 0;
}

static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
			      struct btrfs_free_space *entry)
{
	int ret;

	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
	if (ret)
		return ret;

	memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
	io_ctl_unmap_page(io_ctl);

	return 0;
}
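/*
 * Load sequence: verify the crc and generation on page 0, read back every
 * extent and bitmap entry header, then read the bitmap payload pages that
 * follow the last entry page.  Returns 1 on success, 0 if there was
 * nothing usable to load, and < 0 on error.
 */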
int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct io_ctl io_ctl;
	struct btrfs_key key;
	struct btrfs_free_space *e, *n;
	struct list_head bitmaps;
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u8 type;
	int ret = 0;

	INIT_LIST_HEAD(&bitmaps);

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		return 0;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return 0;
	else if (ret > 0) {
		btrfs_release_path(path);
		return 0;
	}

	ret = -1;

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	if (BTRFS_I(inode)->generation != generation) {
		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
		       " not match free space cache generation (%llu)\n",
		       (unsigned long long)BTRFS_I(inode)->generation,
		       (unsigned long long)generation);
		return 0;
	}

	if (!num_entries)
		return 0;

	ret = io_ctl_init(&io_ctl, inode, root);
	if (ret)
		return ret;

	ret = readahead_cache(inode);
	if (ret)
		goto out;

	ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
	if (ret)
		goto out;

	ret = io_ctl_check_crc(&io_ctl, 0);
	if (ret)
		goto free_cache;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	while (num_entries) {
		e = kmem_cache_zalloc(btrfs_free_space_cachep,
				      GFP_NOFS);
		if (!e)
			goto free_cache;

		ret = io_ctl_read_entry(&io_ctl, e, &type);
		if (ret) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (!e->bytes) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				printk(KERN_ERR "Duplicate entries in "
				       "free space cache, dumping\n");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
		} else {
			BUG_ON(!num_bitmaps);
			num_bitmaps--;
			e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
			if (!e->bitmap) {
				kmem_cache_free(
					btrfs_free_space_cachep, e);
				goto free_cache;
			}
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			ctl->total_bitmaps++;
			ctl->op->recalc_thresholds(ctl);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				printk(KERN_ERR "Duplicate entries in "
				       "free space cache, dumping\n");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * The bitmap pages are written out after all of the entries, so read
	 * them back in the same order the bitmap entries were added above.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
	}

	io_ctl_drop_pages(&io_ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}
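/*
 * Wrapper that also sanity checks the result: the free space we loaded
 * must equal the block group size minus the used bytes and the superblock
 * mirror area, otherwise the cache is dropped and marked to be rebuilt.
 */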
int load_free_space_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_root *root = fs_info->tree_root;
	struct inode *inode;
	struct btrfs_path *path;
	int ret = 0;
	bool matched;
	u64 used = btrfs_block_group_used(&block_group->item);

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	/* We may have converted the inode and made the cache invalid. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
		goto out;
	}
	spin_unlock(&block_group->lock);

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->key.objectid);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->key.offset - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		printk(KERN_ERR "block group %llu has the wrong amount of free "
		       "space\n", block_group->key.objectid);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		printk(KERN_ERR "btrfs: failed to load free space cache "
		       "for block group %llu\n", block_group->key.objectid);
	}

	iput(inode);
	return ret;
}
/**
 * __btrfs_write_out_cache - write out cached info to an inode
 * @root - the root the inode belongs to
 * @ctl - the free space cache we are going to write out
 * @block_group - the block_group for this cache if it belongs to a block_group
 * @trans - the trans handle
 * @path - the path to use
 * @offset - the offset for the key we'll insert
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount.  This will return 0 if it was successful in writing the cache out,
 * and -1 if it was not.
 */
int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct rb_node *node;
	struct list_head *pos, *n;
	struct extent_state *cached_state = NULL;
	struct btrfs_free_cluster *cluster = NULL;
	struct extent_io_tree *unpin = NULL;
	struct io_ctl io_ctl;
	struct list_head bitmap_list;
	struct btrfs_key key;
	u64 start, extent_start, extent_end, len;
	int entries = 0;
	int bitmaps = 0;
	int ret;
	int err = -1;

	INIT_LIST_HEAD(&bitmap_list);

	if (!i_size_read(inode))
		return -1;

	ret = io_ctl_init(&io_ctl, inode, root);
	if (ret)
		return -1;

	/* Get the cluster for this block_group if it exists */
	if (block_group && !list_empty(&block_group->cluster_list))
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);

	/* Lock all pages first so we can lock the extent safely. */
	io_ctl_prepare_pages(&io_ctl, inode, 0);

	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 0, &cached_state, GFP_NOFS);

	node = rb_first(&ctl->free_space_offset);
	if (!node && cluster) {
		node = rb_first(&cluster->root);
		cluster = NULL;
	}

	/* Make sure we can fit our crcs into the first page */
	if (io_ctl.check_crcs &&
	    (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
		WARN_ON(1);
		goto out_nospc;
	}

	io_ctl_set_generation(&io_ctl, trans->transid);

	/* Write out the extent entries */
	while (node) {
		struct btrfs_free_space *e;

		e = rb_entry(node, struct btrfs_free_space, offset_index);
		entries++;

		ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
				       e->bitmap);
		if (ret)
			goto out_nospc;

		if (e->bitmap) {
			list_add_tail(&e->list, &bitmap_list);
			bitmaps++;
		}
		node = rb_next(node);
		if (!node && cluster) {
			node = rb_first(&cluster->root);
			cluster = NULL;
		}
	}

	/*
	 * We want to add any pinned extents to our free space cache
	 * so we don't leak the space
	 */

	/*
	 * We shouldn't have switched the pinned extents yet so this is the
	 * right one
	 */
	unpin = root->fs_info->pinned_extents;

	if (block_group)
		start = block_group->key.objectid;

	while (block_group && (start < block_group->key.objectid +
			       block_group->key.offset)) {
		ret = find_first_extent_bit(unpin, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY);
		if (ret) {
			ret = 0;
			break;
		}

		/* This pinned extent is out of our range */
		if (extent_start >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		extent_start = max(extent_start, start);
		extent_end = min(block_group->key.objectid +
				 block_group->key.offset, extent_end + 1);
		len = extent_end - extent_start;

		entries++;
		ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
		if (ret)
			goto out_nospc;

		start = extent_end;
	}

	/* Write out the bitmaps */
	list_for_each_safe(pos, n, &bitmap_list) {
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);

		ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
		if (ret)
			goto out_nospc;
		list_del_init(&entry->list);
	}

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(&io_ctl);

	ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
				0, i_size_read(inode), &cached_state);
	io_ctl_drop_pages(&io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	if (ret)
		goto out;

	ret = filemap_write_and_wait(inode->i_mapping);
	if (ret)
		goto out;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
				 GFP_NOFS);
		goto out;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		BUG_ON(!path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != offset) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
					 inode->i_size - 1,
					 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
					 NULL, GFP_NOFS);
			btrfs_release_path(path);
			goto out;
		}
	}

	BTRFS_I(inode)->generation = trans->transid;
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	err = 0;
out:
	io_ctl_free(&io_ctl);
	if (err) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	return err;

out_nospc:
	list_for_each_safe(pos, n, &bitmap_list) {
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);
		list_del_init(&entry->list);
	}
	io_ctl_drop_pages(&io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
	goto out;
}
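/*
 * If __btrfs_write_out_cache() fails it invalidates the pages and resets
 * the in-memory generation to 0, so a stale on-disk cache can never pass
 * the generation check at load time.
 */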
int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	root = root->fs_info->tree_root;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
				      path, block_group->key.objectid);
	if (ret) {
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);
		ret = 0;
#ifdef DEBUG
		printk(KERN_ERR "btrfs: failed to write free space cache "
		       "for block group %llu\n", block_group->key.objectid);
#endif
	}

	iput(inode);
	return ret;
}

static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div_u64(offset, unit));
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return (unsigned long)(div_u64(bytes, unit));
}

static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
	bitmap_start = offset - ctl->start;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += ctl->start;

	return bitmap_start;
}
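/*
 * Example for the helpers above: with 4KiB pages and a 4KiB unit,
 * BITS_PER_BITMAP is 32768, so a single bitmap covers 128MiB and
 * offset_to_bitmap() rounds an offset down to the nearest 128MiB
 * boundary relative to ctl->start.
 */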
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * We could have a bitmap entry and an extent entry that
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				if (info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_right;
			} else {
				if (!info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}
/*
 * searches the tree for the given offset.
 *
 * bitmap_only - If this is set, we only want a bitmap entry that starts at
 * the given offset; an extent entry at the same offset is skipped over.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = ctl->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    ctl->unit > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static inline void
__unlink_free_space(struct btrfs_free_space_ctl *ctl,
		    struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &ctl->free_space_offset);
	ctl->free_extents--;
}

static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	__unlink_free_space(ctl, info);
	ctl->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	ctl->free_space += info->bytes;
	ctl->free_extents++;
	return ret;
}
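/*
 * Worked example for the threshold math below, assuming 4KiB pages and a
 * 4KiB sectorsize: a 1GiB block group gets max_bytes = 32KiB.  Each
 * bitmap costs one page (4KiB) and covers 128MiB, so bitmap_bytes (which
 * counts one spare bitmap) reaches max_bytes around 8 bitmaps, and
 * whatever is left, capped at max_bytes / 2, is divided by
 * sizeof(struct btrfs_free_space) to get extents_thresh.
 */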
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;
	u64 size = block_group->key.offset;
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(ctl->total_bitmaps > max_bitmaps);

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	if (size < 1024 * 1024 * 1024)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG *
			div64_u64(size, 1024 * 1024 * 1024);

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		ctl->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	ctl->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *info,
				       u64 offset, u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_clear(info->bitmap, start, count);

	info->bytes -= bytes;
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;
}

static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	return -1;
}
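/*
 * search_bitmap() above finds the first run of set bits at least *bytes
 * long starting at or after *offset and returns the run through both
 * pointers.  find_free_space() below walks the tree from the bitmap that
 * would cover *offset and returns the first extent or bitmap entry that
 * can satisfy the request.
 */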
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!ctl->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(ctl, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info, u64 offset)
{
	info->offset = offset_to_bitmap(ctl, offset);
	info->bytes = 0;
	INIT_LIST_HEAD(&info->list);
	link_free_space(ctl, info);
	ctl->total_bitmaps++;

	ctl->op->recalc_thresholds(ctl);
}

static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info)
{
	unlink_free_space(ctl, bitmap_info);
	kfree(bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	ctl->op->recalc_thresholds(ctl);
}

static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * Since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working.  Search the bitmap for the space we are trying to use to
	 * make sure it's actually there.  If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}
static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes)
{
	u64 bytes_to_set = 0;
	u64 end;

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	return bytes_to_set;
}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group_cache *block_group = ctl->private;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents.  However, if we have
		 * plenty of cache left then go ahead and add them; there is no
		 * sense in adding the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= block_group->sectorsize * 4) {
			if (ctl->free_extents * 2 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return false;

	return true;
}

static struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};
static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
	 */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		struct btrfs_free_cluster *cluster;
		struct rb_node *node;
		struct btrfs_free_space *entry;

		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
		spin_lock(&cluster->lock);
		node = rb_first(&cluster->root);
		if (!node) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!entry->bitmap) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		if (entry->offset == offset_to_bitmap(ctl, offset)) {
			bytes_added = add_bytes_to_bitmap(ctl, entry,
							  offset, bytes);
			bytes -= bytes_added;
			offset += bytes_added;
		}
		spin_unlock(&cluster->lock);
		if (!bytes) {
			ret = 1;
			goto out;
		}
	}

no_cluster_bitmap:
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
	bytes -= bytes_added;
	offset += bytes_added;
	added = 0;

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(ctl, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&ctl->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kmem_cache_zalloc(btrfs_free_space_cachep,
						 GFP_NOFS);
			if (!info) {
				spin_lock(&ctl->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&ctl->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kmem_cache_free(btrfs_free_space_cachep, info);
	}

	return ret;
}
static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
				 struct btrfs_free_space *info, bool update_stat)
{
	struct btrfs_free_space *left_info;
	struct btrfs_free_space *right_info;
	bool merged = false;
	u64 offset = info->offset;
	u64 bytes = info->bytes;

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding.  If there is, remove that struct and add a new one to
	 * cover the entire range.
	 */
	right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(ctl, offset - 1, 0, 0);

	if (right_info && !right_info->bitmap) {
		if (update_stat)
			unlink_free_space(ctl, right_info);
		else
			__unlink_free_space(ctl, right_info);
		info->bytes += right_info->bytes;
		kmem_cache_free(btrfs_free_space_cachep, right_info);
		merged = true;
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		if (update_stat)
			unlink_free_space(ctl, left_info);
		else
			__unlink_free_space(ctl, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kmem_cache_free(btrfs_free_space_cachep, left_info);
		merged = true;
	}

	return merged;
}

int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
			   u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	int ret = 0;

	info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&ctl->tree_lock);

	if (try_merge_free_space(ctl, info, true))
		goto link;

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	ret = insert_into_bitmap(ctl, info);
	if (ret < 0) {
		goto out;
	} else if (ret) {
		ret = 0;
		goto out;
	}
link:
	ret = link_free_space(ctl, info);
	if (ret)
		kmem_cache_free(btrfs_free_space_cachep, info);
out:
	spin_unlock(&ctl->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&ctl->tree_lock);

again:
	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info) {
			/* the tree logging code might be calling us before we
			 * have fully loaded the free space rbtree for this
			 * block group.  So it is possible the entry won't
			 * be in the rbtree yet at all.  The caching code
			 * will make sure not to put it in the rbtree if
			 * the logging code has pinned it.
			 */
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset +
				BITS_PER_BITMAP * ctl->unit - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			       " trying to use %llu\n",
			       (unsigned long long)info->offset,
			       (unsigned long long)info->bytes,
			       (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(ctl, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			ctl->total_bitmaps--;
		}
		kmem_cache_free(btrfs_free_space_cachep, info);
		ret = 0;
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(ctl, info);
		info->offset += bytes;
		info->bytes -= bytes;
		ret = link_free_space(ctl, info);
		WARN_ON(ret);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(ctl, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(ctl, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kmem_cache_free(btrfs_free_space_cachep, info);
		}
		spin_unlock(&ctl->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&ctl->tree_lock);
out:
	return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes\n",
	       count);
}

void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;

	spin_lock_init(&ctl->tree_lock);
	ctl->unit = block_group->sectorsize;
	ctl->start = block_group->key.objectid;
	ctl->private = block_group;
	ctl->op = &free_space_op;

	/*
	 * we only want to have 32k of ram per block group for keeping
	 * track of free space, and if we pass 1/2 of that we want to
	 * start converting things over to using bitmaps
	 */
	ctl->extents_thresh = ((1024 * 32) / 2) /
				sizeof(struct btrfs_free_space);
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);

	node = rb_first(&cluster->root);
	while (node) {
		bool bitmap;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);

		bitmap = (entry->bitmap != NULL);
		if (!bitmap)
			try_merge_free_space(ctl, entry, false);
		tree_insert_offset(&ctl->free_space_offset,
				   entry->offset, &entry->offset_index, bitmap);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *info;
	struct rb_node *node;

	while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!info->bitmap) {
			unlink_free_space(ctl, info);
			kmem_cache_free(btrfs_free_space_cachep, info);
		} else {
			free_bitmap(ctl, info);
		}
		if (need_resched()) {
			spin_unlock(&ctl->tree_lock);
			cond_resched();
			spin_lock(&ctl->tree_lock);
		}
	}
}

void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
	spin_lock(&ctl->tree_lock);
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&ctl->tree_lock);
	entry = find_free_space(ctl, &offset, &bytes_search);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(ctl, entry, offset, bytes);
		if (!entry->bytes)
			free_bitmap(ctl, entry);
	} else {
		unlink_free_space(ctl, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	}

out:
	spin_unlock(&ctl->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl;
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	ctl = block_group->free_space_ctl;

	/* now return any extents the cluster had on it */
	spin_lock(&ctl->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&ctl->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

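/*
 * Carve "bytes" out of a bitmap entry on behalf of a cluster.  Returns
 * the start of the allocation, or 0 if the bitmap holds no run of free
 * bits at or after min_start that is large enough.
 */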
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   struct btrfs_free_space *entry,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	int err;
	u64 search_start = min_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	err = search_bitmap(ctl, entry, &search_start, &search_bytes);
	if (err)
		return 0;

	ret = search_start;
	__bitmap_clear_bits(ctl, entry, ret, bytes);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while (1) {
		if (entry->bytes < bytes ||
		    (!entry->bitmap && entry->offset < min_start)) {
			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}

		if (entry->bitmap) {
			ret = btrfs_alloc_from_bitmap(block_group,
						      cluster, entry, bytes,
						      cluster->window_start);
			if (ret == 0) {
				node = rb_next(&entry->offset_index);
				if (!node)
					break;
				entry = rb_entry(node, struct btrfs_free_space,
						 offset_index);
				continue;
			}
			cluster->window_start += bytes;
		} else {
			ret = entry->offset;

			entry->offset += bytes;
			entry->bytes -= bytes;
		}

		if (entry->bytes == 0)
			rb_erase(&entry->offset_index, &cluster->root);
		break;
	}
out:
	spin_unlock(&cluster->lock);

	if (!ret)
		return 0;

	spin_lock(&ctl->tree_lock);

	ctl->free_space -= bytes;
	if (entry->bytes == 0) {
		ctl->free_extents--;
		if (entry->bitmap) {
			kfree(entry->bitmap);
			ctl->total_bitmaps--;
			ctl->op->recalc_thresholds(ctl);
		}
		kmem_cache_free(btrfs_free_space_cachep, entry);
	}

	spin_unlock(&ctl->tree_lock);

	return ret;
}

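/*
 * Build a cluster window out of a single bitmap entry.  We walk the
 * bitmap looking for runs of set (free) bits: runs shorter than
 * min_bits are skipped, total_found must reach want_bits (the whole
 * request) and the largest single run must reach cont1_bytes before
 * the entry is moved from the free space tree into the cluster's
 * rbtree.
 */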
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes,
				u64 cont1_bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	unsigned long next_zero;
	unsigned long i;
	unsigned long want_bits;
	unsigned long min_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	int ret;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	want_bits = bytes_to_bits(bytes, block_group->sectorsize);
	min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= min_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -ENOSPC;

	if (!total_found) {
		start = i;
		cluster->max_size = 0;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < want_bits || cluster->max_size < cont1_bytes) {
		i = next_zero + 1;
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	rb_erase(&entry->offset_index, &ctl->free_space_offset);
	ret = tree_insert_offset(&cluster->root, entry->offset,
				 &entry->offset_index, 1);
	BUG_ON(ret);

	trace_btrfs_setup_cluster(block_group, cluster,
				  total_found * block_group->sectorsize, 1);
	return 0;
}

/*
 * This searches the block group for just extents to fill the cluster with.
 * Try to find a cluster with at least bytes total bytes, at least one
 * extent of cont1_bytes, and other clusters of at least min_bytes.
 */
static noinline int
setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
			struct btrfs_free_cluster *cluster,
			struct list_head *bitmaps, u64 offset, u64 bytes,
			u64 cont1_bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *first = NULL;
	struct btrfs_free_space *entry = NULL;
	struct btrfs_free_space *last;
	struct rb_node *node;
	u64 window_start;
	u64 window_free;
	u64 max_extent;
	u64 total_size = 0;

	entry = tree_search_offset(ctl, offset, 0, 1);
	if (!entry)
		return -ENOSPC;

	/*
	 * We don't want bitmaps, so just move along until we find a normal
	 * extent entry.
	 */
	while (entry->bitmap || entry->bytes < min_bytes) {
		if (entry->bitmap && list_empty(&entry->list))
			list_add_tail(&entry->list, bitmaps);
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	window_start = entry->offset;
	window_free = entry->bytes;
	max_extent = entry->bytes;
	first = entry;
	last = entry;

	for (node = rb_next(&entry->offset_index); node;
	     node = rb_next(&entry->offset_index)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);

		if (entry->bitmap) {
			if (list_empty(&entry->list))
				list_add_tail(&entry->list, bitmaps);
			continue;
		}

		if (entry->bytes < min_bytes)
			continue;

		last = entry;
		window_free += entry->bytes;
		if (entry->bytes > max_extent)
			max_extent = entry->bytes;
	}

	if (window_free < bytes || max_extent < cont1_bytes)
		return -ENOSPC;

	cluster->window_start = first->offset;

	node = &first->offset_index;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 */
	do {
		int ret;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		if (entry->bitmap || entry->bytes < min_bytes)
			continue;

		rb_erase(&entry->offset_index, &ctl->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		total_size += entry->bytes;
		BUG_ON(ret);
	} while (node && entry != last);

	cluster->max_size = max_extent;
	trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
	return 0;
}

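/*
 * Note the hand-off above: setup_cluster_no_bitmap parks every bitmap
 * entry it walks past on the caller's temporary "bitmaps" list, so the
 * bitmap pass below can retry exactly those entries without another
 * full tree search.
 */
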
/*
 * This specifically looks for bitmaps that may work in the cluster, we assume
 * that we have already failed to find extents that will work.
 */
static noinline int
setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
		     struct btrfs_free_cluster *cluster,
		     struct list_head *bitmaps, u64 offset, u64 bytes,
		     u64 cont1_bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	int ret = -ENOSPC;
	u64 bitmap_offset = offset_to_bitmap(ctl, offset);

	if (ctl->total_bitmaps == 0)
		return -ENOSPC;

	/*
	 * The bitmap that covers offset won't be in the list unless offset
	 * is just its start offset.  Don't touch the list head if the list
	 * is empty, there may be no bitmap entries on it at all.
	 */
	if (!list_empty(bitmaps))
		entry = list_first_entry(bitmaps, struct btrfs_free_space,
					 list);
	if (!entry || entry->offset != bitmap_offset) {
		entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
		if (entry && list_empty(&entry->list))
			list_add(&entry->list, bitmaps);
	}

	list_for_each_entry(entry, bitmaps, list) {
		if (entry->bytes < bytes)
			continue;
		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
					   bytes, cont1_bytes, min_bytes);
		if (!ret)
			return 0;
	}

	/*
	 * The bitmaps list has all the bitmaps that record free space
	 * starting after offset, so no more search is required.
	 */
	return -ENOSPC;
}

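/*
 * The sizing policy chosen in btrfs_find_space_cluster below works
 * out to:
 *
 *	ssd_spread:	cont1_bytes = min_bytes = bytes + empty_size
 *	metadata:	cont1_bytes = bytes, min_bytes = sectorsize
 *	data:		cont1_bytes = max(bytes, (bytes + empty_size) >> 2),
 *			min_bytes = sectorsize
 *
 * e.g. a 256K data request with 768K of empty_size needs at least one
 * 256K contiguous extent inside a window totalling 1M.
 */
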
/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes+empty_size.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -enospc
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry, *tmp;
	LIST_HEAD(bitmaps);
	u64 min_bytes;
	u64 cont1_bytes;
	int ret;

	/*
	 * Choose the minimum extent size we'll require for this
	 * cluster.  For SSD_SPREAD, don't allow any fragmentation.
	 * For metadata, allow allocates with smaller extents.  For
	 * data, keep it dense.
	 */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		cont1_bytes = min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		cont1_bytes = bytes;
		min_bytes = block_group->sectorsize;
	} else {
		cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
		min_bytes = block_group->sectorsize;
	}

	spin_lock(&ctl->tree_lock);

	/*
	 * If we know we don't have enough space to make a cluster don't even
	 * bother doing all the work to try and find one.
	 */
	if (ctl->free_space < bytes) {
		spin_unlock(&ctl->tree_lock);
		return -ENOSPC;
	}

	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}

	trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
				 min_bytes);

	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
				      bytes + empty_size,
				      cont1_bytes, min_bytes);
	if (ret)
		ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
					   offset, bytes + empty_size,
					   cont1_bytes, min_bytes);

	/* Clear our temporary list */
	list_for_each_entry_safe(entry, tmp, &bitmaps, list)
		list_del_init(&entry->list);

	if (!ret) {
		atomic_inc(&block_group->count);
		list_add_tail(&cluster->block_group_list,
			      &block_group->cluster_list);
		cluster->block_group = block_group;
	} else {
		trace_btrfs_failed_cluster_setup(block_group);
	}
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&ctl->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root = RB_ROOT;
	cluster->max_size = 0;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}

static int do_trimming(struct btrfs_block_group_cache *block_group,
		       u64 *total_trimmed, u64 start, u64 bytes,
		       u64 reserved_start, u64 reserved_bytes)
{
	struct btrfs_space_info *space_info = block_group->space_info;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	int ret;
	int update = 0;
	u64 trimmed = 0;

	spin_lock(&space_info->lock);
	spin_lock(&block_group->lock);
	if (!block_group->ro) {
		block_group->reserved += reserved_bytes;
		space_info->bytes_reserved += reserved_bytes;
		update = 1;
	}
	spin_unlock(&block_group->lock);
	spin_unlock(&space_info->lock);

	ret = btrfs_error_discard_extent(fs_info->extent_root,
					 start, bytes, &trimmed);
	if (!ret)
		*total_trimmed += trimmed;

	btrfs_add_free_space(block_group, reserved_start, reserved_bytes);

	if (update) {
		spin_lock(&space_info->lock);
		spin_lock(&block_group->lock);
		if (block_group->ro)
			space_info->bytes_readonly += reserved_bytes;
		block_group->reserved -= reserved_bytes;
		space_info->bytes_reserved -= reserved_bytes;
		spin_unlock(&space_info->lock);
		spin_unlock(&block_group->lock);
	}

	return ret;
}

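/*
 * Discard the plain extent entries that overlap [start, end).  Each
 * entry of at least minlen bytes is pulled out of the free space tree
 * before we drop the lock and issue the discard; do_trimming() then
 * re-adds the reserved range, so allocators never see space that is
 * in the middle of being trimmed.
 */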
static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
			  u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret = 0;
	u64 extent_start;
	u64 extent_bytes;
	u64 bytes;

	while (start < end) {
		spin_lock(&ctl->tree_lock);

		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		entry = tree_search_offset(ctl, start, 0, 1);
		if (!entry) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		/* skip bitmaps */
		while (entry->bitmap) {
			node = rb_next(&entry->offset_index);
			if (!node) {
				spin_unlock(&ctl->tree_lock);
				goto out;
			}
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
		}

		if (entry->offset >= end) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		extent_start = entry->offset;
		extent_bytes = entry->bytes;
		start = max(start, extent_start);
		bytes = min(extent_start + extent_bytes, end) - start;
		if (bytes < minlen) {
			spin_unlock(&ctl->tree_lock);
			goto next;
		}

		unlink_free_space(ctl, entry);
		kmem_cache_free(btrfs_free_space_cachep, entry);

		spin_unlock(&ctl->tree_lock);

		ret = do_trimming(block_group, total_trimmed, start, bytes,
				  extent_start, extent_bytes);
		if (ret)
			break;
next:
		start += bytes;

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}
out:
	return ret;
}

static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
			u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	int ret = 0;
	int ret2;
	u64 bytes;
	u64 offset = offset_to_bitmap(ctl, start);

	while (offset < end) {
		bool next_bitmap = false;

		spin_lock(&ctl->tree_lock);

		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		entry = tree_search_offset(ctl, offset, 1, 0);
		if (!entry) {
			spin_unlock(&ctl->tree_lock);
			next_bitmap = true;
			goto next;
		}

		bytes = minlen;
		ret2 = search_bitmap(ctl, entry, &start, &bytes);
		if (ret2 || start >= end) {
			spin_unlock(&ctl->tree_lock);
			next_bitmap = true;
			goto next;
		}

		bytes = min(bytes, end - start);
		if (bytes < minlen) {
			spin_unlock(&ctl->tree_lock);
			goto next;
		}

		bitmap_clear_bits(ctl, entry, start, bytes);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);

		spin_unlock(&ctl->tree_lock);

		ret = do_trimming(block_group, total_trimmed, start, bytes,
				  start, bytes);
		if (ret)
			break;
next:
		if (next_bitmap) {
			offset += BITS_PER_BITMAP * ctl->unit;
		} else {
			start += bytes;
			if (start >= offset + BITS_PER_BITMAP * ctl->unit)
				offset += BITS_PER_BITMAP * ctl->unit;
		}

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;
}

int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
			   u64 *trimmed, u64 start, u64 end, u64 minlen)
{
	int ret;

	*trimmed = 0;

	ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
	if (ret)
		return ret;

	ret = trim_bitmaps(block_group, trimmed, start, end, minlen);

	return ret;
}

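/*
 * Everything below implements the free inode number cache, which
 * reuses the free space cache machinery (a btrfs_free_space_ctl hung
 * off the root) to track reusable objectids rather than byte ranges,
 * with a unit of one inode number instead of one sector.
 */
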
/*
 * Find the left-most item in the cache tree, and then return the
 * smallest inode number in the item.
 *
 * Note: the returned inode number may not be the smallest one in
 * the tree, if the left-most item is a bitmap.
 */
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
{
	struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 ino = 0;

	spin_lock(&ctl->tree_lock);

	if (RB_EMPTY_ROOT(&ctl->free_space_offset))
		goto out;

	entry = rb_entry(rb_first(&ctl->free_space_offset),
			 struct btrfs_free_space, offset_index);

	if (!entry->bitmap) {
		ino = entry->offset;

		unlink_free_space(ctl, entry);
		entry->offset++;
		entry->bytes--;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	} else {
		u64 offset = 0;
		u64 count = 1;
		int ret;

		ret = search_bitmap(ctl, entry, &offset, &count);
		BUG_ON(ret);

		ino = offset;
		bitmap_clear_bits(ctl, entry, offset, 1);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);
	}
out:
	spin_unlock(&ctl->tree_lock);

	return ino;
}

struct inode *lookup_free_ino_inode(struct btrfs_root *root,
				    struct btrfs_path *path)
{
	struct inode *inode = NULL;

	spin_lock(&root->cache_lock);
	if (root->cache_inode)
		inode = igrab(root->cache_inode);
	spin_unlock(&root->cache_lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path, 0);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&root->cache_lock);
	if (!btrfs_fs_closing(root->fs_info))
		root->cache_inode = igrab(inode);
	spin_unlock(&root->cache_lock);

	return inode;
}

int create_free_ino_inode(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_path *path)
{
	return __create_free_space_inode(root, trans, path,
					 BTRFS_FREE_INO_OBJECTID, 0);
}

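/*
 * Read the persisted free inode numbers back in at mount time.  The
 * on-disk cache is only trusted if it was written by the generation
 * recorded in the root item; a stale cache is silently skipped and
 * rebuilt later.
 */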
int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
	int ret = 0;
	u64 root_gen = btrfs_root_generation(&root->root_item);

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	/*
	 * If we're unmounting then just return, since this does a search on
	 * the normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		goto out;

	if (root_gen != BTRFS_I(inode)->generation)
		goto out_put;

	ret = __load_free_space_cache(root, inode, ctl, path, 0);

	if (ret < 0)
		printk(KERN_ERR "btrfs: failed to load free ino cache for "
		       "root %llu\n",
		       (unsigned long long)root->root_key.objectid);
out_put:
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_write_out_ino_cache(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct inode *inode;
	int ret;

	if (!btrfs_test_opt(root, INODE_MAP_CACHE))
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
	if (ret) {
		btrfs_delalloc_release_metadata(inode, inode->i_size);
#ifdef DEBUG
		printk(KERN_ERR "btrfs: failed to write free ino cache "
		       "for root %llu\n",
		       (unsigned long long)root->root_key.objectid);
#endif
	}

	iput(inode);
	return ret;
}